In [27]:
# -*- coding: utf-8 -*-
%matplotlib inline

import re
import pandas as pd
import numpy as np
import math
import matplotlib as mplstyle
import matplotlib.pyplot as plt
import locale
import arrow
from locale import atof
from dateutil import parser
from datetime import datetime

# Apply matplotlib's 'fivethirtyeight' style sheet to the plots drawn in this notebook.
plt.style.use('fivethirtyeight')

In [28]:
# Import default-only data
# Each frame below is read from one CSV extract under data/_default/.

# FY 2014, FY 2013, and FY 2012 official cohort default rates published
# for schools participating in the Title IV student financial assistance programs.
default_all_schools = pd.read_csv('data/_default/peps300_all.csv')

# Schools subject to loss of Direct Loan Program and/or Pell Grant Program eligibility due
# to FY 2014, FY 2013, and FY 2012 official cohort default rates of 30% or greater.
# Reads the "30+" extract rather than peps300_all.csv, so this frame is not just a
# second copy of default_all_schools.
default_30plus = pd.read_csv('data/_default/peps304_30+default.csv')

# Schools subject to loss of Direct Loan Program due to FY 2014 official cohort
# default rates greater than 40%.
# NOTE(review): this path points at the "30+" extract, not a 40%+ file. The correct
# filename for the >40% list is not visible in this notebook -- confirm and update.
default_40plus = pd.read_csv('data/_default/peps304_30+default.csv')

# FY 2014 official cohort default rates published for schools that may be eligible
# for Benefits due to cohort default rates 5.0% and below.
default_5below = pd.read_csv('data/_default/peps751_5-default.csv')

# FY 2014, FY 2013, and FY 2012 official cohort default rates published for schools
# that may be eligible for Benefits due to cohort default rates 15.0% and below for 3 years.
default_15below = pd.read_csv('data/_default/peps753_15-default.csv')

In [33]:
# Functions
def replaceYear(string):
    """Expand a trailing cohort index (1, 2 or 3) in a column label to its fiscal year.

    A label whose last non-whitespace character is a standalone ``1``, ``2`` or
    ``3`` has that digit replaced by ``2014``, ``2013`` or ``2012`` respectively.
    Only the trailing digit is rewritten; the same digit elsewhere in the label
    is left alone. Labels that do not end in such a digit (including empty
    labels and labels already ending in a four-digit year) are returned
    unchanged, so applying the function twice is harmless.

    Parameters
    ----------
    string : str
        Column label to rewrite.

    Returns
    -------
    str
        The label with its trailing cohort index expanded, or the input as-is.
    """
    year_by_index = {'1': '2014', '2': '2013', '3': '2012'}
    # (?<!\d)   -> the digit must not be the tail of a longer number such as "2013"
    # (?=\s*\Z) -> only trailing whitespace may follow, which is kept untouched
    return re.sub(
        r'(?<!\d)([123])(?=\s*\Z)',
        lambda match: year_by_index[match.group(1)],
        string,
    )
    
def classify(string):
    """Turn a label into a lowercase, hyphen-separated identifier.

    Parentheses are dropped, the text is lowercased, surrounding whitespace is
    trimmed, and each remaining space character becomes a hyphen. Other
    whitespace (e.g. an embedded newline) is kept as-is.
    """
    without_parens = re.sub(r'\(|\)', '', string)
    trimmed = without_parens.lower().strip()
    return trimmed.replace(' ', '-')

def find_share(num,total):
    """Return ``num`` as a percentage of ``total``, rounded to two decimal places."""
    fraction = num / total
    percentage = fraction * 100
    return round(percentage, 2)

def removeSpecChars(string):
    """Parse a number from text after removing dollar signs and commas.

    Every ``$`` and ``,`` character is deleted and the remainder is converted
    with ``float``; text that is still not numeric raises ``ValueError``.
    """
    cleaned = string
    for unwanted in ('$', ','):
        cleaned = cleaned.replace(unwanted, '')
    return float(cleaned)

In [30]:
# Normalize column labels in one pass: expand the trailing cohort index to a
# fiscal year first, then convert the label to a lowercase hyphenated form.
default_all_schools.rename(
    columns=lambda label: classify(replaceYear(label)),
    inplace=True,
)
default_all_schools

Unnamed: 0,opeid,name,address,city,state,state-desc,zip-code,zip-ext,prog length,school type,...,year-2013,dual num-2013,dual denom-2013,drate-2013,prate-2013,year-2012,dual num-2012,dual denom-2012,drate-2012,prate-2012
0,1002,ALABAMA AGRICULTURAL & MECHANICAL UNIVERSITY ...,4900 MERIDIAN STREET ...,NORMAL,AL,ALABAMA ...,35762,1357,8,1,...,2013,300,1812,16.5,A,2012,326,1895,17.2,A
1,1003,FAULKNER UNIVERSITY ...,5345 ATLANTA HIGHWAY ...,MONTGOMERY,AL,ALABAMA ...,36109,3398,8,2,...,2013,143,1491,9.5,A,2012,143,1417,10.0,A
2,1004,UNIVERSITY OF MONTEVALLO ...,PALMER CIRCLE ...,MONTEVALLO,AL,ALABAMA ...,35115,6000,8,1,...,2013,57,744,7.6,A,2012,83,805,10.3,A
3,1005,ALABAMA STATE UNIVERSITY ...,915 SOUTH JACKSON STREET ...,MONTGOMERY,AL,ALABAMA ...,36104,5714,8,1,...,2013,367,2196,16.7,P,2012,334,2137,15.6,P
4,1007,CENTRAL ALABAMA COMMUNITY COLLEGE ...,1675 CHEROKEE ROAD ...,ALEXANDER CITY,AL,ALABAMA ...,35010,0000,5,1,...,2013,146,659,22.1,P,2012,144,656,21.9,P
5,1008,ATHENS STATE UNIVERSITY ...,300 NORTH BEATY STREET ...,ATHENS,AL,ALABAMA ...,35611,1999,8,1,...,2013,51,959,5.3,A,2012,80,929,8.6,A
6,1009,AUBURN UNIVERSITY ...,107 SAMFORD HALL ...,AUBURN,AL,ALABAMA ...,36849,5113,8,1,...,2013,160,3727,4.2,P,2012,170,3772,4.5,P
7,1012,BIRMINGHAM-SOUTHERN COLLEGE ...,900 ARKADELPHIA ROAD ...,BIRMINGHAM,AL,ALABAMA ...,35254,0002,6,2,...,2013,8,271,2.9,A,2012,12,267,4.4,A
8,1013,CALHOUN COMMUNITY COLLEGE ...,6250 HIGHWAY 31 NORTH ...,TANNER,AL,ALABAMA ...,35671,0000,5,1,...,2013,357,2107,16.9,P,2012,325,1770,18.3,P
9,1015,ENTERPRISE STATE COMMUNITY COLLEGE ...,600 PLAZA DRIVE ...,ENTERPRISE,AL,ALABAMA ...,36331,1300,5,1,...,2013,113,554,20.3,A,2012,116,499,23.2,A


In [34]:
# Show the column labels as a plain Python list.
default_all_schools.columns.tolist()

['opeid',
 'name',
 'address',
 'city',
 'state',
 'state-desc',
 'zip-code',
 'zip-ext',
 'prog\nlength',
 'school\ntype',
 'year-2014',
 'dual\nnum-2014',
 'dual\ndenom-2014',
 'drate-2014',
 'prate-2014',
 'ethnic-code',
 'program',
 'cong-dis',
 'region',
 'year-2013',
 'dual\nnum-2013',
 'dual\ndenom-2013',
 'drate-2013',
 'prate-2013',
 'year-2012',
 'dual\nnum-2012',
 'dual\ndenom-2012',
 'drate-2012',
 'prate-2012']

In [42]:
# Split the full table into one frame per category code.

# By 'school\ntype' code: 1, 2, 3.
# NOTE(review): the code meanings (public / private non-profit / proprietary)
# are taken from the variable names -- confirm against the data dictionary.
public_schools, privatenp_schools, proprietary_schools = (
    default_all_schools.loc[default_all_schools['school\ntype'].eq(type_code)]
    for type_code in (1, 2, 3)
)

# By 'ethnic-code' value: 1, 2, 3.
# NOTE(review): likewise, the code meanings are inferred from the variable names.
nativeamer_schools, hbcu_schools, hispanic_schools = (
    default_all_schools.loc[default_all_schools['ethnic-code'].eq(ethnic_code)]
    for ethnic_code in (1, 2, 3)
)

In [48]:
# Display the rows of default_all_schools whose 'ethnic-code' equals 1.
nativeamer_schools

Unnamed: 0,opeid,name,address,city,state,state-desc,zip-code,zip-ext,prog length,school type,...,year-2013,dual num-2013,dual denom-2013,drate-2013,prate-2013,year-2012,dual num-2012,dual denom-2012,drate-2012,prate-2012
3129,21434,SALISH KOOTENAI COLLEGE ...,58138 US HIGHWAY 93 ...,PABLO,MT,MONTANA ...,59855,0,6,1,...,2013,38,219,17.3,A,2012,42,209,20.0,A
3136,21464,INSTITUTE OF AMERICAN INDIAN & ALASKA NATIVE C...,83 AVAN NU PO ROAD ...,SANTA FE,NM,NEW MEXICO ...,87508,1300,8,1,...,2013,27,99,27.2,B,2012,11,44,25.0,A
3303,22429,UNITED TRIBES TECHNICAL COLLEGE ...,3315 UNIVERSITY DRIVE ...,BISMARCK,ND,NORTH DAKOTA ...,58504,7596,6,2,...,2013,51,125,40.8,A,2012,62,139,44.6,A
3902,31291,FOND DU LAC TRIBAL & COMMUNITY COLLEGE ...,2101 14TH STREET ...,CLOQUET,MN,MINNESOTA ...,55720,2984,5,1,...,2013,136,501,27.1,A,2012,119,521,22.8,A
