### College Scorecard

## Introduction


## 1. Imports

In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

## 2. Load College Scorecard Data

In [2]:
data = pd.read_csv('MERGED2018_19_PP.csv', low_memory=False)
data.shape

(6806, 1986)

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6806 entries, 0 to 6805
Columns: 1986 entries, UNITID to SCUGFFN_POOLED
dtypes: float64(1902), int64(14), object(70)
memory usage: 103.1+ MB


In [4]:
data.head()

Unnamed: 0,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,OMENRUP_PARTTIME_POOLED_SUPP,FTFTPCTPELL,FTFTPCTFLOAN,UG12MN,G12MN,SCUGFFN,POOLYRS_FTFTAIDPCT,FTFTPCTPELL_POOLED_SUPP,FTFTPCTFLOAN_POOLED_SUPP,SCUGFFN_POOLED
0,100654,100200,1002,Alabama A & M University,Normal,AL,35762,Southern Association of Colleges and Schools C...,www.aamu.edu/,www.aamu.edu/admissions-aid/tuition-fees/net-p...,...,0.3193,0.7057,0.7143,5343.0,1165.0,1288.0,2.0,0.7083,0.7287,2698.0
1,100663,105200,1052,University of Alabama at Birmingham,Birmingham,AL,35294-0110,Southern Association of Colleges and Schools C...,https://www.uab.edu,https://uab.studentaidcalculator.com/survey.aspx,...,0.2475,0.3788,0.535,14445.0,10498.0,2228.0,2.0,0.3891,0.5414,4176.0
2,100690,2503400,25034,Amridge University,Montgomery,AL,36117-3553,Southern Association of Colleges and Schools C...,www.amridgeuniversity.edu,www2.amridgeuniversity.edu:9091/,...,0.2836,1.0,1.0,440.0,527.0,5.0,4.0,PrivacySuppressed,PrivacySuppressed,24.0
3,100706,105500,1055,University of Alabama in Huntsville,Huntsville,AL,35899,Southern Association of Colleges and Schools C...,www.uah.edu,finaid.uah.edu/,...,0.2496,0.2707,0.4556,8145.0,2443.0,1341.0,2.0,0.2378,0.4402,2544.0
4,100724,100500,1005,Alabama State University,Montgomery,AL,36104-0271,Southern Association of Colleges and Schools C...,www.alasu.edu,www.alasu.edu/cost-aid/tuition-costs/net-price...,...,0.2621,0.7792,0.7539,4732.0,642.0,951.0,2.0,0.7684,0.7464,2094.0


## 3. Number of Missing Values by Column

In [5]:
missing = pd.concat([data.isnull().sum(), 100 * data.isnull().mean()], axis=1)
missing.columns=['count', '%']
missing.sort_values(by='count')

Unnamed: 0,count,%
UNITID,0,0.0
CURROPER,0,0.0
OPEFLAG,0,0.0
REGION,0,0.0
ST_FIPS,0,0.0
...,...,...
MALE_UNKN_2YR_TRANS_YR4_RT,6806,100.0
MALE_UNKN_4YR_TRANS_YR4_RT,6806,100.0
MALE_UNKN_ORIG_YR4_RT,6806,100.0
PELL_ENRL_ORIG_YR4_RT,6806,100.0


In [31]:
#replace "PrivacySuppressed" to Null
data.replace('PrivacySuppressed', np.nan);

In [32]:
#select only the columns with 90% of non-missing values 
cols_to_delete = data.columns[data.isnull().sum()/len(data) > .90]
data.drop(cols_to_delete, axis = 1, inplace = True)

In [33]:
data.shape

(6806, 330)

In [34]:
data.head()

Unnamed: 0,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,OMAWDP8_PARTTIME_POOLED_SUPP,OMENRUP_PARTTIME_POOLED_SUPP,FTFTPCTPELL,UG12MN,G12MN,SCUGFFN,POOLYRS_FTFTAIDPCT,FTFTPCTPELL_POOLED_SUPP,FTFTPCTFLOAN_POOLED_SUPP,SCUGFFN_POOLED
0,100654,100200,1002,Alabama A & M University,Normal,AL,35762,Southern Association of Colleges and Schools C...,www.aamu.edu/,www.aamu.edu/admissions-aid/tuition-fees/net-p...,...,0.2062,0.3193,0.7057,5343.0,1165.0,1288.0,2.0,0.7083,0.7287,2698.0
1,100663,105200,1052,University of Alabama at Birmingham,Birmingham,AL,35294-0110,Southern Association of Colleges and Schools C...,https://www.uab.edu,https://uab.studentaidcalculator.com/survey.aspx,...,0.4179,0.2475,0.3788,14445.0,10498.0,2228.0,2.0,0.3891,0.5414,4176.0
2,100690,2503400,25034,Amridge University,Montgomery,AL,36117-3553,Southern Association of Colleges and Schools C...,www.amridgeuniversity.edu,www2.amridgeuniversity.edu:9091/,...,0.4627,0.2836,1.0,440.0,527.0,5.0,4.0,PrivacySuppressed,PrivacySuppressed,24.0
3,100706,105500,1055,University of Alabama in Huntsville,Huntsville,AL,35899,Southern Association of Colleges and Schools C...,www.uah.edu,finaid.uah.edu/,...,0.3371,0.2496,0.2707,8145.0,2443.0,1341.0,2.0,0.2378,0.4402,2544.0
4,100724,100500,1005,Alabama State University,Montgomery,AL,36104-0271,Southern Association of Colleges and Schools C...,www.alasu.edu,www.alasu.edu/cost-aid/tuition-costs/net-price...,...,0.2136,0.2621,0.7792,4732.0,642.0,951.0,2.0,0.7684,0.7464,2094.0


In [35]:
list(data.columns.values) 

['UNITID',
 'OPEID',
 'OPEID6',
 'INSTNM',
 'CITY',
 'STABBR',
 'ZIP',
 'ACCREDAGENCY',
 'INSTURL',
 'NPCURL',
 'NUMBRANCH',
 'PREDDEG',
 'HIGHDEG',
 'CONTROL',
 'ST_FIPS',
 'REGION',
 'LOCALE',
 'LATITUDE',
 'LONGITUDE',
 'CCBASIC',
 'CCUGPROF',
 'CCSIZSET',
 'RELAFFIL',
 'ADM_RATE',
 'ADM_RATE_ALL',
 'SATVR25',
 'SATVR75',
 'SATMT25',
 'SATMT75',
 'SATVRMID',
 'SATMTMID',
 'ACTCM25',
 'ACTCM75',
 'ACTEN25',
 'ACTEN75',
 'ACTMT25',
 'ACTMT75',
 'ACTCMMID',
 'ACTENMID',
 'ACTMTMID',
 'SAT_AVG',
 'SAT_AVG_ALL',
 'UGDS',
 'UGDS_WHITE',
 'UGDS_BLACK',
 'UGDS_HISP',
 'CURROPER',
 'NPT4_PUB',
 'NPT4_PRIV',
 'NPT41_PUB',
 'NPT42_PUB',
 'NPT43_PUB',
 'NPT44_PUB',
 'NPT45_PUB',
 'NPT41_PRIV',
 'NPT42_PRIV',
 'NPT43_PRIV',
 'NPT44_PRIV',
 'NPT45_PRIV',
 'NPT4_048_PUB',
 'NPT4_048_PRIV',
 'NPT4_3075_PUB',
 'NPT4_3075_PRIV',
 'NPT4_75UP_PUB',
 'NPT4_75UP_PRIV',
 'NUM4_PUB',
 'NUM4_PRIV',
 'NUM41_PUB',
 'NUM42_PUB',
 'NUM43_PUB',
 'NUM44_PUB',
 'NUM45_PUB',
 'NUM41_PRIV',
 'COSTT4_A',
 'COSTT4_P',