In [102]:
#Get dependencies
import csv
import pandas as pd
import numpy as np
#For plotting on US map
import folium
from folium import plugins
from branca.colormap import linear

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

#Load dataset into CSV
bachelors_df = pd.read_csv('FY16_College_Data.csv')
#Remove any NULL values for cost
bachelors_df = bachelors_df[np.isfinite(bachelors_df['COSTT4_A'])]
bachelors_df.head()

Unnamed: 0.1,Unnamed: 0,UNITID,INSTNM,CITY,STABBR,REGION,LATITUDE,LONGITUDE,PREDDEG,COSTT4_A,...,C150_4_HISP,C150_4_ASIAN,C150_4_AIAN,C150_4_NHPI,C150_4_2MOR,C150_4_NRA,C150_4_UNKN,ADM_RATE,SAT_AVG,ACTCMMID
0,0,100654,Alabama A & M University,Normal,AL,5,34.783368,-86.568502,3,20809.0,...,0.0,0.0,0.3333,,,,0.0,0.6538,850.0,18.0
1,1,100663,University of Alabama at Birmingham,Birmingham,AL,5,33.50223,-86.80917,3,22232.0,...,0.5946,0.7722,0.5,1.0,0.7222,0.6875,0.4146,0.6043,1147.0,25.0
2,2,100690,Amridge University,Montgomery,AL,5,32.362609,-86.17401,3,12133.0,...,0.0,,,,,,,,,
3,3,100706,University of Alabama in Huntsville,Huntsville,AL,5,34.722818,-86.63842,3,20999.0,...,0.28,0.5333,0.3636,1.0,0.4375,0.6471,0.75,0.812,1221.0,27.0
4,4,100724,Alabama State University,Montgomery,AL,5,32.364317,-86.295677,3,18100.0,...,,1.0,1.0,0.0,,0.0,0.2667,0.4639,844.0,18.0


In [103]:
#Find the median latitude and logitude
middle_lat = bachelors_df['LATITUDE'].median()
middle_lon = bachelors_df['LONGITUDE'].median()

#Create a color scale for cost
colormap = linear.YlOrRd.scale(
    bachelors_df['COSTT4_A'].min(),
    bachelors_df['COSTT4_A'].max())

colormap


In [123]:

#Plot instititions on map by cost 
cost_map = folium.Map(location=[middle_lat, middle_lon],
                          zoom_start=4)

# mark each institution as a point
for index, row in bachelors_df.iterrows():
    folium.CircleMarker([row['LATITUDE'], row['LONGITUDE']],
                        radius=2,
                        fill=True,
                        fill_color=colormap(row['COSTT4_A']),
                        color=colormap(row['COSTT4_A']),
                        fill_opacity=0.5,
                        legend_html = '<h1>Tuition</h1>'
                        ).add_to(cost_map)


cost_map.save('Colleges_Cost_Distribution.html')

cost_map

In [120]:
#Get list of State Names and Abbreviations
states_df = pd.read_csv('list-states-us.csv')
states_df.columns = ['id','name', 'State']
states_df.head()

Unnamed: 0,id,name,State
0,2,Alaska,AK
1,3,Arizona,AZ
2,4,Arkansas,AR
3,5,California,CA
4,6,Colorado,CO


In [121]:

#Get cost data by State
cost_state_grouped = bachelors_df.groupby('STABBR')

#Get all unique states
states = bachelors_df['STABBR'].unique()

#Combine State Abbreviation, State Name and Average Cost into one DataFrame
cost_states_df = pd.DataFrame(states)
cost_states_df.columns = ['State']
cost_states_df['Average Cost'] = ''

#Loop through states and find average cost
for index, row in cost_states_df.iterrows():
    cost_df = cost_state_grouped.get_group(row['State'])
    average = np.mean(cost_df['COSTT4_A'])
    cost_states_df.at[index,'Average Cost'] = average
    
cost_states_df = pd.merge(cost_states_df, states_df, on='State', how='inner')
    
len(cost_states_df)

49

In [122]:

#Plot average costs by State
average_map = folium.Map(location=[43, -100],
                          zoom_start=4)

average_map.choropleth(
    geo_data=open('us-states.json').read(),
    data=cost_states_df,
    columns=['name', 'Average Cost'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    legend_name='Average Cost of Tuition'
    )

average_map.save('Average_Cost_Distribution.html')

average_map