<a href="https://colab.research.google.com/github/jonathjd/Education-Project/blob/main/4_1_jd_plotly_map.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# import libraries
import pandas as pd
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objects as go
init_notebook_mode(connected=True)

In [13]:
# Load the cleaned, institution-level dataset directly from the project's GitHub repository
DATA_URL = "https://raw.githubusercontent.com/jonathjd/Education-Project/main/data/processed/cleaned_data.csv"
df = pd.read_csv(DATA_URL)

In [14]:
# Preview the first five rows as a quick sanity check on the load
df.head(n=5)

Unnamed: 0,UNITID,INSTNM,CITY,STABBR,ZIP,INSTURL,HIGHDEG,CONTROL,REGION,COSTT4_A,TUITIONFEE_IN,TUITIONFEE_OUT,TUITFTE,INEXPFTE,AVGFACSAL
0,100654,Alabama A & M University,Normal,AL,35762,www.aamu.edu/,4,1,5,23053.0,10024.0,18634.0,7870.0,5546.0,7709.0
1,100663,University of Alabama at Birmingham,Birmingham,AL,35294-0110,https://www.uab.edu/,4,1,5,24495.0,8568.0,20400.0,12096.0,14983.0,11049.0
2,100690,Amridge University,Montgomery,AL,36117-3553,www.amridgeuniversity.edu/,4,2,5,14800.0,6950.0,6950.0,16403.0,5245.0,4187.0
3,100706,University of Alabama in Huntsville,Huntsville,AL,35899,www.uah.edu/,4,1,5,23917.0,11122.0,23518.0,8275.0,8488.0,9688.0
4,100724,Alabama State University,Montgomery,AL,36104-0271,www.alasu.edu/,4,1,5,21866.0,11068.0,19396.0,9587.0,9346.0,7221.0


In [15]:
# Group by state (STABBR) and take the per-state median of every numeric column.
# The median is used instead of the mean because it is a more robust estimate of
# the middle of skewed data.
# numeric_only=True is required: the frame also holds text columns (INSTNM, CITY,
# ZIP, INSTURL), and pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
# The aggregation already returns a new DataFrame, so no extra .copy() is needed.
df2 = df.groupby(by="STABBR").median(numeric_only=True)
df2.head()

Unnamed: 0_level_0,UNITID,HIGHDEG,CONTROL,REGION,COSTT4_A,TUITIONFEE_IN,TUITIONFEE_OUT,TUITFTE,INEXPFTE,AVGFACSAL
STABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AK,102650.5,3.5,1.5,8.0,20545.0,8158.5,18864.5,7899.5,13054.5,7432.5
AL,101625.0,3.0,1.0,5.0,20173.9,9163.0,13530.5,7800.5,6210.0,6613.5
AR,107460.0,2.0,1.0,5.0,19101.0,7909.0,11032.0,4061.0,6785.0,5268.0
AZ,312312.5,3.0,2.0,6.0,20497.119048,9813.5,13028.0,9286.0,5207.5,6383.0
CA,122205.0,3.0,2.0,8.0,23096.142857,10612.333333,14040.0,10466.0,6755.0,8456.0


# Interactive Map using Plotly

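We are going to generate an interactive US map with Plotly that displays each state's ***median cost*** of attendance (**COSTT4_A**). We use the median rather than the mean because the cost data are positively skewed, which makes the median a more robust measure of the typical value.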

In [16]:
# Choropleth trace specification
# Each state is shaded by its median cost of attendance (COSTT4_A)
data = {
    "type": "choropleth",
    "locations": df2.index,  # index of the grouped frame, i.e. the STABBR state codes
    "locationmode": "USA-states",
    "z": df2["COSTT4_A"],
    "colorscale": "Reds",
    "colorbar_title": "USD",
}

# Layout specification: restrict the map to the USA and draw lakes in a fixed colour
geo_settings = {
    "scope": "usa",
    "showlakes": True,
    "lakecolor": "rgb(85,173,240)",
}
layout = {
    "title": "Median Cost of Attendance Per Year",
    "geo": geo_settings,
}

In [17]:
# Assemble the figure from the trace and layout specifications
figure_spec = {"data": [data], "layout": layout}
choromap = go.Figure(**figure_spec)

In [18]:
# Display the choropleth figure built in the previous cell
choromap.show()

Next, we export the grouped DataFrame to a CSV file so that it, together with the plotting code above, can be reused in our Streamlit app.

In [24]:
# Write the state-level medians to a CSV file so they can be cleaned further later
df2.to_csv(path_or_buf="state_df.csv")

In [25]:
# Download the exported CSV through the browser when running on Google Colab.
# The google.colab package only exists inside Colab, so the import is guarded:
# on any other runtime the download step is skipped instead of aborting a
# Restart & Run All with an ImportError.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download("state_df.csv")
else:
    print("google.colab is not available; skipping the browser download of state_df.csv.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
# Display the grouped frame (indexed by STABBR) for a final visual check
df2

Unnamed: 0_level_0,UNITID,HIGHDEG,CONTROL,REGION,COSTT4_A,TUITIONFEE_IN,TUITIONFEE_OUT,TUITFTE,INEXPFTE,AVGFACSAL
STABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AK,102650.5,3.5,1.5,8.0,20545.0,8158.5,18864.5,7899.5,13054.5,7432.5
AL,101625.0,3.0,1.0,5.0,20173.9,9163.0,13530.5,7800.5,6210.0,6613.5
AR,107460.0,2.0,1.0,5.0,19101.0,7909.0,11032.0,4061.0,6785.0,5268.0
AZ,312312.5,3.0,2.0,6.0,20497.119048,9813.5,13028.0,9286.0,5207.5,6383.0
CA,122205.0,3.0,2.0,8.0,23096.142857,10612.333333,14040.0,10466.0,6755.0,8456.0
CO,127741.0,3.0,2.0,7.0,23911.0,9335.0,15350.0,10806.0,6182.0,6340.0
CT,129874.5,4.0,1.0,1.0,23925.5,14643.0,24606.0,11034.0,10062.5,9008.5
DC,131876.0,4.0,2.0,2.0,30268.0,17038.0,17038.0,15127.0,15932.0,8820.0
DE,131043.5,4.0,2.0,2.0,27291.0,13897.5,21037.0,12495.0,7566.5,7431.5
FL,262129.0,3.0,2.0,5.0,25098.0,13716.0,14814.0,10973.0,5131.0,5715.0
