In [39]:
#import dependencies
import pandas as pd
from pathlib import Path
import hvplot.pandas
import numpy as np

In [40]:
# Load the three expense datasets, each with its 'Year' column as the index.
# The files are read in the same order as the names on the left-hand side.
public_df, all_institutions_df, all_private_df = (
    pd.read_csv(Path(csv_path), index_col='Year')
    for csv_path in (
        './Resources/public_institutions.csv',
        './Resources/all_institutions.csv',
        './Resources/all_private_institutions.csv',
    )
)

In [41]:
# Show the column dtypes of every loaded frame to confirm how pandas parsed the CSVs.
for loaded_frame in (public_df, all_institutions_df, all_private_df):
    display(loaded_frame.dtypes)

All Institutions    int64
4-year              int64
2-year              int64
dtype: object

All Institutions    int64
4-year              int64
2-year              int64
dtype: object

All Institutions    int64
4-year              int64
2-year              int64
dtype: object

In [54]:
#correct the year 1985-86 for all dataframes
# Row position of the label that needs correcting, and the value it should have.
YEAR_LABEL_POSITION = 18
CORRECTED_YEAR_LABEL = '1985-86'

tuition_frames = (public_df, all_private_df, all_institutions_df)

# The corrected index is built from public_df and then shared by all three frames,
# so they must all have the same number of rows. Check this before modifying
# anything, so a mismatch cannot leave the frames half-updated.
# NOTE(review): this also assumes the three CSVs list their years in the same
# order; only the row count is verified here -- confirm against the source files.
row_counts = [len(frame) for frame in tuition_frames]
if len(set(row_counts)) != 1:
    raise ValueError(
        f'Cannot share one Year index across frames with differing row counts: {row_counts}'
    )

index_list = public_df.index.tolist()
index_list[YEAR_LABEL_POSITION] = CORRECTED_YEAR_LABEL

# Give each frame its own Index object built from the corrected labels.
for frame in tuition_frames:
    frame.index = pd.Index(index_list, name='Year')

all_institutions_df.head()

Unnamed: 0_level_0,All Institutions,4-year,2-year
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1963-64,10648,10973,6616
1968-69,10762,11401,7769
1969-70,10868,11665,7589
1970-71,10948,11820,7422
1971-72,11066,12007,7493


In [61]:
# Narrow every frame down to its '4-year' figures by discarding the other two columns.
columns_to_discard = ['All Institutions', '2-year']

all_institutions_4_year = all_institutions_df.drop(columns_to_discard, axis='columns')
private_4_year = all_private_df.drop(columns_to_discard, axis='columns')
public_4_year = public_df.drop(columns_to_discard, axis='columns')

public_4_year.head()

Unnamed: 0_level_0,4-year
Year,Unnamed: 1_level_1
1963-64,7923
1968-69,8433
1969-70,8624
1970-71,8783
1971-72,8984


In [69]:
# Place the three 4-year series side by side, keeping only the years present in all of them.
four_year_frames = [all_institutions_4_year, private_4_year, public_4_year]
all_tuition_df = pd.concat(four_year_frames, axis=1, join='inner')

# Relabel the columns by institution type, in the same order as the frames listed above.
columns = ['All Institutions', 'Private', 'Public']
all_tuition_df.columns = columns

all_tuition_df.head()

Unnamed: 0_level_0,All Institutions,Private,Public
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1963-64,10973,15441,7923
1968-69,11401,17383,8433
1969-70,11665,17825,8624
1970-71,11820,18247,8783
1971-72,12007,18664,8984


In [72]:
# Fractional change of each column relative to the previous row.
# NOTE(review): the index jumps from 1963-64 straight to 1968-69, so the first
# change appears to cover a longer interval than the later ones -- confirm
# before comparing it with the other rows.
raw_pct_change = all_tuition_df.pct_change()

# Discard rows that contain NaN (the first row has no previous row to compare with).
tuition_pct_change = raw_pct_change.dropna()

tuition_pct_change.head()

Unnamed: 0_level_0,All Institutions,Private,Public
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1968-69,0.039005,0.125769,0.06437
1969-70,0.023156,0.025427,0.022649
1970-71,0.013288,0.023675,0.018437
1971-72,0.015821,0.022853,0.022885
1972-73,0.039644,0.018163,0.06289


In [79]:
# Plot the expense figures for each institution type against the school year.
expense_plot_options = {
    'xlabel': 'School Year',
    'ylabel': 'Total Tuition/Fees/Room and Board',
    'title': 'Total Average University Expenses by Year - 2020 dollars',
    'rot': 90,  # rotate the x tick labels so they do not overlap
}
all_tuition_df.hvplot(**expense_plot_options)

In [86]:
# tuition_pct_change holds fractions (e.g. 0.04 means 4%), while the y-axis is
# labelled as a percentage. Scale by 100 for the plot only, so the plotted values
# match the label; the stored frame is left untouched.
(tuition_pct_change * 100).hvplot(
    xlabel='School Year',
    ylabel='Percent Change',
    title='Total Average University Expenses Percent Change, 1964-2021',
    rot=90,  # rotate the x tick labels so they do not overlap
)

In [87]:
# Summary statistics of the change values for each institution type.
pct_change_summary = tuition_pct_change.describe()
pct_change_summary

Unnamed: 0,All Institutions,Private,Public
count,53.0,53.0,53.0
mean,0.018866,0.021298,0.019259
std,0.026222,0.027375,0.028221
min,-0.061418,-0.048834,-0.07127
25%,0.00978,0.011358,0.009323
50%,0.022059,0.020849,0.022649
75%,0.036244,0.030422,0.035827
max,0.069243,0.125769,0.067389
