Data Exploration & Visualization 
IPEDS Trends in Staffing: Yale


Institutional Characteristics, Student FTE, Staff FTE
Fall 2002 - Spring 2022

In [1]:
import pandas as pd
import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots



Step 1:
Upload the master merged IPEDS dataset

In [2]:
df = pd.read_csv("ic_student_staff_master.csv",low_memory=False,encoding = "ISO-8859-1")
df['year'] = pd.to_datetime(df.year,format='%Y')
df['year'] = df['year'].dt.strftime('%Y')
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56196 entries, 0 to 56195
Data columns (total 38 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   unitid                                     56196 non-null  int64  
 1   instnm                                     56196 non-null  object 
 2   addr                                       55961 non-null  object 
 3   city                                       56196 non-null  object 
 4   stabbr                                     56196 non-null  object 
 5   zip                                        56196 non-null  object 
 6   obereg                                     56196 non-null  object 
 7   sector                                     56196 non-null  object 
 8   iclevel                                    56196 non-null  object 
 9   control                                    56196 non-null  object 
 10  deggrant              

Step 2: 
Create dataframes for comparison:
1. Yale 
3. Average - Peers  - defined as carnegie type: Research
3. Average - All Institutions

Yale data restructuring

In [4]:
yale = df[df['unitid']==130794]
yale = yale.reset_index(drop=True)
yale.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 38 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   unitid                                     20 non-null     int64  
 1   instnm                                     20 non-null     object 
 2   addr                                       17 non-null     object 
 3   city                                       20 non-null     object 
 4   stabbr                                     20 non-null     object 
 5   zip                                        20 non-null     object 
 6   obereg                                     20 non-null     object 
 7   sector                                     20 non-null     object 
 8   iclevel                                    20 non-null     object 
 9   control                                    20 non-null     object 
 10  deggrant                    

In [6]:
phd = df[df['carnegie_grouped']=='Doctoral or Research Universities']
phd = phd.reset_index(drop=True)
phd.info(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5081 entries, 0 to 5080
Data columns (total 38 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   unitid                                     5081 non-null   int64  
 1   instnm                                     5081 non-null   object 
 2   addr                                       4933 non-null   object 
 3   city                                       5081 non-null   object 
 4   stabbr                                     5081 non-null   object 
 5   zip                                        5081 non-null   object 
 6   obereg                                     5081 non-null   object 
 7   sector                                     5081 non-null   object 
 8   iclevel                                    5081 non-null   object 
 9   control                                    5081 non-null   object 
 10  deggrant                

Yale - Percentage Change - FTE
1. Annual

In [8]:
yale_pct_change = yale[['unitid','year','student fte','total staff fte','total instruction staff fte','total non instruction staff fte','student fte per staff fte','student fte per instruction staff fte','student fte per non instruction staff fte']]
yale_pct_change = yale_pct_change.reset_index(drop=True)
yale_pct_change.head(2)

Unnamed: 0,unitid,year,student fte,total staff fte,total instruction staff fte,total non instruction staff fte,student fte per staff fte,student fte per instruction staff fte,student fte per non instruction staff fte
0,130794,2002,11261,5588.0,1309.0,4279.0,2.02,8.6,2.63
1,130794,2003,11315,5924.0,1422.0,4502.0,1.91,7.96,2.51


In [73]:
yale_pct_change_ratio_all_mean = yale_pct_change['student fte per staff fte'].mean()
round(yale_pct_change_ratio_all_mean,2)
print(yale_pct_change_ratio_all_mean)

1.653


In [74]:
yale_pct_change['student fte pct change'] = yale_pct_change['student fte'].pct_change()
yale_pct_change['student fte pct change'] = (yale_pct_change['student fte pct change']*100).round(2)
yale_pct_change = yale_pct_change.reset_index(drop=True)

yale_pct_change['staff fte pct change'] = yale_pct_change['total staff fte'].pct_change()
yale_pct_change['staff fte pct change'] = (yale_pct_change['staff fte pct change']*100).round(2)
yale_pct_change = yale_pct_change.reset_index(drop=True)

yale_pct_change['instruction fte pct change'] = yale_pct_change['total instruction staff fte'].pct_change()
yale_pct_change['instruction fte pct change'] = (yale_pct_change['instruction fte pct change']*100).round(2)
yale_pct_change = yale_pct_change.reset_index(drop=True)

yale_pct_change['non instruction fte pct change'] = yale_pct_change['total non instruction staff fte'].pct_change()
yale_pct_change['non instruction fte pct change'] = (yale_pct_change['non instruction fte pct change']*100).round(2)
yale_pct_change = yale_pct_change.reset_index(drop=True)

yale_pct_change = yale_pct_change.fillna(0.00)

Yale - Percentage Change - FTE
2. Cumulative

In [75]:
yale_base_student = 11261
base_date = '2002-01-01'

def yale_pct_change_period_student(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte']-yale_base_student)/yale_base_student)*100)
    else:
        return 0.0
    
yale_pct_change['student pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_student(row), axis=1)
yale_pct_change['student pct change cumulative'] = yale_pct_change['student pct change cumulative'].round(2)

yale_pct_change = yale_pct_change.fillna(0.00)
#print(yale_pct_change)

In [76]:
yale_base_staff = 5588.0
base_date = '2002-01-01'

def yale_pct_change_period_staff(row):
    if row['year'] != '2002-01-01':
        return (((row['total staff fte']-yale_base_staff)/yale_base_staff)*100)
    else:
        return 0.0
    
yale_pct_change['staff pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_staff(row), axis=1)
yale_pct_change['staff pct change cumulative'] = yale_pct_change['staff pct change cumulative'].round(2)

yale_pct_change = yale_pct_change.fillna(0.00)
#print(yale_pct_change)

In [77]:
yale_base_instruction = 1309.0
base_date = '2002-01-01'

def yale_pct_change_period_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total instruction staff fte']-yale_base_instruction)/yale_base_instruction)*100)
    else:
        return 0.0
    
yale_pct_change['instruction pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_instruction(row), axis=1)
yale_pct_change['instruction pct change cumulative'] = yale_pct_change['instruction pct change cumulative'].round(2)

yale_pct_change = yale_pct_change.fillna(0.00)

In [78]:
yale_base_non_instruction = 4279.0
base_date = '2002-01-01'

def yale_pct_change_period_non_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total non instruction staff fte']-yale_base_non_instruction)/yale_base_non_instruction)*100)
    else:
        return 0.0
    
yale_pct_change['non instruction pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_non_instruction(row), axis=1)
yale_pct_change['non instruction pct change cumulative'] = yale_pct_change['non instruction pct change cumulative'].round(2)

yale_pct_change = yale_pct_change.fillna(0.00)

yale_pct_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 23 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   unitid                                                     20 non-null     int64  
 1   year                                                       20 non-null     object 
 2   student fte                                                20 non-null     int64  
 3   total staff fte                                            20 non-null     float64
 4   total instruction staff fte                                20 non-null     float64
 5   total non instruction staff fte                            20 non-null     float64
 6   student fte per staff fte                                  20 non-null     float64
 7   student fte per instruction staff fte                      20 non-null     float64
 8   student fte 

Yale - Percentage Change - Ratio 
1. Annual

In [79]:
yale_pct_change['student staff ratio pct change'] = yale_pct_change['student fte per staff fte'].pct_change()
yale_pct_change['student staff ratio pct change'] = (yale_pct_change['student staff ratio pct change']*100).round(2)

yale_pct_change['student instruction staff ratio pct change'] = yale_pct_change['student fte per instruction staff fte'].pct_change()
yale_pct_change['student instruction staff ratio pct change'] = (yale_pct_change['student instruction staff ratio pct change']*100).round(2)

yale_pct_change['student non instruction staff ratio pct change'] = yale_pct_change['student fte per non instruction staff fte'].pct_change()
yale_pct_change['student non instruction staff ratio pct change'] = (yale_pct_change['student non instruction staff ratio pct change']*100).round(2)

yale_pct_change = yale_pct_change.fillna(0.00)

Yale - Percentage Change - Ratio 
1. Cumulative

In [80]:
yale_base_staff_ratio = 2.02
base_date = '2002-01-01'

def yale_pct_change_period_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per staff fte']-yale_base_staff_ratio)/yale_base_staff_ratio)*100)
    else:
        return 0.0
    
yale_pct_change['student staff ratio pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_staff_ratio(row), axis=1)
yale_pct_change['student staff ratio pct change cumulative'] = yale_pct_change['student staff ratio pct change cumulative'].round(2)


In [81]:
yale_base_instruction_staff_ratio = 8.60
base_date = '2002-01-01'

def yale_pct_change_period_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per instruction staff fte']-yale_base_instruction_staff_ratio)/yale_base_instruction_staff_ratio)*100)
    else:
        return 0.0
    
yale_pct_change['student instruction staff ratio pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_instruction_staff_ratio(row), axis=1)
yale_pct_change['student instruction staff ratio pct change cumulative'] = yale_pct_change['student instruction staff ratio pct change cumulative'].round(2)

In [82]:
yale_base_non_instruction_staff_ratio = 2.63
base_date = '2002-01-01'

def yale_pct_change_period_non_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per non instruction staff fte']-yale_base_non_instruction_staff_ratio)/yale_base_non_instruction_staff_ratio)*100)
    else:
        return 0.0
    
yale_pct_change['student non instruction staff ratio pct change cumulative'] = yale_pct_change.apply(lambda row: yale_pct_change_period_non_instruction_staff_ratio(row), axis=1)
yale_pct_change['student non instruction staff ratio pct change cumulative'] = yale_pct_change['student non instruction staff ratio pct change cumulative'].round(2)

In [83]:
yale_pct_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 23 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   unitid                                                     20 non-null     int64  
 1   year                                                       20 non-null     object 
 2   student fte                                                20 non-null     int64  
 3   total staff fte                                            20 non-null     float64
 4   total instruction staff fte                                20 non-null     float64
 5   total non instruction staff fte                            20 non-null     float64
 6   student fte per staff fte                                  20 non-null     float64
 7   student fte per instruction staff fte                      20 non-null     float64
 8   student fte 

Yale - Restacking datasets - by type
1. FTE Total, FTE Percentage Chanage - Annual, FTE Percentage Chanage - Cumulative
2. Ratio Total, Ratio Percentage Chanage - Annual, Ratio Percentage Chanage - Cumulative

In [84]:
cols_fte = ['unitid','year','fte','fte pct change','fte pct change cumulative','type']

yale_fte_pct_change_total_student = yale_pct_change[['unitid','year','student fte','student fte pct change','student pct change cumulative']]
yale_fte_pct_change_total_student = yale_fte_pct_change_total_student.reset_index(drop=True)
yale_fte_pct_change_total_student["type"] = "student"
yale_fte_pct_change_total_student.columns = cols_fte
yale_fte_pct_change_total_student = yale_fte_pct_change_total_student.reset_index(drop=True)
#yale_pct_change_total_student.head(2)

yale_fte_pct_change_total_staff = yale_pct_change[['unitid','year','total staff fte','staff fte pct change','staff pct change cumulative']]
yale_fte_pct_change_total_staff = yale_fte_pct_change_total_staff.reset_index(drop=True)
yale_fte_pct_change_total_staff["type"] = "all staff"
yale_fte_pct_change_total_staff.columns = cols_fte
yale_fte_pct_change_total_staff = yale_fte_pct_change_total_staff.reset_index(drop=True)
#yale_pct_change_total_staff.head(2)

yale_fte_pct_change_total_instruction = yale_pct_change[['unitid','year','total instruction staff fte','instruction fte pct change','instruction pct change cumulative']]
yale_fte_pct_change_total_instruction = yale_fte_pct_change_total_instruction.reset_index(drop=True)
yale_fte_pct_change_total_instruction["type"] = "instructional staff"
yale_fte_pct_change_total_instruction.columns = cols_fte
yale_fte_pct_change_total_instruction = yale_fte_pct_change_total_instruction.reset_index(drop=True)
#yale_pct_change_total_non_instruction.head(2)

yale_fte_pct_change_total_non_instruction = yale_pct_change[['unitid','year','total non instruction staff fte','non instruction fte pct change','non instruction pct change cumulative']]
yale_fte_pct_change_total_non_instruction = yale_fte_pct_change_total_non_instruction.reset_index(drop=True)
yale_fte_pct_change_total_non_instruction["type"] = "non instructional staff"
yale_fte_pct_change_total_non_instruction.columns = cols_fte
yale_fte_pct_change_total_non_instruction = yale_fte_pct_change_total_non_instruction.reset_index(drop=True)
#yale_pct_change_total_non_instruction.head(2)

yale_fte_pct_change_by_type = pd.concat([yale_fte_pct_change_total_student,yale_fte_pct_change_total_staff,yale_fte_pct_change_total_instruction,yale_fte_pct_change_total_non_instruction],ignore_index=False)
yale_fte_pct_change_by_type=yale_fte_pct_change_by_type.reset_index(drop=True)

yale_fte_pct_change_by_type.head(2)

Unnamed: 0,unitid,year,fte,fte pct change,fte pct change cumulative,type
0,130794,2002,11261.0,0.0,0.0,student
1,130794,2003,11315.0,0.48,0.48,student


In [85]:
cols_ratio = ['unitid','year','student fte per staff fte','ratio pct change','ratio pct change cumulative','type']

yale_ratio_pct_change_total_staff = yale_pct_change[['unitid','year','student fte per staff fte','student staff ratio pct change','student staff ratio pct change cumulative']]
yale_ratio_pct_change_total_staff = yale_ratio_pct_change_total_staff.reset_index(drop=True)
yale_ratio_pct_change_total_staff["type"] = "all staff"
yale_ratio_pct_change_total_staff.columns = cols_ratio
yale_ratio_pct_change_total_staff = yale_ratio_pct_change_total_staff.reset_index(drop=True)
#yale_pct_change_total_staff.head(2)

yale_ratio_pct_change_total_instruction = yale_pct_change[['unitid','year','student fte per instruction staff fte','student instruction staff ratio pct change','student instruction staff ratio pct change cumulative']]
yale_ratio_pct_change_total_instruction = yale_ratio_pct_change_total_instruction.reset_index(drop=True)
yale_ratio_pct_change_total_instruction["type"] = "instructional staff"
yale_ratio_pct_change_total_instruction.columns = cols_ratio
yale_ratio_pct_change_total_instruction = yale_ratio_pct_change_total_instruction.reset_index(drop=True)
#yale_pct_change_total_non_instruction.head(2)

yale_ratio_pct_change_total_non_instruction = yale_pct_change[['unitid','year','student fte per non instruction staff fte','student non instruction staff ratio pct change','student non instruction staff ratio pct change cumulative']]
yale_ratio_pct_change_total_non_instruction = yale_ratio_pct_change_total_non_instruction.reset_index(drop=True)
yale_ratio_pct_change_total_non_instruction["type"] = "non instructional staff"
yale_ratio_pct_change_total_non_instruction.columns = cols_ratio
yale_ratio_pct_change_total_non_instruction = yale_ratio_pct_change_total_non_instruction.reset_index(drop=True)
#yale_pct_change_total_non_instruction.head(2)

yale_ratio_pct_change_by_type = pd.concat([yale_ratio_pct_change_total_staff,yale_ratio_pct_change_total_instruction,yale_ratio_pct_change_total_non_instruction],ignore_index=False)
yale_ratio_pct_change_by_type=yale_ratio_pct_change_by_type.reset_index(drop=True)

yale_ratio_pct_change_by_type.head(2)

Unnamed: 0,unitid,year,student fte per staff fte,ratio pct change,ratio pct change cumulative,type
0,130794,2002,2.02,0.0,0.0,all staff
1,130794,2003,1.91,-5.45,-5.45,all staff


Phd - FTE - Percentage Change
1. Annual

In [99]:
phd_pct_change = phd.groupby(['year']).agg({'student fte':'sum','total staff fte':'sum','total instruction staff fte':'sum','total non instruction staff fte':'sum','student fte per staff fte':'mean','student fte per instruction staff fte':'mean','student fte per non instruction staff fte':'mean'})
phd_pct_change = phd_pct_change.reset_index()
phd_pct_change.columns = ['year','student fte','total staff fte','total instruction staff fte','total non instruction staff fte','student fte per staff fte','student fte per instruction staff fte','student fte per non instruction staff fte']
phd_pct_change['student fte per staff fte'] = phd_pct_change['student fte per staff fte'].round(2)
phd_pct_change['student fte per instruction staff fte'] = phd_pct_change['student fte per instruction staff fte'].round(2)
phd_pct_change['student fte per non instruction staff fte'] = phd_pct_change['student fte per non instruction staff fte'].round(2)

phd_pct_change.head(2)

Unnamed: 0,year,student fte,total staff fte,total instruction staff fte,total non instruction staff fte,student fte per staff fte,student fte per instruction staff fte,student fte per non instruction staff fte
0,2002,3900471,917224.0,264881.0,652367.0,4.98,15.35,7.73
1,2003,3983501,907763.0,260977.0,646815.0,5.03,15.57,7.84


In [100]:
phd_pct_change['student fte pct change'] = phd_pct_change['student fte'].pct_change()
phd_pct_change['student fte pct change'] = (phd_pct_change['student fte pct change']*100).round(2)
phd_pct_change = phd_pct_change.reset_index(drop=True)

phd_pct_change['staff fte pct change'] = phd_pct_change['total staff fte'].pct_change()
phd_pct_change['staff fte pct change'] = (phd_pct_change['staff fte pct change']*100).round(2)
phd_pct_change = phd_pct_change.reset_index(drop=True)

phd_pct_change['instruction fte pct change'] = phd_pct_change['total instruction staff fte'].pct_change()
phd_pct_change['instruction fte pct change'] = (phd_pct_change['instruction fte pct change']*100).round(2)
phd_pct_change = phd_pct_change.reset_index(drop=True)

phd_pct_change['non instruction fte pct change'] = phd_pct_change['total non instruction staff fte'].pct_change()
phd_pct_change['non instruction fte pct change'] = (phd_pct_change['non instruction fte pct change']*100).round(2)
phd_pct_change = phd_pct_change.reset_index(drop=True)

phd_pct_change = phd_pct_change.fillna(0.00)

phd_pct_change.head(2)

Unnamed: 0,year,student fte,total staff fte,total instruction staff fte,total non instruction staff fte,student fte per staff fte,student fte per instruction staff fte,student fte per non instruction staff fte,student fte pct change,staff fte pct change,instruction fte pct change,non instruction fte pct change
0,2002,3900471,917224.0,264881.0,652367.0,4.98,15.35,7.73,0.0,0.0,0.0,0.0
1,2003,3983501,907763.0,260977.0,646815.0,5.03,15.57,7.84,2.13,-1.03,-1.47,-0.85


Phd - Percentage Change - FTE
2. Cumulative

In [101]:
phd_base_student = 3900480
base_date = '2002-01-01'

def phd_pct_change_period_student(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte']-phd_base_student)/phd_base_student)*100)
    else:
        return 0.0
    
phd_pct_change['student pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_student(row), axis=1)
phd_pct_change['student pct change cumulative'] = phd_pct_change['student pct change cumulative'].round(2)

phd_pct_change = phd_pct_change.fillna(0.00)
#print(yale_pct_change)

In [102]:
phd_base_staff = 917224.0
base_date = '2002-01-01'

def phd_pct_change_period_staff(row):
    if row['year'] != '2002-01-01':
        return (((row['total staff fte']-phd_base_staff)/phd_base_staff)*100)
    else:
        return 0.0
    
phd_pct_change['staff pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_staff(row), axis=1)
phd_pct_change['staff pct change cumulative'] = phd_pct_change['staff pct change cumulative'].round(2)

phd_pct_change = phd_pct_change.fillna(0.00)
#print(yale_pct_change)

In [103]:
phd_base_instruction = 264881.0
base_date = '2002-01-01'

def phd_pct_change_period_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total instruction staff fte']-phd_base_instruction)/phd_base_instruction)*100)
    else:
        return 0.0
    
phd_pct_change['instruction pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_instruction(row), axis=1)
phd_pct_change['instruction pct change cumulative'] = phd_pct_change['instruction pct change cumulative'].round(2)

phd_pct_change = phd_pct_change.fillna(0.00)

In [104]:
phd_base_non_instruction = 652367.0
base_date = '2002-01-01'

def phd_pct_change_period_non_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total non instruction staff fte']-phd_base_non_instruction)/phd_base_non_instruction)*100)
    else:
        return 0.0
    
phd_pct_change['non instruction pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_non_instruction(row), axis=1)
phd_pct_change['non instruction pct change cumulative'] = phd_pct_change['non instruction pct change cumulative'].round(2)

phd_pct_change = phd_pct_change.fillna(0.00)

phd_pct_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 16 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   year                                       20 non-null     object 
 1   student fte                                20 non-null     int64  
 2   total staff fte                            20 non-null     float64
 3   total instruction staff fte                20 non-null     float64
 4   total non instruction staff fte            20 non-null     float64
 5   student fte per staff fte                  20 non-null     float64
 6   student fte per instruction staff fte      20 non-null     float64
 7   student fte per non instruction staff fte  20 non-null     float64
 8   student fte pct change                     20 non-null     float64
 9   staff fte pct change                       20 non-null     float64
 10  instruction fte pct change  

Phd - Percentage Change - Ratio 
1. Annual

In [105]:
phd_pct_change['student staff ratio pct change'] = phd_pct_change['student fte per staff fte'].pct_change()
phd_pct_change['student staff ratio pct change'] = (phd_pct_change['student staff ratio pct change']*100).round(2)

phd_pct_change['student instruction staff ratio pct change'] = phd_pct_change['student fte per instruction staff fte'].pct_change()
phd_pct_change['student instruction staff ratio pct change'] = (phd_pct_change['student instruction staff ratio pct change']*100).round(2)

phd_pct_change['student non instruction staff ratio pct change'] = phd_pct_change['student fte per non instruction staff fte'].pct_change()
phd_pct_change['student non instruction staff ratio pct change'] = (phd_pct_change['student non instruction staff ratio pct change']*100).round(2)

phd_pct_change = phd_pct_change.fillna(0.00)

Phd - Percentage Change - Ratio 
1. Cumulative

In [106]:
phd_base_staff_ratio = 4.98
base_date = '2002-01-01'

def phd_pct_change_period_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per staff fte']-phd_base_staff_ratio)/phd_base_staff_ratio)*100)
    else:
        return 0.0
    
phd_pct_change['student staff ratio pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_staff_ratio(row), axis=1)
phd_pct_change['student staff ratio pct change cumulative'] = phd_pct_change['student staff ratio pct change cumulative'].round(2)


In [107]:
phd_base_instruction_staff_ratio = 15.35
base_date = '2002-01-01'

def phd_pct_change_period_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per instruction staff fte']-phd_base_instruction_staff_ratio)/phd_base_instruction_staff_ratio)*100)
    else:
        return 0.0
    
phd_pct_change['student instruction staff ratio pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_instruction_staff_ratio(row), axis=1)
phd_pct_change['student instruction staff ratio pct change cumulative'] = phd_pct_change['student instruction staff ratio pct change cumulative'].round(2)

In [108]:
phd_base_non_instruction_staff_ratio = 2.63
base_date = '2002-01-01'

def phd_pct_change_period_non_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per non instruction staff fte']-phd_base_non_instruction_staff_ratio)/phd_base_non_instruction_staff_ratio)*100)
    else:
        return 0.0
    
phd_pct_change['student non instruction staff ratio pct change cumulative'] = phd_pct_change.apply(lambda row: phd_pct_change_period_non_instruction_staff_ratio(row), axis=1)
phd_pct_change['student non instruction staff ratio pct change cumulative'] = phd_pct_change['student non instruction staff ratio pct change cumulative'].round(2)

In [109]:
phd_pct_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 22 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   year                                                       20 non-null     object 
 1   student fte                                                20 non-null     int64  
 2   total staff fte                                            20 non-null     float64
 3   total instruction staff fte                                20 non-null     float64
 4   total non instruction staff fte                            20 non-null     float64
 5   student fte per staff fte                                  20 non-null     float64
 6   student fte per instruction staff fte                      20 non-null     float64
 7   student fte per non instruction staff fte                  20 non-null     float64
 8   student fte 

Yale - Restacking datasets - by type
1. FTE Total, FTE Percentage Chanage - Annual, FTE Percentage Chanage - Cumulative
2. Ratio Total, Ratio Percentage Chanage - Annual, Ratio Percentage Chanage - Cumulative

In [110]:
cols_fte = ['year','fte','fte pct change','fte pct change cumulative','type']

phd_pct_change.info()
phd_fte_pct_change_total_student = phd_pct_change[['year','student fte','student fte pct change','student pct change cumulative']]
phd_fte_pct_change_total_student = phd_fte_pct_change_total_student.reset_index(drop=True)
phd_fte_pct_change_total_student["type"] = "student"
phd_fte_pct_change_total_student.columns = cols_fte
phd_fte_pct_change_total_student = phd_fte_pct_change_total_student.reset_index(drop=True)
#phd_pct_change_total_student.head(2)

phd_fte_pct_change_total_staff = phd_pct_change[['year','total staff fte','staff fte pct change','staff pct change cumulative']]
phd_fte_pct_change_total_staff = phd_fte_pct_change_total_staff.reset_index(drop=True)
phd_fte_pct_change_total_staff["type"] = "all staff"
phd_fte_pct_change_total_staff.columns = cols_fte
phd_fte_pct_change_total_staff = phd_fte_pct_change_total_staff.reset_index(drop=True)
#phd_pct_change_total_staff.head(2)

phd_fte_pct_change_total_instruction = phd_pct_change[['year','total instruction staff fte','instruction fte pct change','instruction pct change cumulative']]
phd_fte_pct_change_total_instruction = phd_fte_pct_change_total_instruction.reset_index(drop=True)
phd_fte_pct_change_total_instruction["type"] = "instructional staff"
phd_fte_pct_change_total_instruction.columns = cols_fte
phd_fte_pct_change_total_instruction = phd_fte_pct_change_total_instruction.reset_index(drop=True)
#phd_pct_change_total_non_instruction.head(2)

phd_fte_pct_change_total_non_instruction = phd_pct_change[['year','total non instruction staff fte','non instruction fte pct change','non instruction pct change cumulative']]
phd_fte_pct_change_total_non_instruction = phd_fte_pct_change_total_non_instruction.reset_index(drop=True)
phd_fte_pct_change_total_non_instruction["type"] = "non instructional staff"
phd_fte_pct_change_total_non_instruction.columns = cols_fte
phd_fte_pct_change_total_non_instruction = phd_fte_pct_change_total_non_instruction.reset_index(drop=True)
#phd_pct_change_total_non_instruction.head(2)

phd_fte_pct_change_by_type = pd.concat([phd_fte_pct_change_total_student,phd_fte_pct_change_total_staff,phd_fte_pct_change_total_instruction,phd_fte_pct_change_total_non_instruction],ignore_index=False)
phd_fte_pct_change_by_type=phd_fte_pct_change_by_type.reset_index(drop=True)

phd_fte_pct_change_by_type.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 22 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   year                                                       20 non-null     object 
 1   student fte                                                20 non-null     int64  
 2   total staff fte                                            20 non-null     float64
 3   total instruction staff fte                                20 non-null     float64
 4   total non instruction staff fte                            20 non-null     float64
 5   student fte per staff fte                                  20 non-null     float64
 6   student fte per instruction staff fte                      20 non-null     float64
 7   student fte per non instruction staff fte                  20 non-null     float64
 8   student fte 

Unnamed: 0,year,fte,fte pct change,fte pct change cumulative,type
0,2002,3900471.0,0.0,-0.0,student
1,2003,3983501.0,2.13,2.13,student


In [111]:
cols_ratio = ['year','student fte per staff fte','ratio pct change','ratio pct change cumulative','type']

phd_ratio_pct_change_total_staff = phd_pct_change[['year','student fte per staff fte','student staff ratio pct change','student staff ratio pct change cumulative']]
phd_ratio_pct_change_total_staff = phd_ratio_pct_change_total_staff.reset_index(drop=True)
phd_ratio_pct_change_total_staff["type"] = "all staff"
phd_ratio_pct_change_total_staff.columns = cols_ratio
phd_ratio_pct_change_total_staff = phd_ratio_pct_change_total_staff.reset_index(drop=True)
#phd_ratio_pct_change_total_staff.head(2)

phd_ratio_pct_change_total_instruction = phd_pct_change[['year','student fte per instruction staff fte','student instruction staff ratio pct change','student instruction staff ratio pct change cumulative']]
phd_ratio_pct_change_total_instruction = phd_ratio_pct_change_total_instruction.reset_index(drop=True)
phd_ratio_pct_change_total_instruction["type"] = "instructional staff"
phd_ratio_pct_change_total_instruction.columns = cols_ratio
phd_ratio_pct_change_total_instruction = phd_ratio_pct_change_total_instruction.reset_index(drop=True)
#phd_ratio_pct_change_total_instruction.head(2)

phd_ratio_pct_change_total_non_instruction = phd_pct_change[['year','student fte per non instruction staff fte','student non instruction staff ratio pct change','student non instruction staff ratio pct change cumulative']]
phd_ratio_pct_change_total_non_instruction = phd_ratio_pct_change_total_non_instruction.reset_index(drop=True)
phd_ratio_pct_change_total_non_instruction["type"] = "non instructional staff"
phd_ratio_pct_change_total_non_instruction.columns = cols_ratio
phd_ratio_pct_change_total_non_instruction = phd_ratio_pct_change_total_non_instruction.reset_index(drop=True)
#phd_pct_change_total_non_instruction.head(2)

phd_ratio_pct_change_by_type = pd.concat([phd_ratio_pct_change_total_staff,phd_ratio_pct_change_total_instruction,phd_ratio_pct_change_total_non_instruction],ignore_index=False)
phd_ratio_pct_change_by_type=phd_ratio_pct_change_by_type.reset_index(drop=True)


All - FTE - Percentage Change
1. Annual

In [112]:
all_inst_pct_change = df.groupby(['year']).agg({'student fte':'sum','total staff fte':'sum','total instruction staff fte':'sum','total non instruction staff fte':'sum','student fte per staff fte':'mean','student fte per instruction staff fte':'mean','student fte per non instruction staff fte':'mean'})
all_inst_pct_change = all_inst_pct_change.reset_index()
all_inst_pct_change.columns = ['year','student fte','total staff fte','total instruction staff fte','total non instruction staff fte','student fte per staff fte','student fte per instruction staff fte','student fte per non instruction staff fte']
all_inst_pct_change['student fte per staff fte'] = all_inst_pct_change['student fte per staff fte'].round(2)
all_inst_pct_change['student fte per instruction staff fte'] = all_inst_pct_change['student fte per instruction staff fte'].round(2)
all_inst_pct_change['student fte per non instruction staff fte'] = all_inst_pct_change['student fte per non instruction staff fte'].round(2)

all_inst_pct_change.head(2)

Unnamed: 0,year,student fte,total staff fte,total instruction staff fte,total non instruction staff fte,student fte per staff fte,student fte per instruction staff fte,student fte per non instruction staff fte
0,2002,11119226,1944071.0,678155.0,1265965.0,6.76,16.35,12.07
1,2003,11395962,1953453.0,683106.0,1270444.0,6.75,16.38,12.23


In [113]:
all_inst_pct_change['student fte pct change'] = all_inst_pct_change['student fte'].pct_change()
all_inst_pct_change['student fte pct change'] = (all_inst_pct_change['student fte pct change']*100).round(2)
all_inst_pct_change = all_inst_pct_change.reset_index(drop=True)

all_inst_pct_change['staff fte pct change'] = all_inst_pct_change['total staff fte'].pct_change()
all_inst_pct_change['staff fte pct change'] = (all_inst_pct_change['staff fte pct change']*100).round(2)
all_inst_pct_change = all_inst_pct_change.reset_index(drop=True)

all_inst_pct_change['instruction fte pct change'] = all_inst_pct_change['total instruction staff fte'].pct_change()
all_inst_pct_change['instruction fte pct change'] = (all_inst_pct_change['instruction fte pct change']*100).round(2)
all_inst_pct_change = all_inst_pct_change.reset_index(drop=True)

all_inst_pct_change['non instruction fte pct change'] = all_inst_pct_change['total non instruction staff fte'].pct_change()
all_inst_pct_change['non instruction fte pct change'] = (all_inst_pct_change['non instruction fte pct change']*100).round(2)
all_inst_pct_change = all_inst_pct_change.reset_index(drop=True)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)

all_inst_pct_change.head()

Unnamed: 0,year,student fte,total staff fte,total instruction staff fte,total non instruction staff fte,student fte per staff fte,student fte per instruction staff fte,student fte per non instruction staff fte,student fte pct change,staff fte pct change,instruction fte pct change,non instruction fte pct change
0,2002,11119226,1944071.0,678155.0,1265965.0,6.76,16.35,12.07,0.0,0.0,0.0,0.0
1,2003,11395962,1953453.0,683106.0,1270444.0,6.75,16.38,12.23,2.49,0.48,0.73,0.35
2,2004,11626900,2040905.0,763673.0,1277304.0,6.71,16.11,12.29,2.03,4.48,11.79,0.54
3,2005,11735284,2015238.0,717422.0,1297863.0,6.66,16.09,12.17,0.93,-1.26,-6.06,1.61
4,2006,11926403,2061024.0,732787.0,1328307.0,6.57,15.89,12.01,1.63,2.27,2.14,2.35


All - Percentage Change - FTE
2. Cumulative

In [114]:
all_inst_base_student = 11196060
base_date = '2002-01-01'

def all_inst_pct_change_period_student(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte']-all_inst_base_student)/all_inst_base_student)*100)
    else:
        return 0.0
    
all_inst_pct_change['student pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_student(row), axis=1)
all_inst_pct_change['student pct change cumulative'] = all_inst_pct_change['student pct change cumulative'].round(2)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)
#print(all_inst_pct_change)

In [115]:
all_inst_base_staff = 1948888.0
base_date = '2002-01-01'

def all_inst_pct_change_period_staff(row):
    if row['year'] != '2002-01-01':
        return (((row['total staff fte']-all_inst_base_staff)/all_inst_base_staff)*100)
    else:
        return 0.0
    
all_inst_pct_change['staff pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_staff(row), axis=1)
all_inst_pct_change['staff pct change cumulative'] = all_inst_pct_change['staff pct change cumulative'].round(2)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)
#print(all_inst_pct_change)

In [116]:
all_inst_base_instruction = 680832.0
base_date = '2002-01-01'

def all_inst_pct_change_period_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total instruction staff fte']-all_inst_base_instruction)/all_inst_base_instruction)*100)
    else:
        return 0.0
    
all_inst_pct_change['instruction pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_instruction(row), axis=1)
all_inst_pct_change['instruction pct change cumulative'] = all_inst_pct_change['instruction pct change cumulative'].round(2)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)
#print(all_inst_pct_change)

In [117]:
all_inst_base_non_instruction = 1268106.0
base_date = '2002-01-01'

def all_inst_pct_change_period_non_instruction(row):
    if row['year'] != '2002-01-01':
        return (((row['total non instruction staff fte']-all_inst_base_non_instruction)/all_inst_base_non_instruction)*100)
    else:
        return 0.0
    
all_inst_pct_change['non instruction pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_non_instruction(row), axis=1)
all_inst_pct_change['non instruction pct change cumulative'] = all_inst_pct_change['non instruction pct change cumulative'].round(2)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)
all_inst_pct_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 16 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   year                                       20 non-null     object 
 1   student fte                                20 non-null     int64  
 2   total staff fte                            20 non-null     float64
 3   total instruction staff fte                20 non-null     float64
 4   total non instruction staff fte            20 non-null     float64
 5   student fte per staff fte                  20 non-null     float64
 6   student fte per instruction staff fte      20 non-null     float64
 7   student fte per non instruction staff fte  20 non-null     float64
 8   student fte pct change                     20 non-null     float64
 9   staff fte pct change                       20 non-null     float64
 10  instruction fte pct change  

All Institutions - Percentage Change - Ratio 
1. Annual

In [118]:
all_inst_pct_change['student staff ratio pct change'] = all_inst_pct_change['student fte per staff fte'].pct_change()
all_inst_pct_change['student staff ratio pct change'] = (all_inst_pct_change['student staff ratio pct change']*100).round(2)

all_inst_pct_change['student instruction staff ratio pct change'] = all_inst_pct_change['student fte per instruction staff fte'].pct_change()
all_inst_pct_change['student instruction staff ratio pct change'] = (all_inst_pct_change['student instruction staff ratio pct change']*100).round(2)

all_inst_pct_change['student non instruction staff ratio pct change'] = all_inst_pct_change['student fte per non instruction staff fte'].pct_change()
all_inst_pct_change['student non instruction staff ratio pct change'] = (all_inst_pct_change['student non instruction staff ratio pct change']*100).round(2)

all_inst_pct_change = all_inst_pct_change.fillna(0.00)

All Institutions - Percentage Change - Ratio 
1. Cumulative

In [119]:
all_inst_base_staff_ratio = 6.81
base_date = '2002-01-01'

def all_inst_pct_change_period_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per staff fte']-all_inst_base_staff_ratio)/all_inst_base_staff_ratio)*100)
    else:
        return 0.0
    
all_inst_pct_change['student staff ratio pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_staff_ratio(row), axis=1)
all_inst_pct_change['student staff ratio pct change cumulative'] = all_inst_pct_change['student staff ratio pct change cumulative'].round(2)

In [120]:
all_inst_base_instruction_staff_ratio = 16.41
base_date = '2002-01-01'

def all_inst_pct_change_period_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per instruction staff fte']-all_inst_base_instruction_staff_ratio)/all_inst_base_instruction_staff_ratio)*100)
    else:
        return 0.0
    
all_inst_pct_change['student instruction staff ratio pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_instruction_staff_ratio(row), axis=1)
all_inst_pct_change['student instruction staff ratio pct change cumulative'] = all_inst_pct_change['student instruction staff ratio pct change cumulative'].round(2)

In [121]:
all_inst_base_non_instruction_staff_ratio = 12.20
base_date = '2002-01-01'

def all_inst_pct_change_period_non_instruction_staff_ratio(row):
    if row['year'] != '2002-01-01':
        return (((row['student fte per non instruction staff fte']-all_inst_base_non_instruction_staff_ratio)/all_inst_base_non_instruction_staff_ratio)*100)
    else:
        return 0.0
    
all_inst_pct_change['student non instruction staff ratio pct change cumulative'] = all_inst_pct_change.apply(lambda row: all_inst_pct_change_period_non_instruction_staff_ratio(row), axis=1)
all_inst_pct_change['student non instruction staff ratio pct change cumulative'] = all_inst_pct_change['student non instruction staff ratio pct change cumulative'].round(2)

all_inst_pct_change.info(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 22 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   year                                                       20 non-null     object 
 1   student fte                                                20 non-null     int64  
 2   total staff fte                                            20 non-null     float64
 3   total instruction staff fte                                20 non-null     float64
 4   total non instruction staff fte                            20 non-null     float64
 5   student fte per staff fte                                  20 non-null     float64
 6   student fte per instruction staff fte                      20 non-null     float64
 7   student fte per non instruction staff fte                  20 non-null     float64
 8   student fte 

Step 3:
Visualizations 

In [232]:
pio.templates.default = "plotly_white"

fig_fte = go.Figure()
fig_fte.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['student fte'],name="total student",mode="lines",line=dict(width=3,color='#3a3940'),visible=True))
fig_fte.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['total staff fte'],name="total staff",mode="lines",line=dict(width=3,color='#142A9C'),visible=True))
fig_fte.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['total instruction staff fte'],name="instructional staff",mode="lines",line=dict(width=2,color='#A1A1CE'),visible=False))
fig_fte.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['total non instruction staff fte'],name="non instructional staff",mode="lines",line=dict(width=2,color='#7498D2'),visible=False))


fig_fte.update_layout(
    plot_bgcolor="#FAFDF1",
    paper_bgcolor="#FAFDF1",
    hovermode="x",
    hoverdistance=100, # Distance to show hover label of data point
    spikedistance=1000, # Distance to show spike
    xaxis=dict(
        title='year',
        linecolor="#BCCCDC",
        showspikes=True,
        spikethickness=2,
        spikedash="dot",
        spikecolor="#999999",
        spikemode="across",
        showgrid=False,
        fixedrange = True
    ),
    yaxis = dict(
        title="total full-time equivalent (FTE) headcount",
        linecolor="#BCCCDC",
        showgrid=False,
        fixedrange = True,
        range = [0,15000]
    )
)

fig_fte.update_layout(
    autosize=False,
    width=608,
    height=508,
    margin=dict(l=20, r=20, t=20, b=20))

fig_fte.update_layout(legend = dict(
    orientation = "h",
    x=0,
    y=1,
    bgcolor="#FAFDF1")
)

fig_fte.update_layout(
    title=dict(text="<b>Change in FTE</b>", font=dict(size=16), automargin=True, yref='paper')
)

fig_fte.update_layout(
    title_font_family="Georgia",
    title_font_color="#3a3940",
    title_font_size = 16
)


fig_fte = fig_fte.update_xaxes(tickangle=45)
fig_fte.update_layout(hoverlabel_namelength=-1)

fig_fte.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            x=1,
            y=1.15,
            buttons=list([
                dict(label="Total",
                     method="update",
                     args=[{"visible": [True, True, False, False]}]),
                dict(label="Staff Type",
                     method="update",
                     args=[{"visible": [True, True, True, True]}])
            ]),
        )
    ])

fig_fte.show()
fig_fte.write_html("/Users/ellenruthconnell/Documents/INFO_696/4_visualizations/figure1.html")

In [258]:
pio.templates.default = "plotly_white"

fig_ratio = make_subplots(specs=[[{"secondary_y": True}]])

fig_ratio.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per staff fte'],orientation='v',name="student : staff ratio",marker=dict(color='#142A9C'),width=.3,visible=True),secondary_y=False)
fig_ratio.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per non instruction staff fte'],orientation='v',name="student : non instructional staff ratio",marker=dict(color='#7498D2'),width=.3,visible=False),secondary_y=False)
fig_ratio.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per instruction staff fte'],orientation='v',name="student : instructional staff ratio",marker=dict(color='#A1A1CE'),width=.3,visible=False),secondary_y=False)
fig_ratio.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['student fte'],name="total student",mode="lines",line=dict(width=2,color='#3a3940'),visible=True),secondary_y=True)


fig_ratio.update_yaxes(title_text="FTE headcount", range=[0,16000], secondary_y=True)

fig_ratio.update_layout(
    plot_bgcolor="#FAFDF1",
    paper_bgcolor="#FAFDF1",
    hovermode="x",
    hoverdistance=100, # Distance to show hover label of data point
    spikedistance=1000, # Distance to show spike
    xaxis=dict(
        title='year',
        linecolor="#BCCCDC",
        showspikes=True,
        spikethickness=2,
        spikedash="dot",
        spikecolor="#999999",
        spikemode="across",
        showgrid=False,
        fixedrange = True
    ),
    yaxis = dict(
        title="student FTE per staff FTE",
        linecolor="#BCCCDC",
        showgrid=False,
        fixedrange = True,
        range=[0,15]
    )
)

fig_ratio.update_layout(
    autosize=False,
    width=608,
    height=508,
    margin=dict(l=20, r=20, t=20, b=20))

fig_ratio.update_layout(legend = dict(
    orientation = "h",
    x=0,
    y=1,
    bgcolor="#FAFDF1")
)

fig_ratio.update_layout(
    title=dict(text="<b>Student:staff ratio</b>", font=dict(size=16), automargin=True, yref='paper')
)

fig_ratio.update_layout(
    title_font_family="Georgia",
    title_font_color="#3a3940",
    title_font_size = 16
)


fig_ratio = fig_ratio.update_xaxes(tickangle=45)
fig_ratio.update_layout(hoverlabel_namelength=-1)

fig_ratio.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            x=1,
            y=1.15,
            buttons=list([
                dict(label="Total",
                     method="update",
                     args=[{"visible": [True, False, False, True]}]),
                dict(label="Staff Type",
                     method="update",
                     args=[{"visible": [True, True, True, True]}])
            ]),
        )
    ])

fig_ratio.show()
fig_ratio.write_html("/Users/ellenruthconnell/Documents/INFO_696/4_visualizations/figure2.html")

In [280]:
pio.templates.default = "plotly_white"

fig_fte_percent_change = go.Figure()
fig_fte_percent_change.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['student pct change cumulative'],name="total student",mode="lines",line=dict(width=2,color='#3a3940'),visible=True))
fig_fte_percent_change.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['staff pct change cumulative'],name="total staff",mode="lines",line=dict(width=2,color='#142A9C'),visible=True))
fig_fte_percent_change.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['instruction pct change cumulative'],name="instructional staff",mode="lines",line=dict(width=2,color='#A1A1CE'),visible=True))
fig_fte_percent_change.add_trace(go.Scatter(x=yale_pct_change["year"],y=yale_pct_change['non instruction pct change cumulative'],name="non instructional staff",mode="lines",line=dict(width=2,color='#7498D2'),visible=True))

fig_fte_percent_change.update_yaxes(title_text="cumulative percentage change", range=[15,85])

fig_fte_percent_change.update_layout(
    plot_bgcolor="#FAFDF1",
    paper_bgcolor="#FAFDF1",
    hovermode="x",
    hoverdistance=100, # Distance to show hover label of data point
    spikedistance=1000, # Distance to show spike
    xaxis=dict(
        title='year',
        linecolor="#BCCCDC",
        showspikes=True,
        spikethickness=2,
        spikedash="dot",
        spikecolor="#999999",
        spikemode="across",
        showgrid=False,
        fixedrange = True
    ),
    yaxis = dict(
        title="cumulative percentage change",
        linecolor="#BCCCDC",
        showgrid=False,
        fixedrange = True,
        range=[-15,85]
    )
)

fig_fte_percent_change.update_layout(
    autosize=False,
    width=608,
    height=508,
    margin=dict(l=20, r=20, t=65, b=20))

fig_fte_percent_change.update_layout(legend = dict(
    orientation = "h",
    x=0,
    y=1,
    bgcolor="#FAFDF1")
)

fig_fte_percent_change.update_layout(
    title=dict(text="<b>Cumulative change</b>", font=dict(size=16), automargin=True, yref='paper')
)

fig_fte_percent_change.update_layout(
    title_font_family="Georgia",
    title_font_color="#3a3940",
    title_font_size = 16
)

fig_fte_percent_change.update_layout(yaxis_ticksuffix = "%")

fig_fte_percent_change = fig_fte_percent_change.update_xaxes(tickangle=45)
fig_fte_percent_change.update_layout(hoverlabel_namelength=-1)

fig_fte_percent_change.show()
fig_fte_percent_change.write_html("/Users/ellenruthconnell/Documents/INFO_696/4_visualizations/figure3.html")

Visualization:
Comparing Yale to Average Peer and All Institutions


In [281]:
pio.templates.default = "plotly_white"

fig_ratio_all = go.Figure()

fig_ratio_all.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per staff fte'],orientation='v',name="Yale",marker=dict(color='#142A9C'),width=.3,visible=True))
fig_ratio_all.add_trace(go.Bar(x=phd_pct_change["year"],y=phd_pct_change['student fte per staff fte'],orientation='v',name="research institution average",marker=dict(color='#70706F'),width=.3,visible=True))
fig_ratio_all.add_trace(go.Bar(x=all_inst_pct_change["year"],y=all_inst_pct_change['student fte per staff fte'],orientation='v',name="all institution average",marker=dict(color='#A1A2A3'),width=.3,visible=True))

fig_ratio_all.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per instruction staff fte'],orientation='v',name="Yale",marker=dict(color='#A1A1CE'),width=.3,visible=False))
fig_ratio_all.add_trace(go.Bar(x=phd_pct_change["year"],y=phd_pct_change['student fte per instruction staff fte'],orientation='v',name="research institution average",marker=dict(color='#7D7D7A'),width=.3,visible=False))
fig_ratio_all.add_trace(go.Bar(x=all_inst_pct_change["year"],y=all_inst_pct_change['student fte per instruction staff fte'],orientation='v',name="all institution average",marker=dict(color='#B3B6B5'),width=.3,visible=False))

fig_ratio_all.add_trace(go.Bar(x=yale_pct_change["year"],y=yale_pct_change['student fte per non instruction staff fte'],orientation='v',name="Yale",marker=dict(color='#7498D2'),width=.3,visible=False))
fig_ratio_all.add_trace(go.Bar(x=phd_pct_change["year"],y=phd_pct_change['student fte per non instruction staff fte'],orientation='v',name="research institution average",marker=dict(color='#8E8D8D'),width=.3,visible=False))
fig_ratio_all.add_trace(go.Bar(x=all_inst_pct_change["year"],y=all_inst_pct_change['student fte per non instruction staff fte'],orientation='v',name="all institution average",marker=dict(color='#BEC0C2'),width=.3,visible=False))


fig_ratio_all.update_layout(
    plot_bgcolor="#FAFDF1",
    paper_bgcolor="#FAFDF1",
    hovermode="x",
    hoverdistance=100, # Distance to show hover label of data point
    spikedistance=1000, # Distance to show spike
    xaxis=dict(
        title='year',
        linecolor="#BCCCDC",
        showspikes=True,
        spikethickness=2,
        spikedash="dot",
        spikecolor="#999999",
        spikemode="across",
        showgrid=False,
        fixedrange = True
    ),
    yaxis = dict(
        title="student FTE per staff FTE",
        linecolor="#BCCCDC",
        showgrid=False,
        fixedrange = True,
        range=[0,18]
    )
)

fig_ratio_all.update_layout(
    autosize=False,
    width=608,
    height=508,
    margin=dict(l=20, r=20, t=20, b=20))

fig_ratio_all.update_layout(legend = dict(
    orientation = "h",
    x=0,
    y=1,
    bgcolor="#FAFDF1")
)

fig_ratio_all.update_layout(
    title=dict(text="<b>Student:staff ratio</b>", font=dict(size=16), automargin=True, yref='paper')
)

fig_ratio_all.update_layout(
    title_font_family="Georgia",
    title_font_color="#3a3940",
    title_font_size = 16
)


fig_ratio_all = fig_ratio_all.update_xaxes(tickangle=45)
fig_ratio_all.update_layout(hoverlabel_namelength=-1)

fig_ratio_all.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            x=1,
            y=1.15,
            buttons=list([
                dict(label="All Staff",
                     method="update",
                     args=[{"visible": [True, True, True, False, False, False, False, False, False]}]),
                dict(label="Instructional Staff",
                     method="update",
                     args=[{"visible": [False, False, False, True, True, True, False, False, False]}]),
                dict(label="Non-Instructional Staff",
                     method="update",
                     args=[{"visible": [False, False, False, False, False, False, True, True, True]}])
            ]),
        )
    ])

fig_ratio_all.show()
fig_ratio_all.write_html("/Users/ellenruthconnell/Documents/INFO_696/4_visualizations/figure4.html")