# LEA School District Finance Survey – Data Visualizations


#### Note: This project is still in progress

In [57]:
import pandas as pd
import numpy as np
import os
import json
import psycopg2
from psycopg2 import OperationalError
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

### Connect to Database & Create DataFrames | Exploratory Analysis

In [58]:
def execute_sql(query: str, conn) -> pd.DataFrame:
    # Create a new cursor
    cur = conn.cursor()
    try:
        # Create a new cursor using the connection passed as an argument
        cur = conn.cursor()
        
        # Execute the SQL query
        cur.execute(query)

        # Fetch the results
        rows = cur.fetchall()

        # Get the column names
        colnames = [desc[0] for desc in cur.description]

        # Convert query to dataframe
        df = pd.DataFrame(rows, columns=colnames)

    except OperationalError as e:
        print(f"An error occurred: {e}")
        df = pd.DataFrame()
    finally:
        # Close the cursor after the operation is complete
        if cur is not None:
            cur.close()
    
    return df

In [59]:
# Import database credentials from json file
db_credentials_file_name = 'LEA_Finance_Survey_DB.json'
db_credentials_path = os.path.join("..", db_credentials_file_name)
with open(db_credentials_path) as infile:
    credentials = json.load(infile)
    
# Assign Credentials to Variables
database_name = credentials['database']
username = credentials['user']
password = credentials['password']
host = credentials['host']
port = credentials['port']

# Establish a connection to the database
conn = psycopg2.connect(
    dbname=database_name,
    user=username,
    password=password,
    host=host,
    port=port
)

In [60]:
# Query expenditure data
query = """
SELECT 
    state, 
    expenditure_title, 
    year, 
    SUM(amount) AS amount, 
    AVG(amount_z_score) AS amount_z_score_avg 
FROM expenses.expenditure_zscores_by_state_year
WHERE expenditure_title ILIKE '%tech%' 
OR expenditure_title ILIKE '%vocation%'
GROUP BY state, year, expenditure_title
ORDER BY state, year, expenditure_title;
"""

In [61]:
# Convert query to dataframe
tech_vocational_expenditures = execute_sql(query, conn)
tech_vocational_expenditures.head()

Unnamed: 0,state,expenditure_title,year,amount,amount_z_score_avg
0,Alabama,teacher_salaries_vocational_education,2010.0,110309000.0,-1.678030303030303e-19
1,Alabama,teacher_salaries_vocational_education,2011.0,107614000.0,-6.141666666666667e-19
2,Alabama,teacher_salaries_vocational_education,2012.0,105162000.0,-2.256060606060606e-19
3,Alabama,teacher_salaries_vocational_education,2013.0,108247000.0,1.7134328358208956e-19
4,Alabama,teacher_salaries_vocational_education,2014.0,110787000.0,6.566666666666666e-19


In [62]:
tech_vocational_expenditures.dtypes

state                  object
expenditure_title      object
year                  float64
amount                 object
amount_z_score_avg     object
dtype: object

In [63]:
tech_vocational_expenditures.describe()

Unnamed: 0,year
count,876.0
mean,2016.410959
std,3.013504
min,2010.0
25%,2014.0
50%,2017.0
75%,2019.0
max,2020.0


In [64]:
tech_vocational_expenditures['year'] = tech_vocational_expenditures['year'].astype(int)
tech_vocational_expenditures['amount'] = tech_vocational_expenditures['amount'].astype('float64')
tech_vocational_expenditures['amount_z_score_avg'] = tech_vocational_expenditures['amount_z_score_avg'].astype('float64')

tech_vocational_expenditures.dtypes

state                  object
expenditure_title      object
year                    int64
amount                float64
amount_z_score_avg    float64
dtype: object

In [65]:
tech_vocational_expenditures.head()

Unnamed: 0,state,expenditure_title,year,amount,amount_z_score_avg
0,Alabama,teacher_salaries_vocational_education,2010,110309000.0,-1.67803e-19
1,Alabama,teacher_salaries_vocational_education,2011,107614000.0,-6.141666999999999e-19
2,Alabama,teacher_salaries_vocational_education,2012,105162000.0,-2.2560609999999997e-19
3,Alabama,teacher_salaries_vocational_education,2013,108247000.0,1.713433e-19
4,Alabama,teacher_salaries_vocational_education,2014,110787000.0,6.566667e-19


In [66]:
query = """
SELECT 
	e.state, 
	DATE_PART('year', stats.year) AS year, 
	SUM(stats.fall_membership) AS student_count
FROM entity.annual_stats AS stats
INNER JOIN entity.entity as e
	ON stats.census_id = e.census_id
GROUP BY e.state, stats.year
ORDER BY e.state, stats.year;
"""

In [67]:
student_counts_by_state_year = execute_sql(query, conn)
student_counts_by_state_year.head()

Unnamed: 0,state,year,student_count
0,Alabama,2010.0,748889.0
1,Alabama,2011.0,738322.0
2,Alabama,2012.0,744621.0
3,Alabama,2013.0,744548.0
4,Alabama,2014.0,740081.0


In [68]:
student_counts_by_state_year['year'] = student_counts_by_state_year['year'].astype(int)
student_counts_by_state_year['student_count'] = student_counts_by_state_year['student_count'].astype(int)
student_counts_by_state_year.dtypes

state            object
year              int64
student_count     int64
dtype: object

In [69]:
merged = pd.merge(student_counts_by_state_year, tech_vocational_expenditures, how= 'inner', on= ['state', 'year'], validate="many_to_many")
merged.head()

Unnamed: 0,state,year,student_count,expenditure_title,amount,amount_z_score_avg
0,Alabama,2010,748889,teacher_salaries_vocational_education,110309000.0,-1.67803e-19
1,Alabama,2011,738322,teacher_salaries_vocational_education,107614000.0,-6.141666999999999e-19
2,Alabama,2012,744621,teacher_salaries_vocational_education,105162000.0,-2.2560609999999997e-19
3,Alabama,2013,744548,teacher_salaries_vocational_education,108247000.0,1.713433e-19
4,Alabama,2014,740081,teacher_salaries_vocational_education,110787000.0,6.566667e-19


In [70]:
merged['tech_vocational_cost_per_student'] = merged['amount'] / merged['student_count']
merged.head()

Unnamed: 0,state,year,student_count,expenditure_title,amount,amount_z_score_avg,tech_vocational_cost_per_student
0,Alabama,2010,748889,teacher_salaries_vocational_education,110309000.0,-1.67803e-19,147.296862
1,Alabama,2011,738322,teacher_salaries_vocational_education,107614000.0,-6.141666999999999e-19,145.754833
2,Alabama,2012,744621,teacher_salaries_vocational_education,105162000.0,-2.2560609999999997e-19,141.228894
3,Alabama,2013,744548,teacher_salaries_vocational_education,108247000.0,1.713433e-19,145.386194
4,Alabama,2014,740081,teacher_salaries_vocational_education,110787000.0,6.566667e-19,149.695777


In [71]:
grouped_expenditures = merged.groupby('year')['amount'].sum().reset_index()
grouped_expenditures.head()

Unnamed: 0,year,amount
0,2010,4269554000.0
1,2011,4359258000.0
2,2012,4899872000.0
3,2013,4958659000.0
4,2014,5264175000.0


### Graph Functions
The subsequent cells contain various functions for creating different types of charts. These functions are designed to ensure stylistic consistency across all charts and to minimize redundant code in my analyses. They allow for easy customization and quick generation of complex visualizations, streamlining the data presentation process.

In [72]:
def make_bar_chart_grid(df : pd.DataFrame, 
                        x : str, y : str, 
                        color : str, facet_col : str, 
                        facet_col_wrap : int, title : str) -> go.Figure:
    """
    Creates a grid of bar charts using Plotly Express.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data to plot.
    x (str): The name of the column to use for the x-axis.
    y (str): The name of the column to use for the y-axis.
    color (str): The name of the column to use for color coding.
    facet_col (str): The name of the column to create separate plots for each unique value.
    facet_col_wrap (int): The number of charts per row. Default is 4.
    title (str): The title of the plot. Default is an empty string.

    Returns:
    go.Figure: A Plotly graph object representing the line plot grid.
    """
    bar_plot_grid = px.bar(df, 
                 x= x, 
                 y= y,
                 facet_col= facet_col,
                 facet_col_wrap= facet_col_wrap,
                 color= color
                 )

    # Update layout with given title and additional layout arguments
    bar_plot_grid.update_layout(title_text= title,
                                title={
                                    'y':0.98,  # The position of the title can be adjusted with the y parameter
                                    'x':0.5,
                                    'xanchor': 'center',
                                    'yanchor': 'top'
                                    },
                                height=800,
                                width=1050, 
                                legend=dict(
                                    orientation="h",
                                    yanchor="bottom",
                                    y=-0.5,  # Adjusted position
                                    xanchor="center",
                                    x=0.5
                                    ),
                                margin=dict(l=40, r=40, t=80, b=200),  # Increase the top margin for padding
                                hoverlabel=dict(
                                    bgcolor="white",
                                    font_size=16,
                                    font_family="Calibri"
                                    )
                                )

    bar_plot_grid.update_annotations(font_size=10)  # Reduce font size for subplot titles
    
    return bar_plot_grid

In [73]:
def make_line_plot_grid(df : pd.DataFrame, 
                        x : str, y : str, 
                        color : str, facet_col : str, 
                        facet_col_wrap : int, title : str) -> go.Figure:
    """
    Creates a grid of line charts using Plotly Express.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data to plot.
    x (str): The name of the column to use for the x-axis.
    y (str): The name of the column to use for the y-axis.
    color (str): The name of the column to use for color coding.
    facet_col (str): The name of the column to create separate plots for each unique value.
    facet_col_wrap (int): The number of charts per row. Default is 4.
    title (str): The title of the plot. Default is an empty string.

    Returns:
    go.Figure: A Plotly graph object representing the line plot grid.
    """

    # Create the grid of line charts
    line_plot_grid = px.line(
        df,
        x= x,
        y= y,  
        color= color,
        facet_col= facet_col,  # Creates a separate plot for each region
        facet_col_wrap= facet_col_wrap,    # Adjust this to control how many charts per row
        title= title
    )

    # Update layout with given title and additional layout arguments
    line_plot_grid.update_layout(title_text=title,
                               title={
                                   'y':0.98,  # The position of the title can be adjusted with the y parameter
                                   'x':0.5,
                                   'xanchor': 'center',
                                   'yanchor': 'top'
                                   },
                                height=800,
                                width=1050, 
                                legend=dict(
                                    orientation="h",
                                    yanchor="bottom",
                                    y=-0.5,  # Adjusted position
                                    xanchor="center",
                                    x=0.5
                                    ),
                                margin=dict(l=40, r=40, t=80, b=200),  # Increase the top margin for padding
                                hoverlabel=dict(
                                    bgcolor="white",
                                    font_size=16,
                                    font_family="Calibri"
                                    )
                                )

    line_plot_grid.update_annotations(font_size=10)  # Reduce font size for subplot titles
    line_plot_grid.update_xaxes(tickangle=45, tickfont=dict(size=10)) # Update Tick Angles and Axis Font Size

    return line_plot_grid

In [74]:
def create_combined_figure(fig1: go.Figure, fig2: go.Figure, 
                           title: str, subplot_titles: tuple) -> go.Figure:
    """
    Creates a combined figure with two subplots.

    Parameters:
    fig1 (go.Figure): The first figure to be added to the subplot.
    fig2 (go.Figure): The second figure to be added to the subplot.
    title (str): The main title of the combined figure.
    subplot_titles (tuple): Titles for the subplots (two elements expected).

    Returns:
    go.Figure: A combined figure with two subplots.
    """

    # Create a subplot figure
    combined_fig = make_subplots(rows=1, cols=2, subplot_titles=subplot_titles,
                                 horizontal_spacing=0.1)

    # Add traces from fig1 to the first subplot
    for trace in fig1['data']:
        combined_fig.append_trace(trace, row=1, col=1)

    # Add traces from fig2 to the second subplot
    for trace in fig2['data']:
        combined_fig.append_trace(trace, row=1, col=2)

    # Update layout with given title and additional layout arguments
    combined_fig.update_layout(title_text=title,
                               title={
                                   'y':0.98,  # The position of the title can be adjusted with the y parameter
                                   'x':0.5,
                                   'xanchor': 'center',
                                   'yanchor': 'top'
                                   },
                                height=600,
                                width=1050, 
                                legend=dict(
                                    orientation="h",
                                    yanchor="bottom",
                                    y=-0.3,  # Adjusted position
                                    xanchor="center",
                                    x=0.5
                                    ),
                                margin=dict(l=40, r=40, t=80, b=200),  # Increase the top margin for padding
                                hoverlabel=dict(
                                    bgcolor="white",
                                    font_size=16,
                                    font_family="Calibri"
                                    )
                                )

    return combined_fig

### Data Visualizations & Insights

In [75]:
tech_voc_expenditures_line_chart1 = px.line(grouped_expenditures,
                                           x= 'year',
                                           y= 'amount',
                                           title= 'Tech and Vocational Education Expenditures Over Time')

In [76]:
amount_2014 = grouped_expenditures.loc[grouped_expenditures['year'] == 2014, 'amount'].iloc[0]
amount_2016 = grouped_expenditures.loc[grouped_expenditures['year'] == 2016, 'amount'].iloc[0]

# Calculate the total amount difference
total_difference = amount_2016 - amount_2014

# Calculate the percentage difference
percentage_difference = (total_difference / amount_2014) * 100

# Print out the results
print(f'2014 Amount: {amount_2014}')
print(f'2016 Amount: {amount_2016}')
print(f"Total amount difference between 2014 and 2016: {total_difference}")
print(f"Percentage difference between 2014 and 2016: {percentage_difference:.2f}%")

2014 Amount: 5264175000.0
2016 Amount: 9614528000.0
Total amount difference between 2014 and 2016: 4350353000.0
Percentage difference between 2014 and 2016: 82.64%


In [77]:
grouped_expenditures_by_title = merged.groupby(['year', 'expenditure_title'])['amount'].sum().reset_index()
grouped_expenditures_by_title.head()

Unnamed: 0,year,expenditure_title,amount
0,2010,teacher_salaries_vocational_education,4269554000.0
1,2011,teacher_salaries_vocational_education,4359258000.0
2,2012,teacher_salaries_vocational_education,4899872000.0
3,2013,teacher_salaries_vocational_education,4958659000.0
4,2014,teacher_salaries_vocational_education,5264175000.0


In [78]:
tech_voc_expenditures_line_chart2 = px.line(grouped_expenditures_by_title,
                                           x= 'year',
                                           y= 'amount', 
                                           color='expenditure_title', 
                                           title= 'Expenditures by Title Over Time')

In [79]:
tech_related_supplies_services_total = grouped_expenditures_by_title[
    (grouped_expenditures_by_title['expenditure_title'] == 'tech_related_supplies_services') & 
    (grouped_expenditures_by_title['year'].isin([2014, 2015, 2016]))
]['amount'].sum()

print(f'Tech Related Supplies & Services Total: {tech_related_supplies_services_total}')

Tech Related Supplies & Services Total: 3948099000.0


In [80]:
tech_related_equipment_total = grouped_expenditures_by_title[
    (grouped_expenditures_by_title['expenditure_title'] == 'tech_related_equipment') & 
    (grouped_expenditures_by_title['year'].isin([2014, 2015, 2016]))
]['amount'].sum()

print(f'Tech Related Equipment Total: {tech_related_equipment_total}')

Tech Related Equipment Total: 1263890000.0


In [81]:
teacher_salaries_vocational_education_total = grouped_expenditures_by_title[
    (grouped_expenditures_by_title['expenditure_title'] == 'teacher_salaries_vocational_education') & 
    (grouped_expenditures_by_title['year'].isin([2014, 2015, 2016]))
]['amount'].sum()

print(f'Teacher Salaries Vocational Education: {teacher_salaries_vocational_education_total}')

Teacher Salaries Vocational Education: 16308173000.0


In [82]:
grand_total = tech_related_supplies_services_total + tech_related_equipment_total + teacher_salaries_vocational_education_total

grand_total

21520162000.0

In [83]:
y2014 = grouped_expenditures[grouped_expenditures['year'] == 2014]['amount'].sum()
y2015 = grouped_expenditures[grouped_expenditures['year'] == 2015]['amount'].sum()
y2016 = grouped_expenditures[grouped_expenditures['year'] == 2016]['amount'].sum()
ytotal = y2014 + y2015 + y2016

print(f'2014 Total: ${y2014}')
print(f'2015 Total: ${y2015}')
print(f'2016 Total: ${y2016}')
print(f'Total: ${ytotal}')

2014 Total: $5264175000.0
2015 Total: $6641459000.0
2016 Total: $9614528000.0
Total: $21520162000.0


In [84]:
difference_between_2014_2016 = y2016 - y2014

percent_difference_between_2014_2016 = difference_between_2014_2016 / y2014 * 100

teacher_salaries_vocational_education_percent = teacher_salaries_vocational_education_total / grand_total * 100

tech_related_supplies_services_percent = tech_related_supplies_services_total / grand_total * 100

tech_related_equipment_percent = tech_related_equipment_total / grand_total * 100

print(f'Difference between 2014 and 2016: ${difference_between_2014_2016}')
print(f'Percent difference between 2014 and 2016: {percent_difference_between_2014_2016:.2f}%')
print(f'Teacher Salaries Vocational Education % of Total Revenue between 2014 and 2016: {teacher_salaries_vocational_education_percent:.2f}%')
print(f'Tech Related Supplies & Services % of Total Revenue between 2014 and 2016: {tech_related_supplies_services_percent:.2f}%')
print(f'Tech Related Equipment % of Total Revenue between 2014 and 2016: {tech_related_equipment_percent:.2f}%')

Difference between 2014 and 2016: $4350353000.0
Percent difference between 2014 and 2016: 82.64%
Teacher Salaries Vocational Education % of Total Revenue between 2014 and 2016: 75.78%
Tech Related Supplies & Services % of Total Revenue between 2014 and 2016: 18.35%
Tech Related Equipment % of Total Revenue between 2014 and 2016: 5.87%


In [85]:
tech_vocational_line_charts_combined = create_combined_figure(tech_voc_expenditures_line_chart1, 
                                            tech_voc_expenditures_line_chart2, 
                                            'Tech and Vocational Education Expenditures Comparison',
                                            ('Total Expenditures Over Time', 'Expenditures by Title Over Time')
                                            )

tech_vocational_line_charts_combined.show()

### Notes:

Original Analysis:  

**Total Expenditures Over Time:**  
- There is steady positive growth in Tech and Vocational expenditures from 2010 to 2014.
- In 2014, there is a sharp increase in expenditures ending in 2016.
- In 2016, expenditures appear to steady.
- Positive growth is visible throughout. 

**Expenditures by Title Over Time:**  
- After breaking the data out by Expenditure Title, we see that in 2015, two additional expenditure categories were added:
    - Tech Related Supplies & Services
    - Tech Related Equipment
- There is fairly consistent positive growth in Tech Related Equipment between 2015 and 2019.
- After 2019, there is a slight decline in this expenditure category. 
    - It should be noted that COVID-19 started spreading in late 2019, which lead to worldwide pandemic in 2020. 
- Tech Related Supplies & Services appears to have the largest impact to the growth exhibited between 2015 and 2020.
- Teacher Salaries Vocational Education remains constant.  

### Additional Analysis:  
- Difference between 2014 & 2016: $ 4,350,353,000  
- Percent difference between 2014 & 2016: 82.64%  
- Teacher Salaries Vocational Education % of Total Tech and Vocational Expenditures from 2014 to 2016: 75.78%  
    - Total: $16,308,173,000  
- Tech Related Supplies & Services percent of Total Tech and Vocational Expenditures from 2014 to 2016: 18.35%  
    - Total: $3,948,099,000  
- Tech Related Equipment percent of Total Tech and Vocational Expenditures from 2014 to 2016: 5.87%  
    - Total: $1,263,890,000

#### Summary:  
Between 2014 and 2016, there was an 82.64% increase in tech and vocational education related expenditures. Upon further investigation, this increase was driven by the addition of two expenditure categories, `Tech Related Supplies Services` and `Tech Related Equipment`. The main driving factor in the expenditure spike was due to `Tech Related Supplies Services`, which accounted for 18.35% of the total expenditures between 2014 and 2016. Although we see consistent growth in `Teacher Salaries Vocational Edication`, this still accounted for 75.78% of expenditures between 2014 and 2016. After 2016, the sharp spike in expenditures slows, but still maintains a positive trend. `Tech Related Supplies Services` continues to have a greater influence to the overall trend despite the spike in expenditures slowing after 2016.

**Food for Thought:**  
- There is a steady increase in `Teacher Salaries Vocational Education` over the last decade. Is this change driven by higher teacher salaries, more vocational class offerings resulting in more teachers being hired, or is it a combination of both?
- What are the rankings of expenditures by state?
- What are the rankings of individual school districts within each state that are driving these trends?
- Do student counts per school district/state have any impact on these trends?

In [86]:
grouped = merged.groupby(['year', 'expenditure_title'])['tech_vocational_cost_per_student'].sum().reset_index()
grouped.head()

Unnamed: 0,year,expenditure_title,tech_vocational_cost_per_student
0,2010,teacher_salaries_vocational_education,4967.648605
1,2011,teacher_salaries_vocational_education,5143.232523
2,2012,teacher_salaries_vocational_education,5135.916404
3,2013,teacher_salaries_vocational_education,5184.779075
4,2014,teacher_salaries_vocational_education,5512.360265


In [87]:
grouped_cost_per_student = grouped.groupby('year')['tech_vocational_cost_per_student'].sum().reset_index()
cost_per_student_2014 = grouped_cost_per_student[grouped_cost_per_student['year'] == 2014]['tech_vocational_cost_per_student'].sum()
cost_per_student_2015 = grouped_cost_per_student[grouped_cost_per_student['year'] == 2015]['tech_vocational_cost_per_student'].sum()
cost_per_student_2016 = grouped_cost_per_student[grouped_cost_per_student['year'] == 2016]['tech_vocational_cost_per_student'].sum()

print(f'2014 Total: ${cost_per_student_2014:.2f}')
print(f'2015 Total: ${cost_per_student_2015:.2f}')
print(f'2016 Total: ${cost_per_student_2016:.2f}')

2014 Total: $5512.36
2015 Total: $8316.33
2016 Total: $11817.46


In [88]:
cost_per_student_difference_between_2014_2016 = cost_per_student_2016 - cost_per_student_2014

cost_per_student_percent_difference_between_2014_2016 = cost_per_student_difference_between_2014_2016 / cost_per_student_2014 * 100

print(f'Difference between 2014 and 2016: ${cost_per_student_difference_between_2014_2016:.2f}')
print(f'Percent difference between 2014 and 2016: {cost_per_student_percent_difference_between_2014_2016:.2f}%')

Difference between 2014 and 2016: $6305.10
Percent difference between 2014 and 2016: 114.38%


In [89]:
grouped = grouped.sort_values(by=['expenditure_title', 'year'])

# Then, calculate the year-to-year growth rate for each category
grouped['growth_rate'] = grouped.groupby('expenditure_title')['tech_vocational_cost_per_student'].pct_change()
grouped['growth_rate'] *= 100

grouped.head()

Unnamed: 0,year,expenditure_title,tech_vocational_cost_per_student,growth_rate
20,2020,cares_act_expenditure_tech_related_equipment,69.398844,
21,2020,cares_act_expenditure_tech_related_supplies_se...,175.002068,
0,2010,teacher_salaries_vocational_education,4967.648605,
1,2011,teacher_salaries_vocational_education,5143.232523,3.534548
2,2012,teacher_salaries_vocational_education,5135.916404,-0.142247


In [90]:
tech_vocational_salaries_cost_per_student_growth_rate_2013_2019 = grouped[
    (grouped['expenditure_title'] == 'teacher_salaries_vocational_education') &
    grouped['year'].between(2013, 2019)]['growth_rate'].mean()

print(f"Average cost per student growth rate of Teacher Salaries - Vocational Education between 2013 and 2019: {tech_vocational_salaries_cost_per_student_growth_rate_2013_2019:.2f}%")

Average cost per student growth rate of Teacher Salaries - Vocational Education between 2013 and 2019: 3.27%


In [91]:
tech_related_equipment_cost_per_student_growth_rate_2015_2016 = grouped[
    (grouped['expenditure_title'] == 'tech_related_equipment') &
    grouped['year'].between(2015, 2016)]['growth_rate'].mean()

tech_related_equipment_cost_per_student_growth_rate_2016_2020 = grouped[
    (grouped['expenditure_title'] == 'tech_related_equipment') &
    grouped['year'].between(2016, 2020)]['growth_rate'].mean()

print(f"Average cost per student growth rate of Tech Related Equipment between 2015 and 2016: {tech_related_equipment_cost_per_student_growth_rate_2015_2016:.2f}%")
print(f"Average cost per student growth rate of Tech Related Equipment between 2016 and 2020: {tech_related_equipment_cost_per_student_growth_rate_2016_2020:.2f}%")

Average cost per student growth rate of Tech Related Equipment between 2015 and 2016: 68.56%
Average cost per student growth rate of Tech Related Equipment between 2016 and 2020: 21.36%


In [92]:
tech_related_supplies_services_cost_per_student_growth_rate_2015_2016 = grouped[
    (grouped['expenditure_title'] == 'tech_related_supplies_services') &
    grouped['year'].between(2015, 2016)]['growth_rate'].mean()

tech_related_supplies_services_cost_per_student_growth_rate_2016_2020 = grouped[
    (grouped['expenditure_title'] == 'tech_related_supplies_services') &
    grouped['year'].between(2016, 2020)]['growth_rate'].mean()

print(f"Average cost per student growth rate of Tech Related Supplies & Services between 2015 and 2016: {tech_related_supplies_services_cost_per_student_growth_rate_2015_2016:.2f}%")
print(f"Average cost per student growth rate of Tech Related Supplies & Services between 2016 and 2020: {tech_related_supplies_services_cost_per_student_growth_rate_2016_2020:.2f}%")

Average cost per student growth rate of Tech Related Supplies & Services between 2015 and 2016: 146.71%
Average cost per student growth rate of Tech Related Supplies & Services between 2016 and 2020: 42.20%


In [93]:
# Create Line Chart for Cost per Student Expenses
tech_voc_cost_per_student_line_chart = px.line(grouped_cost_per_student,
                                               x= 'year',
                                               y= 'tech_vocational_cost_per_student',
                                               title= 'Tech and Vocational Cost per Student by Year')

In [94]:
# Create Line Chart for Cost per Student Expenses by Expenditure Title
tech_voc_cost_per_student_line_chart1 = px.line(grouped,
                                               x= 'year',
                                               y= 'tech_vocational_cost_per_student', 
                                               color='expenditure_title', 
                                               title= 'Tech and Vocational Cost per Student by Year & Expenditure Title')


In [95]:
# Create Line Chart for Cost per Student Expenses by Expenditure Title
tech_voc_cost_per_student_line_chart2 = px.line(grouped,
                                               x= 'year',
                                               y= 'growth_rate', 
                                               color='expenditure_title', 
                                               title= 'Tech and Vocational Cost per Student by Year & Expenditure Title')

tech_voc_cost_per_student_line_chart2.show()

In [96]:
tech_vocational_cost_per_student_line_charts_combined = create_combined_figure(
    tech_voc_cost_per_student_line_chart,
    tech_voc_cost_per_student_line_chart1,
    'Tech and Vocational Education Costs per Student Comparison',
    ('Tech & Vocational Cost per Student Over Time', 'Tech & Vocational Cost per Student by Title Over Time')
)

tech_vocational_cost_per_student_line_charts_combined.show()

### Notes:
The cost per student in tech and vocational education expenditures reveals significant growth trends, especially between 2014 and 2016, as observed in the line charts. This period marked a substantial 114.38% increase in costs per student, totaling an increase of $6305.10.

**Teacher Salaries - Vocational Education**
- `teacher_salaries_vocational_education` demonstrates an overall upward trend.
- The average annual growth rate from 2013 to 2019 was 3.27%.
- A significant rise of 7.35% was recorded in 2020, indicating an exceptional growth year.

**Tech-Related Equipment**
- The tech_related_equipment category experienced its highest growth between 2015 and 2016, at 68.56%.
- From 2016 to 2020, the average annual growth rate was 21.36%, showing sustained growth over the period.

**Tech-Related Supplies & Services**
- The tech_related_supplies_services category witnessed its most substantial increase between 2015 and 2016, at 146.71%.
- Although the growth rate decreased after 2016, it still maintained a high average annual growth rate of 42.20% until 2020.

In [97]:
grouped.head(25)

Unnamed: 0,year,expenditure_title,tech_vocational_cost_per_student,growth_rate
20,2020,cares_act_expenditure_tech_related_equipment,69.398844,
21,2020,cares_act_expenditure_tech_related_supplies_se...,175.002068,
0,2010,teacher_salaries_vocational_education,4967.648605,
1,2011,teacher_salaries_vocational_education,5143.232523,3.534548
2,2012,teacher_salaries_vocational_education,5135.916404,-0.142247
3,2013,teacher_salaries_vocational_education,5184.779075,0.951391
4,2014,teacher_salaries_vocational_education,5512.360265,6.318132
5,2015,teacher_salaries_vocational_education,5671.666525,2.889983
8,2016,teacher_salaries_vocational_education,5994.01603,5.683506
11,2017,teacher_salaries_vocational_education,6153.137942,2.654679


In [98]:
# Create Region Mapping
region_mapping = {
    'Alabama': 'South',
    'Alaska': 'West',
    'Arizona': 'West',
    'Arkansas': 'South',
    'California': 'West',
    'Colorado': 'West',
    'Connecticut': 'Northeast',
    'Delaware': 'South',
    'Florida': 'Southeast',
    'Georgia': 'Southeast',
    'Hawaii': 'West',
    'Idaho': 'West',
    'Illinois': 'Midwest',
    'Indiana': 'Midwest',
    'Iowa': 'Midwest',
    'Kansas': 'Midwest',
    'Kentucky': 'South',
    'Louisiana': 'South',
    'Maine': 'Northeast',
    'Maryland': 'South',
    'Massachusetts': 'Northeast',
    'Michigan': 'Midwest',
    'Minnesota': 'Midwest',
    'Mississippi': 'South',
    'Missouri': 'Midwest',
    'Montana': 'West',
    'Nebraska': 'Midwest',
    'Nevada': 'West',
    'New Hampshire': 'Northeast',
    'New Jersey': 'Northeast',
    'New Mexico': 'West',
    'New York': 'Northeast',
    'North Carolina': 'Southeast',
    'North Dakota': 'Midwest',
    'Ohio': 'Midwest',
    'Oklahoma': 'South',
    'Oregon': 'Pacific Northwest',
    'Pennsylvania': 'Northeast',
    'Rhode Island': 'Northeast',
    'South Carolina': 'Southeast',
    'South Dakota': 'Midwest',
    'Tennessee': 'South',
    'Texas': 'South',
    'Utah': 'West',
    'Vermont': 'Northeast',
    'Virginia': 'South',
    'Washington': 'Pacific Northwest',
    'West Virginia': 'South',
    'Wisconsin': 'Midwest',
    'Wyoming': 'West'
}

In [99]:
# Insert Region column
merged['region'] = merged['state'].map(region_mapping)
merged.head()

Unnamed: 0,state,year,student_count,expenditure_title,amount,amount_z_score_avg,tech_vocational_cost_per_student,region
0,Alabama,2010,748889,teacher_salaries_vocational_education,110309000.0,-1.67803e-19,147.296862,South
1,Alabama,2011,738322,teacher_salaries_vocational_education,107614000.0,-6.141666999999999e-19,145.754833,South
2,Alabama,2012,744621,teacher_salaries_vocational_education,105162000.0,-2.2560609999999997e-19,141.228894,South
3,Alabama,2013,744548,teacher_salaries_vocational_education,108247000.0,1.713433e-19,145.386194,South
4,Alabama,2014,740081,teacher_salaries_vocational_education,110787000.0,6.566667e-19,149.695777,South


In [100]:
regional_aggregations = merged.pivot_table(values= ['student_count', 'amount'], index= ['region', 'year'], aggfunc= 'sum')
regional_aggregations.reset_index(inplace=True)
regional_aggregations.head()

Unnamed: 0,region,year,amount,student_count
0,Midwest,2010,894448000.0,6028314
1,Midwest,2011,954296000.0,6868314
2,Midwest,2012,918036000.0,6816199
3,Midwest,2013,906328000.0,6778441
4,Midwest,2014,891829000.0,6764464


In [101]:
tech_voc_cost_by_region_line_chart = px.line(
    regional_aggregations,
    x='year',
    y='amount',
    color='region',
    title='Tech and Vocational Cost by Region Over Time'
)

tech_voc_cost_by_region_line_chart.show()


In [102]:
regional_aggregations['per_student_costs'] = regional_aggregations['amount'] / regional_aggregations['student_count']
regional_aggregations.head()

Unnamed: 0,region,year,amount,student_count,per_student_costs
0,Midwest,2010,894448000.0,6028314,148.374487
1,Midwest,2011,954296000.0,6868314,138.941813
2,Midwest,2012,918036000.0,6816199,134.684448
3,Midwest,2013,906328000.0,6778441,133.707441
4,Midwest,2014,891829000.0,6764464,131.840305


In [103]:
tech_voc_cost_by_region_line_chart1 = px.line(
    regional_aggregations,
    x='year',
    y='per_student_costs',
    color='region',
    title='Tech and Vocational Per Student Cost by Region Over Time'
)

tech_voc_cost_by_region_line_chart1.show()

In [104]:
tech_voc_cost_by_region_line_chart2 = px.line(
    regional_aggregations,
    x='year',
    y='student_count',
    color='region',
    title='Student Population by Region Over Time',
)

tech_voc_cost_by_region_line_chart2.show()

What occured between 2019 and 2020 to cause a spike in student population in the Pacific Northwest? They had a mostly consistent student population from 2010 to 2015. 2015 to 2018 saw a slight uptick in population growth. Between 2018 and 2019, the population fell, but then saw a sharp spike in 2019 to 2020. 

The west had a steady student population from 2010 to 2014, but then saw a sudden spike in 2015. The population continued to rise at an elevated rate until 2017, but then started seeing a decrease in 2018. This decrease in population occured until 2019, after which point, there was a sudden spike. Despite most of the US population being in the Eastern US, the West was in first place between 2010 and 2012. In 2012, the South took first place and maintained first through 2020. The Northeast took second place in 2018, then took third place after the Midwest surpassed it in 2020. 

In [105]:
regional_aggregations_w_exp_title = merged.pivot_table(values= ['student_count', 'amount'], index= ['region', 'year', 'expenditure_title'], aggfunc= 'sum')
regional_aggregations_w_exp_title.reset_index(inplace=True)
regional_aggregations_w_exp_title.head()

Unnamed: 0,region,year,expenditure_title,amount,student_count
0,Midwest,2010,teacher_salaries_vocational_education,894448000.0,6028314
1,Midwest,2011,teacher_salaries_vocational_education,954296000.0,6868314
2,Midwest,2012,teacher_salaries_vocational_education,918036000.0,6816199
3,Midwest,2013,teacher_salaries_vocational_education,906328000.0,6778441
4,Midwest,2014,teacher_salaries_vocational_education,891829000.0,6764464


In [106]:
regional_expenditures_line_grid = make_line_plot_grid(regional_aggregations_w_exp_title,
                                                      'year',
                                                      'amount',
                                                      'expenditure_title',
                                                      'region',
                                                      2,
                                                      'Expenditures by Region and Expenditure Title Over Time')

regional_expenditures_line_grid.show()

In [107]:
regional_aggregations_w_exp_title['cost_per_student'] = regional_aggregations_w_exp_title['amount'] / regional_aggregations_w_exp_title['student_count']
regional_aggregations_w_exp_title.head()

Unnamed: 0,region,year,expenditure_title,amount,student_count,cost_per_student
0,Midwest,2010,teacher_salaries_vocational_education,894448000.0,6028314,148.374487
1,Midwest,2011,teacher_salaries_vocational_education,954296000.0,6868314,138.941813
2,Midwest,2012,teacher_salaries_vocational_education,918036000.0,6816199,134.684448
3,Midwest,2013,teacher_salaries_vocational_education,906328000.0,6778441,133.707441
4,Midwest,2014,teacher_salaries_vocational_education,891829000.0,6764464,131.840305


In [108]:
regional_cost_per_student_line_grid = make_line_plot_grid(
    regional_aggregations_w_exp_title,
    x='year',
    y='cost_per_student',
    color='expenditure_title',
    facet_col='region',
    facet_col_wrap=2,
    title='Cost per Student by Region and Expenditure Title Over Time'
)
regional_cost_per_student_line_grid.show()

In [109]:
group_names = ['Low', 'Medium', 'High']
regional_aggregations_w_exp_title['student_count binned'] = regional_aggregations_w_exp_title.groupby('year')['student_count'] \
                                                         .transform(lambda x: pd.qcut(x, q=[0, .25, .75, 1.], labels=group_names))
regional_aggregations_w_exp_title.sort_values(['year', 'student_count'], inplace=True)

In [110]:
hist = px.histogram(regional_aggregations_w_exp_title, 
                    x='student_count binned', 
                    y= 'student_count',
                    histfunc='avg',  
                    facet_col='region',
                    facet_col_wrap=2,
                    title= 'Average Student Count Bin Distribution')

for axis in hist.layout:
    if axis.startswith('yaxis') or axis.startswith('xaxis'):
        hist.layout[axis].title.text = ''


hist.show()

In [111]:
regional_cost_per_student_bar_grid = make_bar_chart_grid(
                                    regional_aggregations_w_exp_title,
                                    x= 'year',
                                    y= 'cost_per_student',
                                    color= 'expenditure_title',
                                    facet_col= 'region',
                                    facet_col_wrap= 2,
                                    title= 'Cost per Student by Region & Expenditure Title'
                                    )

regional_cost_per_student_bar_grid.update_yaxes(title_text='Cost per Student')

regional_cost_per_student_bar_grid.show()

In [112]:
fig = px.scatter(
    regional_aggregations, 
    x="student_count", 
    y="per_student_costs",
    size="per_student_costs", 
    color="region",
    title="Cost per Student vs Student Count",
    log_x=False, 
    size_max=50,
    animation_frame="year",  # This will create the slider based on the 'year' column
    range_y=[regional_aggregations['per_student_costs'].min() - 50, 
             regional_aggregations['per_student_costs'].max() + 50],  # Expand the y-axis range by 50 units on both ends
    range_x=[regional_aggregations['student_count'].min() - 500000, 
             regional_aggregations['student_count'].max() + 500000]  # Expand the x-axis range by 50 units on both ends
)

# Update layout to ensure slider doesn't overlap with other elements and to set animation options
fig.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),  # Adjust the bottom margin to create space for the slider
    xaxis_title="Student Count",
    yaxis_title="Cost per Student",
    updatemenus=[{
        'buttons': [
            {
                'args': [None, {
                    'frame': {'duration': 1000, 'redraw': False}, 
                    'fromcurrent': True, 
                    'transition': {'duration': 600, 'easing': 'quadratic-in-out'}
                }],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {
                    'frame': {'duration': 0, 'redraw': False},
                    'mode': 'immediate', 
                    'transition': {'duration': 0}
                }],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }],
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Calibri"
        )
)

fig.layout.updatemenus[0].buttons[0].args[1]['repeat'] = True

fig.show()