In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
from ipywidgets import interactive
# Display configuration: never truncate columns or rows, and render floats with two decimals.
for _option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_option, None)
pd.set_option('display.float_format', '{:.2f}'.format)

## Reading CSVs
#### Do NOT run again
~ ecs_updated (becomes ecs) - includes funding details for all 50 states + DC (needs to be merged on State)

~ enrollment_changes (becomes enrollments) - includes calculations for annual enrollment changes by NCES ID. It is filtered to local districts that are not affiliated with a supervisory union, and it keeps only the columns needed for the loop or for merging. It can be merged on State with the ECS data or on NCES ID with the other sources.

~ localdistricts (becomes localdistricts) - does not include enrollment change calculations, but does still include columns for locale, ZIP code, FTE count, pupil/teacher ratio, and year (can be merged on NCES ID or State depending on the need, shape aligns exactly to enrollment_changes)

~ enrollmentsdf (becomes fullnces) - primary dataframe until the loop; it contains all columns and rows, so it may ultimately need more cleaning and restructuring than is helpful (can be merged on State or NCES ID)

In [None]:
ecs = pd.read_csv("../data/ecs_updated.csv")
ecs.head(3)

In [None]:
enrollments = pd.read_csv("../data/enrollment_changes.csv")
enrollments.head(3)

In [None]:
localdistricts = pd.read_csv("../data/localdistricts.csv")
localdistricts.head(3)

In [None]:
#unlikely to use, but here for reference as needed
fullnces = pd.read_csv("../data/enrollmentsdf.csv")
fullnces.head(3)

In [None]:
netchangedist = enrollments.groupby(['State Name', 'Agency Name', 'NCES ID'])['Enrollment Change'].sum().reset_index()
netchangedist.head()

## Cleaning & Standardizing DataFrames
#### Do NOT run again

In [None]:
ecs = ecs.drop("Unnamed: 0", axis=1)
ecs.head(2)

In [None]:
ecs.rename(columns={'State':'State Name'}, inplace=True)
ecs.head(1)

In [None]:
enrollments = enrollments.drop("Unnamed: 0", axis=1)
enrollments.head(2)

In [None]:
netchangedist.rename(columns={'Enrollment Change':'Net Enrollment Change'}, inplace=True)
netchangedist.head(2)

## Merging DataFrames
#### Do NOT run again
This section also drops duplicate/unnecessary columns, standardizes column names, and reorders columns for efficiency.

In [None]:
netenrollments = enrollments.merge(netchangedist, on='NCES ID', suffixes=('_nces', '_net'))
netenrollments.head(3)

In [None]:
netenrollments.rename(columns={'Agency Name_nces':'Agency Name', 'State Name_nces':'State Name', 'Enrollment Change_nces':'Annual Enrollment Change', 'Enrollment Change_net':'Net Enrollment Change'}, inplace=True)
netenrollments.head(3)

In [None]:
netenrollments = netenrollments.drop(['State Name_net', 'Agency Name_net'], axis=1)
netenrollments.head(3)

In [None]:
enrollments.shape

In [None]:
netchangedist.shape

In [None]:
netenrollments.shape

In [None]:
localdistricts.shape

In [None]:
# Left merge: every localdistricts row is kept and the matching netenrollments
# columns are attached on (NCES ID, Year); clashing column names get '_loc' / '_net'.
netandlocal = localdistricts.merge(netenrollments, on=['NCES ID', 'Year'], how='left', suffixes=('_loc', '_net'))
netandlocal.shape
#dataframes were the same shape - needed left merge rather than outer to avoid 8x duplicates

In [None]:
netandlocal.head(3)

In [None]:
# NOTE(review): 'State me' (and 'Agency me' in the next cell) are used verbatim as
# column names - they look like mangled 'State Name' / 'Agency Name' headers,
# presumably from localdistricts.csv; confirm against the file.
netandlocal = netandlocal.drop(['Student Count_loc', 'Agency Name', 'District', 'State me'], axis=1)
netandlocal.head(3)

In [None]:
# 'State me' was dropped in the previous cell, so that entry matches nothing here;
# rename() silently ignores absent keys.
netandlocal.rename(columns={'Agency me':'Agency Name', 'State me':'State Name', 'Student Count_net':'Student Count'}, inplace=True)
netandlocal.head(3)

In [None]:
netandlocal.shape

In [None]:
netfunding = netandlocal.merge(ecs, on='State Name', how='outer', suffixes=('_net', '_ecs'))
netfunding.shape

In [None]:
netfunding.info()
#all rows should have a state name given the df shape and non-null count

In [None]:
netfunding.head(2)

In [None]:
#netfunding.to_csv("../data/netfunding.csv")

In [None]:
#netandlocal.to_csv("../data/netandlocal2.csv")

In [None]:
#netenrollments.to_csv("../data/netenrollments.csv")

## Additional DataFrames & Cleaning
#### Do NOT run again

In [None]:
data = pd.read_csv("../data/netfunding.csv")
data.shape

In [None]:
data.info()

In [None]:
data.head(2)

In [None]:
#data = data.drop(['Unnamed: 0'], axis=1)
#data.head(1)

In [None]:
cols = list(data.columns.values)
cols

In [None]:
cols = ['Agency Name', 'State Name', 'State Abbreviation', 'NCES ID', 'County', 'Student Count', 'Year', 'Enrollment Change', 'Net Enrollment Change', 'School Count', 'ZIP Code', 'District Type', 'Locale', 'Start of Year Status', 'Updated Status', 'FTE Teachers', 'Pupil/Teacher Ratio', 'Total Staff', 'Census Region', '2020 Election Result', '2020 Battleground State', 'Primary Funding Model', 'Model Name', 'Notes', 'Base Amount (Y/N)', 'Base Amount', 'Base Amount Legal Source', 'Student Count Method', 'Student Count Method Source']

In [None]:
data = data[cols]
data.head(2)

In [None]:
data['NCES ID'] = data['NCES ID'].astype(str).str.replace('.0', '', regex=False)
data.head(2)

In [None]:
data['Year'] = data['Year'].astype(str).str.replace('.0', '', regex=False)
data.head(2)

In [None]:
data['ZIP Code'] = data['ZIP Code'].astype(str).str.replace('.0', '', regex=False)
data.head(2)

In [None]:
data['Net Enrollment Change'] = data['Net Enrollment Change'].astype(str).str.replace('.0', '', regex=False)
data.head(2)

In [None]:
data['Net Enrollment Change'] = pd.to_numeric(data['Net Enrollment Change'], errors='coerce')
data['Net Enrollment Change'].dtype

In [None]:
data['School Count'] = data['School Count'].astype(str).str.replace('.0', '', regex=False)
data.head(2)

In [None]:
data['School Count'] = pd.to_numeric(data['School Count'], errors='coerce')
data['School Count'].dtype

In [None]:
data['Year'] = pd.to_numeric(data['Year'], errors='coerce')
data['Year'].dtype

In [None]:
data['ZIP Code'] = pd.to_numeric(data['ZIP Code'], errors='coerce')
data['ZIP Code'].dtype

In [None]:
data.info()

In [None]:
data.to_csv("../data/capstonedata_2.csv")
#dtypes have been changed and extra columns removed

## Import Final CSV & EDA
#### Start here after running initial code block

In [None]:
data = pd.read_csv("../data/capstonedata_final.csv", nrows=104923)
data.tail()

In [None]:
data.info()

In [None]:
data = data.drop(['Unnamed: 0'], axis=1)
data.head(1)

In [None]:
data['NCES ID'] = pd.to_numeric(data['NCES ID'], errors='coerce')
data['NCES ID'].dtype

In [None]:
data['ZIP Code'] = pd.to_numeric(data['ZIP Code'], errors='coerce')
data['ZIP Code'].dtype

In [None]:
data['Year'] = pd.to_numeric(data['Year'], errors='coerce')
data['Year'].dtype

In [None]:
data['Annual Budgetary Change'] = data['Enrollment Change'] * data['Base Amount']

In [None]:
data['Net Budgetary Change'] = data['Net Enrollment Change'] * data['Base Amount']

In [None]:
data.head()

In [None]:
data['Annual Budgetary Change'].value_counts()

In [None]:
data['Net Budgetary Change'].value_counts()

In [None]:
sbfdn = data.loc[data['Primary Funding Model'] == 'Student-based foundation']

In [None]:
rbased = data.loc[data['Primary Funding Model'] == 'Resource-based allocation']

In [None]:
hybrid = data.loc[data['Primary Funding Model'] == 'Hybrid']

In [None]:
gtbase = data.loc[data['Primary Funding Model'] == 'Guaranteed tax base']

In [None]:
baseamt = data.loc[data['Base Amount (Y/N)'] == 'Yes']

In [None]:
nbaseamt = data.loc[data['Base Amount (Y/N)'] == 'No']

In [None]:
northeast = data.loc[data['Census Region'] == 'Northeast']

In [None]:
midwest = data.loc[data['Census Region'] == 'Midwest']

In [None]:
south = data.loc[data['Census Region'] == 'South']

In [None]:
west = data.loc[data['Census Region'] == 'West']

In [None]:
republican = data.loc[data['2020 Election Result'] == 'Republican']

In [None]:
democrat = data.loc[data['2020 Election Result'] == 'Democrat']

In [None]:
batlgrnd = data.loc[data['2020 Battleground State'] == 'Yes']

In [None]:
nbatlgrnd = data.loc[data['2020 Battleground State'] == 'No']

In [None]:
year13 = data.loc[data['Year'] == 2013]
year13.head()

In [None]:
year14 = data.loc[data['Year'] == 2014]

In [None]:
year15 = data.loc[data['Year'] == 2015]

In [None]:
year16 = data.loc[data['Year'] == 2016]

In [None]:
year17 = data.loc[data['Year'] == 2017]

In [None]:
year18 = data.loc[data['Year'] == 2018]

In [None]:
year19 = data.loc[data['Year'] == 2019]

In [None]:
year20 = data.loc[data['Year'] == 2020]

In [None]:
top25g1 = data.nlargest(200, "Net Enrollment Change", keep='last')
top25g1
#running with 200 because variable numbers within each row keep them from being seen as duplicates. Next step is drop the unneeded columns, then drop dupes, and then rename the df for usability

In [None]:
top25g = top25g1.drop_duplicates(subset=['Net Enrollment Change'], keep='last')
top25g

In [None]:
len(top25g)

In [None]:
top25l1 = data.nsmallest(200, "Net Enrollment Change", keep='last')
len(top25l1)

In [None]:
top25l = top25l1.drop_duplicates(subset=['Net Enrollment Change'], keep='last')
top25l

In [None]:
len(top25l)

## EDA

In [None]:
#looking for areas of strong correlation to dig into with deeper analysis
# fig, ax = plt.subplots(figsize=(10, 10))

# corr = data.corr()
# cmap = sns.diverging_palette(220, 10, as_cmap=True)

# mask = np.zeros_like(corr, dtype=bool)  # np.bool was removed in NumPy 1.24; use the builtin bool
# mask[np.triu_indices_from(mask)] = True

# sns.heatmap(corr, cmap=cmap, mask = mask, center=0,
#            square=True, linewidths=.5, cbar_kws={"shrink": .5});

In [None]:
top25g['Locale'].value_counts(normalize=True)*100

In [None]:
top25g['State Name'].value_counts(normalize=True)*100

In [None]:
top25g['Primary Funding Model'].value_counts(normalize=True)*100

In [None]:
top25g['2020 Election Result'].value_counts(normalize=True)*100

In [None]:
top25g['Census Region'].value_counts(normalize=True)*100

In [None]:
top25g.groupby('Agency Name')['School Count'].value_counts()

In [None]:
top25l['Locale'].value_counts(normalize=True)*100

In [None]:
top25l['State Name'].value_counts(normalize=True)*100

In [None]:
top25l['Primary Funding Model'].value_counts(normalize=True)*100

In [None]:
top25l['2020 Election Result'].value_counts(normalize=True)*100

In [None]:
top25l['Census Region'].value_counts(normalize=True)*100

In [None]:
top25l.groupby('Agency Name')['School Count'].value_counts()

In [None]:
data['Locale'].value_counts(normalize=True)*100

In [None]:
data['State Name'].value_counts(normalize=True)*100

In [None]:
data['Primary Funding Model'].value_counts(normalize=True)*100

In [None]:
data['2020 Election Result'].value_counts(normalize=True)*100

In [None]:
data['Census Region'].value_counts(normalize=True)*100

In [None]:
# Summary statistics for the top-growth districts
growth_desc = top25g.describe()
growth_desc
#author's note: topenrollmentgrowth (presumably an earlier name for top25g - TODO confirm) is based on 2013

In [None]:
# Summary statistics for the top-loss districts
loss_desc = top25l.describe()
loss_desc
#author's note: topenrollmentloss (presumably an earlier name for top25l - TODO confirm) is based on 2013

In [None]:
# Summary statistics for each single-year subset, 2013 through 2020
desc_13 = year13.describe()
desc_13

In [None]:
desc_14 = year14.describe()
desc_14

In [None]:
desc_15 = year15.describe()
desc_15

In [None]:
desc_16 = year16.describe()
desc_16

In [None]:
desc_17 = year17.describe()
desc_17

In [None]:
desc_18 = year18.describe()
desc_18

In [None]:
desc_19 = year19.describe()
desc_19

In [None]:
desc_20 = year20.describe()
desc_20

In [None]:
budgetloss = data.nsmallest(25, "Annual Budgetary Change", keep='last')
budgetloss

In [None]:
budgetloss['Annual Budgetary Change'].value_counts()

In [None]:
budgetgrowth = data.nlargest(25, "Annual Budgetary Change", keep='last')
budgetgrowth

In [None]:
budgetgrowth['Annual Budgetary Change'].value_counts()

In [None]:
data.groupby('State Name')['Base Amount'].value_counts()

## to_csv

In [None]:
#data.to_csv("../data/capstonedata_final.csv")

In [None]:
#top25g.to_csv("../data/top25_growth.csv")  # was data.to_csv(...), which would write the full dataset into the growth file

In [None]:
#top25l.to_csv("../data/top25_loss.csv")  # was data.to_csv(...), which would write the full dataset into the loss file

## Visualizations

In [None]:
# Horizontal bars (plain matplotlib): net enrollment change for each district in top25g
fig, ax = plt.subplots(figsize=(8, 8))
ax.barh(y=top25g['Agency Name'], width=top25g['Net Enrollment Change'], color='blue')
ax.set_xlabel('Net Enrollment Change')
fig.tight_layout()
plt.show();

In [None]:
# Seaborn version of the net-growth chart, with a value label on every bar
fig, ax = plt.subplots(figsize=(17, 10))
sns.set_style('ticks')
growth_bar_style = dict(color='whitesmoke', edgecolor='gainsboro', lw=2)
sns.barplot(data=top25g, x='Net Enrollment Change', y='Agency Name', ax=ax, **growth_bar_style)
ax.bar_label(ax.containers[0], size=12, padding=2)
ax.set_xlabel('Net Enrollment Change')
ax.set_ylabel(None)
ax.set_title('Districts with Largest Net Enrollment Growth, 2013 - 2020')
fig.tight_layout()
fig.savefig('Top 25 Net Enrollment Growth Districts')
plt.show();
#not sure if the bar labels are helpful or distracting at this point

In [None]:
# Seaborn chart of the largest net enrollment losses, with a value label on every bar
fig, ax = plt.subplots(figsize=(18, 10))
sns.set_style('white')
loss_bar_style = dict(color='dimgray', edgecolor='white', lw=2)
sns.barplot(data=top25l, x='Net Enrollment Change', y='Agency Name', ax=ax, **loss_bar_style)
ax.bar_label(ax.containers[0], size=12, padding=2)
ax.set_xlabel('Net Enrollment Change')
ax.set_ylabel(None)
# Fixed tick positions every 40,000 students from -200,000 up to 0
ax.set_xticks([-200000, -160000, -120000, -80000, -40000, 0])
ax.set_title('Districts with Largest Net Enrollment Loss, 2013 - 2020')
fig.tight_layout()
fig.savefig('Top 25 Net Enrollment Loss Districts')
plt.show();
#look into changing the x axis labels so that the graph doesn't have to stretch as much for LAUSD 

In [None]:
top25combo = pd.concat([top25g, top25l])
top25combo

In [None]:
#profiles of top 25 - state
sns.set(rc={"figure.figsize":(8,8)})
state_order=['TEXAS', 'CALIFORNIA', 'FLORIDA', 'UTAH', 'MARYLAND', 'GEORGIA', 'VIRGINIA', 'IDAHO', 'NORTH CAROLINA', 'ILLINOIS', 'TENNESSEE', 'PENNSYLVANIA', 'HAWAII', 'NEW MEXICO', 'OKLAHOMA', 'LOUISIANA', 'MISSISSIPPI', 'INDIANA']
sns.countplot(x='State Name', data=top25combo, order=state_order, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by State - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#countplot by state for top 25 enrollment growth
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
state_order=['TEXAS', 'FLORIDA', 'UTAH', 'GEORGIA', 'MARYLAND', 'IDAHO', 'NORTH CAROLINA', 'VIRGINIA']
sns.countplot(x='State Name', data=top25g, order=state_order, color='gainsboro', edgecolor='dimgrey')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of Districts by State - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.savefig('States in Top 25 Growth')
plt.show();

In [None]:
#countplot by state for top 25 enrollment loss
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
state_order=['TEXAS', 'CALIFORNIA', 'FLORIDA', 'HAWAII', 'ILLINOIS', 'INDIANA', 'LOUISIANA', 'MISSISSIPPI', 'NEW MEXICO', 'OKLAHOMA', 'PENNSYLVANIA', 'TENNESSEE']
sns.countplot(x='State Name', data=top25l, order=state_order, color='dimgray', edgecolor='white')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of Districts by State - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.savefig('States in Top 25 Loss')
plt.show();

In [None]:
#profiles of top 25 - locale
sns.set(rc={"figure.figsize":(8,8)})
locale_order=['21-Suburb: Large', '11-City: Large', '12-City: Mid-size', '13-City: Small', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='Locale', data=top25combo, order=locale_order, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Locale - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 growth - locale
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
locale_order=['21-Suburb: Large', '13-City: Small', '11-City: Large', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='Locale', data=top25g, order=locale_order, color='gainsboro', edgecolor='dimgrey')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Locale - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.savefig('Locale Top 25 Growth')
plt.show();

In [None]:
#profiles of top 25 loss - locale
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
#locale_order=['21-Suburb: Large', '13-City: Small', '11-City: Large', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='Locale', data=top25l, color='dimgray', edgecolor='white')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Locale - Top 25 Enrollment Loss, 2013 - 2020')
plt.tight_layout()
plt.savefig('Locale Top 25 Loss')
plt.show();

In [None]:
#profiles of top 25 - census region
sns.set(rc={"figure.figsize":(8,8)})
#locale_order=['21-Suburb: Large', '11-City: Large', '12-City: Mid-size', '13-City: Small', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='Census Region', data=top25combo, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Census Region - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 growth - census
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
#locale_order=['21-Suburb: Large', '13-City: Small', '11-City: Large', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='Census Region', data=top25g, color='gainsboro', edgecolor='dimgrey')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Census Region - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.savefig('Census Region Top 25 Growth')
plt.show();

In [None]:
#profiles of top 25 growth - census
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
census_order=['South', 'West', 'Midwest', 'Northeast']
sns.countplot(x='Census Region', data=top25l, order=census_order, color='dimgray', edgecolor='white')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Census Region - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.savefig('Census Region Top 15 Loss')
plt.show();

In [None]:
#profiles of top 25 - election result
sns.set(rc={"figure.figsize":(8,8)})
#locale_order=['21-Suburb: Large', '11-City: Large', '12-City: Mid-size', '13-City: Small', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='2020 Election Result', data=top25combo, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by 2020 Election Result - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 growth - election result
sns.set(rc={"figure.figsize":(8,8)})
#locale_order=['21-Suburb: Large', '13-City: Small', '11-City: Large', '41-Rural: Fringe', '43-Rural: Remote', '32-Town: Distant', '31-Town: Fringe']
sns.countplot(x='2020 Election Result', data=top25g, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by 2020 Election Result - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 growth - election result
sns.set(rc={"figure.figsize":(8,8)})
result_order=['Republican', 'Democrat']
sns.countplot(x='2020 Election Result', data=top25l, order=result_order, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by 2020 Election Result - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Primary Funding Model', data=top25combo, order=funding_order, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Primary Funding Model - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Primary Funding Model', data=top25g, order=funding_order, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Primary Funding Model - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Primary Funding Model', data=top25l, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Primary Funding Model - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Base Amount (Y/N)', data=top25combo, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Base Amount - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Base Amount (Y/N)', data=top25g, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Base Amount - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.countplot(x='Base Amount (Y/N)', data=top25l, palette='ocean_r')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Base Amount - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Pupil/Teacher Ratio', data=top25combo, bins=8, color='#76b6c4')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Pupil/Teacher Ratio')
plt.ylabel('Number of Districts')
plt.title('Distribution of Pupil/Teacher Ratio - Top 25 Growth & Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Pupil/Teacher Ratio', data=top25g, bins=8, color='#76b6c4')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Pupil/Teacher Ratio')
plt.ylabel('Number of Districts')
plt.title('Distribution of Pupil/Teacher Ratio - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Pupil/Teacher Ratio', data=top25l, bins=8, color='#76b6c4')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Pupil/Teacher Ratio')
plt.ylabel('Number of Districts')
plt.title('Distribution of Pupil/Teacher Ratio - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
# Net enrollment change plotted against state name.
# x and y are passed by keyword with data=: seaborn 0.12+ no longer accepts
# them positionally, and the keyword form works on older releases as well.
sns.set(rc={"figure.figsize":(22,10)})
sns.lineplot(x='State Name', y='Net Enrollment Change', data=data)
plt.xticks(rotation=70);

In [None]:
sns.relplot(x='Year', y='Student Count',
           hue='Census Region',
            data=data.loc[data['State Name'].isin(['TEXAS', 'CALIFORNIA', 'FLORIDA', 'UTAH', 'MARYLAND', 'GEORGIA', 'VIRIGINIA', 'IDAHO', 'NORTH CAROLINA'])],
            kind='line', ci=None);

In [None]:
sns.relplot(x='Year', y='Student Count',
           hue='State Name',
            data=data,
            kind='line', ci=None);

In [None]:
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Base Amount', data=year20, bins=6, color='#064273')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Funding Base Amount')
plt.ylabel('Number of Districts')
plt.title('Distribution of Base Amount Funding, 2020')
plt.tight_layout()
plt.show();

In [None]:
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Base Amount', data=top25g, bins=5, color='#064273')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Funding Base Amount')
plt.ylabel('Number of Districts')
plt.title('Distribution of Base Amount Funding - Top 25 Growth, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
sns.set(rc={"figure.figsize":(8,8)})
#funding_order=['Student-based foundation', 'Resource-based allocation', 'Hybrid']
sns.histplot(x='Base Amount', data=top25l, bins=5, color='#064273')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
#plt.xticks(rotation=70)
plt.xlabel('Funding Base Amount')
plt.ylabel('Number of Districts')
plt.title('Distribution of Base Amount Funding - Top 25 Loss, 2013 - 2020')
plt.tight_layout()
plt.show();

In [None]:
fig, ax = plt.subplots(figsize=(16, 8))
sns.set_style('white')
state_order=['DISTRICT OF COLUMBIA', 'NEW JERSEY', 'CONNECTICUT', 'RHODE ISLAND', 'NORTH DAKOTA', 'MICHIGAN', 'CALIFORNIA', 'MARYLAND', 'COLORADO', 'ARKANSAS', 'NEVADA', 'NEW YORK', 'MINNESOTA', 'MISSOURI', 'IOWA', 'TEXAS', 'INDIANA', 'ALASKA', 'MISSISSIPPI', 'HAWAII', 'FLORIDA','KANSAS', 'OREGON', 'ARIZONA', 'LOUISIANA', 'KENTUCKY', 'UTAH', 'NEW HAMPSHIRE', 'GEORGIA', 'SOUTH CAROLINA']
ax = sns.barplot(x='Base Amount', y='State Name',
            data=baseamt,
            order=state_order,
           color='#8099FF')
# for index, value in enumerate(y):
#     plt.text(value, index,
#             str(value))
ax.bar_label(ax.containers[0], size=12, padding=3)
plt.xlabel('Base Amount')
plt.ylabel(None)
plt.title('Base Amount Funding (in USD)')
plt.tight_layout()
#plt.savefig('Base Amount Funding - National')
plt.show();

In [None]:
statefunding = baseamt.groupby('Base Amount')['State Name'].value_counts()
type(statefunding)

In [None]:
statefunding.head()

In [None]:
#profiles of top 25 - funding model
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
funding_order=['Student-based foundation', 'Hybrid', 'Resource-based allocation', 'Guaranteed tax base']
sns.countplot(x='Primary Funding Model', data=data, order=funding_order, color='#8099FF')
#plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right', borderaxespad=0)
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Number of Districts')
plt.title('Number of School Districts by Primary Funding Model - National')
plt.tight_layout()
plt.show();

In [None]:
fundingdf =(data['Primary Funding Model'].value_counts(normalize=True)*100).rename_axis('Funding Type').to_frame('Percentage').reset_index()
fundingdf

In [None]:
type(fundingdf)

In [None]:
sns.set(rc={"figure.figsize":(8,8)})
sns.set_style('white')
#funding_order=['Student-based foundation', 'Hybrid', 'Resource-based allocation', 'Guaranteed tax base']
sns.barplot(x='Funding Type', y='Percentage', data=fundingdf, color='#8099FF')
plt.xticks(rotation=70)
plt.xlabel(None)
plt.ylabel('Percentage of Districts')
plt.title('Percentage of School Districts by Primary Funding Model - National')
plt.tight_layout()
plt.show();

In [None]:
base_amount = baseamt[['State Name', 'Base Amount']]
base_amount.head()

In [None]:
basefunding = base_amount.drop_duplicates(keep='first').reset_index()
basefunding.head(10)

In [None]:
basefunding = basefunding.drop(columns='index')

In [None]:
basefunding.head()

In [None]:
bfunding = basefunding.rename(columns = {'state' : 'State Name', 'amount' : 'Base Amount'})
bfunding.head()

In [None]:
# Base amount per state from the de-duplicated bfunding frame, labels formatted to two decimals
fig, ax = plt.subplots(figsize=(18, 8))
sns.set_style('white')
state_order = [
    'DISTRICT OF COLUMBIA', 'NEW JERSEY', 'CONNECTICUT', 'RHODE ISLAND',
    'NORTH DAKOTA', 'MICHIGAN', 'CALIFORNIA', 'MARYLAND', 'COLORADO',
    'ARKANSAS', 'NEVADA', 'NEW YORK', 'MINNESOTA', 'MISSOURI', 'IOWA',
    'TEXAS', 'INDIANA', 'ALASKA', 'MISSISSIPPI', 'HAWAII', 'FLORIDA',
    'KANSAS', 'OREGON', 'ARIZONA', 'LOUISIANA', 'KENTUCKY', 'UTAH',
    'NEW HAMPSHIRE', 'GEORGIA', 'SOUTH CAROLINA',
]
sns.barplot(data=bfunding, x='Base Amount', y='State Name',
            order=state_order, color='#0033FF', ax=ax)
ax.bar_label(ax.containers[0], size=12, padding=3, fmt='%.2f')
ax.set_xlabel('Base Amount')
ax.set_ylabel(None)
ax.set_title('Base Amount Funding (in USD)')
fig.tight_layout()
fig.savefig('Base Amount Funding - National_2')
plt.show();
#hooray - floats are standardized in the bar labels