# Read in HTML from ECS website

In [None]:
import pandas as pd
import html5lib
# pd.read_html returns a *list* of DataFrames (one per table it finds on the
# page) rather than a single frame, so the pieces are concatenated into one.
# Background: https://stackoverflow.com/questions/39710903/pd-read-html-imports-a-list-rather-than-a-dataframe
url = 'https://reports.ecs.org/comparisons/k-12-and-special-education-funding-2021'
ecsw = pd.read_html(io=url)
ecs = pd.concat(objs=ecsw)
ecs

In [None]:
# Give the scraped columns descriptive names: the state header carries a
# trailing "arrow_upward" label, and the repeated "Source" columns
# (Source, Source.1, ...) are each given a distinct, descriptive name.
ecs = ecs.rename(
    columns={
        'STATE  arrow_upward': 'State',
        'Source': 'Base Amount Legal Source',
        'Source.1': 'Student Count Method Source',
        'Source.2': 'Special Education Funding Source',
        'Source.3': 'ELL Amount Source',
        'Source.4': 'Low-Income Funding Source',
        'Source.5': 'Gifted Funding Source',
        'Source.6': 'Small Size Identifier Source',
    }
)
ecs

In [None]:
# Print a summary of ecs: column names, non-null counts and dtypes.
ecs.info()

# Additional DataFrames

1. Funding Model: sbfdn (student-based foundation), rbased (resource-based), hybrid, gtbase (guaranteed tax base)

2. Base Amount: baseamt (base amount) and nbaseamt (no base amount)

3. Census Region: northeast, midwest, south, west (https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf)

4. 2020 Presidential Election: democrat, republican, batlgrnd (battleground), nbatlgrnd (not battleground) (https://ballotpedia.org/Presidential_battleground_states,_2020) (https://www.archives.gov/electoral-college/2020). Note: states that split their electoral votes are assigned to the party that received the majority.

5. Mask requirements in schools - may be harder to track due to changes over time?

6. Low income designations/distributions to look at e-rate funding potential (if time allows)

In [None]:
# Send the table to CSV to create columns with some of the information listed above; this should provide greater flexibility when looking at correlations and other EDA.
#ecs_table.to_csv('ecs_table.csv')

In [None]:
# Preview the first rows of ecs.
ecs.head()

In [None]:
# Upper-case every value in the 'State' column (assigned back onto the same
# frame), then preview the result.
ecs['State'] = ecs['State'].str.upper()
ecs.head()

In [None]:
#ecs.to_csv("../data/ecs_updated.csv")

In [None]:
# Load the table from the saved CSV and bind it to `ecs`. Only the first 51
# data rows are read; per the original note, this trims the final row, which
# holds column totals.
ecs = pd.read_csv(
    "../data/ecs_table.csv",
    nrows=51,
)
# Uncomment to check the last rows: ecs.tail()

In [None]:
# Rows of ecs whose 'Primary Funding Model' is 'Student-based foundation'.
sbfdn = ecs[ecs['Primary Funding Model'].eq('Student-based foundation')]
# Uncomment to preview: sbfdn.head()

In [None]:
# Rows of ecs whose 'Primary Funding Model' is 'Resource-based allocation'.
rbased = ecs[ecs['Primary Funding Model'].eq('Resource-based allocation')]
# Uncomment to preview: rbased.head()

In [None]:
# Rows of ecs whose 'Primary Funding Model' is 'Hybrid'.
hybrid = ecs[ecs['Primary Funding Model'].eq('Hybrid')]
# Uncomment to preview: hybrid.head()

In [None]:
# Rows of ecs whose 'Primary Funding Model' is 'Guaranteed tax base'.
gtbase = ecs[ecs['Primary Funding Model'].eq('Guaranteed tax base')]
# Uncomment to preview: gtbase.head()

In [None]:
# Rows of ecs flagged 'Yes' in the 'Base Amount (Y/N)' column.
baseamt = ecs[ecs['Base Amount (Y/N)'].eq('Yes')]
# Uncomment to preview: baseamt.head()

In [None]:
# Rows of ecs flagged 'No' in the 'Base Amount (Y/N)' column.
nbaseamt = ecs[ecs['Base Amount (Y/N)'].eq('No')]
# Uncomment to preview: nbaseamt.head()

In [None]:
# Rows of ecs whose 'Census Region' is 'Northeast'.
northeast = ecs[ecs['Census Region'].eq('Northeast')]
# Uncomment to preview: northeast.head()

In [None]:
# Rows of ecs whose 'Census Region' is 'Midwest'.
midwest = ecs[ecs['Census Region'].eq('Midwest')]
# Uncomment to preview: midwest.head()

In [None]:
# Rows of ecs whose 'Census Region' is 'South'.
south = ecs[ecs['Census Region'].eq('South')]
# Uncomment to preview: south.head()

In [None]:
# Rows of ecs whose 'Census Region' is 'West'.
west = ecs[ecs['Census Region'].eq('West')]
# Uncomment to preview: west.head()

In [None]:
# Rows of ecs whose '2020 Election Result' is 'Republican'.
republican = ecs[ecs['2020 Election Result'].eq('Republican')]
# Uncomment to preview: republican.head()

In [None]:
# Rows of ecs whose '2020 Election Result' is 'Democrat'.
democrat = ecs[ecs['2020 Election Result'].eq('Democrat')]
# Uncomment to preview: democrat.head()

In [None]:
# Rows of ecs flagged 'Yes' in the '2020 Battleground State' column.
batlgrnd = ecs[ecs['2020 Battleground State'].eq('Yes')]
# Uncomment to preview: batlgrnd.head()

In [None]:
# Rows of ecs flagged 'No' in the '2020 Battleground State' column.
nbatlgrnd = ecs[ecs['2020 Battleground State'].eq('No')]
# Uncomment to preview: nbatlgrnd.head()

# General EDA

In [None]:
# Share of rows in ecs per primary funding model, expressed as a percentage.
ecs['Primary Funding Model'].value_counts(normalize=True).mul(100)

In [None]:
# Number of rows in ecs for each primary funding model.
ecs['Primary Funding Model'].value_counts()

In [None]:
#ecs.groupby('Primary Funding Model')['State'].value_counts()
# The display is hard to read - need to look into spacing issues or other ways to represent this information in an easier-to-read format.

In [None]:
# Count of each primary funding model within each census region.
ecs.groupby('Census Region')['Primary Funding Model'].value_counts()

In [None]:
# Percentage breakdown of primary funding models within each census region.
(
    ecs.groupby('Census Region')['Primary Funding Model']
    .value_counts(normalize=True)
    .mul(100)
)

In [None]:
# Show the rows that have a base amount, ordered by 'Base Amount' descending.
# NOTE(review): if 'Base Amount' is stored as text (e.g. with "$" or commas),
# this ordering is lexicographic rather than numeric - confirm the dtype.
baseamt.sort_values(by='Base Amount', ascending=False)

In [None]:
# Count 'State' values within each distinct 'Base Amount' value of baseamt.
baseamt.groupby('Base Amount')['State'].value_counts()