# Read in CSVs based on NCES Tables

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
from ipywidgets import interactive
# Show up to 500 columns when a DataFrame is displayed.
pd.options.display.max_columns = 500

ELSI Table Generator - https://nces.ed.gov/ccd/elsi/tableGenerator.aspx?savedTableID=357855

Select a Table Row - District

Select Years - 13-14, 14-15, 15-16, 16-17, 17-18, 18-19, 19-20, 20-21

Select Table Columns - Information (7), Characteristics (4), Enrollments (1), Teachers & Staff (3)

Select Filters (Refinements) - All 50 States + DC

In [2]:
def read_elsi_export(suffix, nrows):
    """Read one ELSI district export from ``../data/elsi<suffix>.csv``.

    Parameters
    ----------
    suffix : str
        School-year suffix used in the file name, e.g. ``"1314"`` for 2013-14.
    nrows : int
        Number of data rows to read from the file.

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``pd.read_csv`` with ``skiprows=6`` and ``nrows``.
    """
    # NOTE(review): skiprows=6 presumably steps over the title/preamble lines
    # that precede the header row, and nrows presumably stops before trailing
    # footnote lines in the export -- TODO confirm against the raw files.
    return pd.read_csv("../data/elsi" + suffix + ".csv", skiprows=6, nrows=nrows)


# One DataFrame per school year. The row count differs per file, so each one
# is listed explicitly next to the file it belongs to.
year_1314 = read_elsi_export("1314", nrows=18609)
year_1415 = read_elsi_export("1415", nrows=18620)
year_1516 = read_elsi_export("1516", nrows=18678)
year_1617 = read_elsi_export("1617", nrows=18468)
year_1718 = read_elsi_export("1718", nrows=18440)
year_1819 = read_elsi_export("1819", nrows=19406)
year_1920 = read_elsi_export("1920", nrows=19534)
year_2021 = read_elsi_export("2021", nrows=19388)

## DataFrame Shape
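Checks the shape of each DataFrame so the column counts can be compared across years. Note that the 2013-2014 DataFrame has one fewer column than the others (it has no 'Updated Status' column).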

In [10]:
#year_1314.shape

In [11]:
#year_1415.shape

In [12]:
#year_1516.shape

In [13]:
#year_1617.shape

In [14]:
#year_1718.shape

In [15]:
#year_1819.shape

In [16]:
#year_1920.shape

In [17]:
#year_2021.shape

## Adding Year Column
Adding a constant `Year` column to each DataFrame records which school year every row came from. The remaining columns can then be given generic (year-independent) names so that the DataFrames can be combined later.

In [18]:
# Label every row with its school year so each row stays attributable to a
# year once the per-year DataFrames are stacked into a single table.
# The assignment is done in place on each existing DataFrame.
for yearly_frame, school_year in [
    (year_1314, '2013-2014'),
    (year_1415, '2014-2015'),
    (year_1516, '2015-2016'),
    (year_1617, '2016-2017'),
    (year_1718, '2017-2018'),
    (year_1819, '2018-2019'),
    (year_1920, '2019-2020'),
    (year_2021, '2020-2021'),
]:
    yearly_frame['Year'] = school_year

## Standardizing Column Names
Each annual DataFrame is given the same generic column names; the only exception is 2013-2014, which has no 'Updated Status' column. Based on the number of nulls in each, either the State Name or the State column will be dropped. State Name is derived from the 'last year available' data, while State is derived from the selected year.

In [26]:
# Generic, year-independent column names shared by the yearly DataFrames.
# The names are positional: they replace each frame's existing column labels
# in order, so the list length must match the frame's column count.
ELSI_COLUMN_NAMES = [
    'Agency Name',
    'State Name',
    'State Abbreviation',
    'NCES ID',
    'State',
    'District',
    'County',
    'School Count',
    'ZIP Code',
    'District Type',
    'Locale',
    'Start of Year Status',
    'Updated Status',
    'Student Count',
    'FTE Teachers',
    'Pupil/Teacher Ratio',
    'Total Staff',
    'Year',
]

# year_1314 has one column fewer than the other frames: it gets the same names
# in the same order, minus 'Updated Status' (17 names instead of 18).
year_1314.columns = [name for name in ELSI_COLUMN_NAMES if name != 'Updated Status']

# Every other year gets the full 18-name list. A fresh list is passed each
# time so no two frames share the same list object.
for yearly_frame in (
    year_1415,
    year_1516,
    year_1617,
    year_1718,
    year_1819,
    year_1920,
    year_2021,
):
    yearly_frame.columns = list(ELSI_COLUMN_NAMES)

## pd.concat & df.reindex

In [34]:
# Stack the eight yearly DataFrames row-wise (axis=0), oldest school year first.
# NOTE(review): ignore_index is not set, so each frame keeps its own row
# labels and index values repeat across years in the combined frame.
yearly_frames = [
    year_1314,
    year_1415,
    year_1516,
    year_1617,
    year_1718,
    year_1819,
    year_1920,
    year_2021,
]
enrollments = pd.concat(yearly_frames, axis=0)

In [35]:
# Conform the combined frame to one fixed column order. 'Updated Status' is
# placed between 'Start of Year Status' and 'Student Count'.
column_order = [
    'Agency Name',
    'State Name',
    'State Abbreviation',
    'NCES ID',
    'State',
    'District',
    'County',
    'School Count',
    'ZIP Code',
    'District Type',
    'Locale',
    'Start of Year Status',
    'Updated Status',
    'Student Count',
    'FTE Teachers',
    'Pupil/Teacher Ratio',
    'Total Staff',
    'Year',
]
enrollments = enrollments.reindex(columns=column_order)