# School Enrollment Script
Run all of these code blocks in order to create the SCHOOL_ENROLLMENT data table.

### Import Statements

In [10]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create row for each year

In [11]:
def _percent_to_fraction(estimate):
    """Convert a percentage string such as '12.5%' to a fraction (0.125).

    Returns None for anything that is not a string containing '%'. That
    covers count values such as '21,504' as well as blank (NaN) cells, which
    would otherwise raise a TypeError on the `in` test.
    """
    if isinstance(estimate, str) and '%' in estimate:
        return float(estimate.rstrip('%')) / 100
    return None


def read_file_details(file, year):
    """Build a single-row table of school enrollment estimates for one year.

    Only the first six rows of the CSV are used. The first row's 'Estimate'
    is parsed as the total count (commas removed); the rows whose 'Estimate'
    is a percentage are multiplied by that total to get a population estimate
    per label.

    Parameters
    ----------
    file : path or file-like object accepted by ``pd.read_csv``
        CSV with at least the columns 'Label' and 'Estimate'.
    year : int
        Value used as the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year``, one column per enrollment label (renamed
        to abbreviated underscored names where a mapping exists), plus
        'pop_3_plus_school_enrolled', the sum of the other columns.
    """
    # Read in file
    df = pd.read_csv(file)

    # Keep only full population stats. Slice before any parsing so blank
    # cells further down the file cannot break it, and copy so the column
    # assignments below never write into a slice of the raw table.
    df = df.iloc[:6].copy()

    # Get rid of spaces in Labels
    df['Label'] = df['Label'].str.lstrip()

    # Population 3 years and over enrolled in school is the count in the first row
    total_enrolled = int(df.iloc[0]['Estimate'].replace(',', ''))

    # Convert percentage estimates to float
    df['Percentage'] = df['Estimate'].apply(_percent_to_fraction)

    # Keep only the percentage rows. Restricting dropna to 'Percentage' means
    # a row is not lost just because another column is blank.
    levels = df.dropna(subset=['Percentage'])

    # Calculate population estimates and lay them out as one row for this year
    estimates = levels['Percentage'].astype(float) * total_enrolled
    result = pd.DataFrame(
        [estimates.to_numpy()],
        index=[year],
        columns=pd.Index(levels['Label'], name='Label'),
    )

    # Rename all columns with abbreviated underscored naming conventions
    result = result.rename(columns={
        'Nursery school, preschool': 'nursery_preschool',
        'Kindergarten': 'kindergarten',
        'Elementary school (grades 1-8)': 'elementary_grades_1_8',
        'High school (grades 9-12)': 'highSchool_grades_9_12',
        'College or graduate school': 'college_grad_school',
    })

    # Calculate extra column: sum of the per-level estimates. Because it is
    # derived from the percentages, it can differ slightly from the count
    # in the first row of the file.
    result['pop_3_plus_school_enrolled'] = result.sum(axis=1)

    return result

### Test that function works

In [12]:
# Smoke test: build the single-row table for 2014 and display it
read_file_details(file='data/2014/SCHOOL_ENROLLMENT.CSV', year=2014)

Label,nursery_preschool,kindergarten,elementary_grades_1_8,highSchool_grades_9_12,college_grad_school,pop_3_plus_school_enrolled
2014,1118.208,688.128,7526.4,3677.184,8494.08,21504.0


### Test that function works for each year
The only expected output is "No file for year '2020'".

In [13]:
# Run the function for every year from 2010 through 2022 and report failures
for year in range(2010, 2023):
    file = f'data/{year}/SCHOOL_ENROLLMENT.CSV'

    try:
        read_file_details(file, year)
    except FileNotFoundError:
        # Only a genuinely missing file is reported as "No file"
        print(f"No file for year '{year}'")
    except Exception as err:
        # Surface parsing/processing problems instead of mislabeling them as
        # a missing file; keep going so every year is still checked
        print(f"Error processing year '{year}': {err!r}")

No file for year '2020'


### Run function for each year

In [14]:
# Build one single-row table per year, newest first (2020 is not included).
# The names on the left line up one-to-one with the years in the tuple below.
(
    SCHOOL_ENROLLMENT_2022,
    SCHOOL_ENROLLMENT_2021,
    SCHOOL_ENROLLMENT_2019,
    SCHOOL_ENROLLMENT_2018,
    SCHOOL_ENROLLMENT_2017,
    SCHOOL_ENROLLMENT_2016,
    SCHOOL_ENROLLMENT_2015,
    SCHOOL_ENROLLMENT_2014,
    SCHOOL_ENROLLMENT_2013,
    SCHOOL_ENROLLMENT_2012,
    SCHOOL_ENROLLMENT_2011,
    SCHOOL_ENROLLMENT_2010,
) = [
    read_file_details(f'data/{yr}/SCHOOL_ENROLLMENT.CSV', yr)
    for yr in (2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010)
]

### Combine each year into single dataframe containing all years

In [15]:
# Stack the per-year rows into one table, newest year first
SCHOOL_ENROLLMENT = pd.concat(
    objs=[
        SCHOOL_ENROLLMENT_2022,
        SCHOOL_ENROLLMENT_2021,
        SCHOOL_ENROLLMENT_2019,
        SCHOOL_ENROLLMENT_2018,
        SCHOOL_ENROLLMENT_2017,
        SCHOOL_ENROLLMENT_2016,
        SCHOOL_ENROLLMENT_2015,
        SCHOOL_ENROLLMENT_2014,
        SCHOOL_ENROLLMENT_2013,
        SCHOOL_ENROLLMENT_2012,
        SCHOOL_ENROLLMENT_2011,
        SCHOOL_ENROLLMENT_2010,
    ]
)
SCHOOL_ENROLLMENT

Label,nursery_preschool,kindergarten,elementary_grades_1_8,highSchool_grades_9_12,college_grad_school,pop_3_plus_school_enrolled
2022,1834.56,687.96,9860.76,3967.236,6581.484,22932.0
2021,1389.3,856.735,8405.265,3982.66,8521.04,23155.0
2019,1053.855,1288.045,9016.315,3723.621,8337.164,23419.0
2018,1254.855,2025.38,7286.965,3918.67,7507.115,21992.985
2017,1837.8,816.8,8351.78,2511.66,6901.96,20420.0
2016,940.7,1147.654,6039.294,2934.984,7751.368,18814.0
2015,1389.172,716.992,8402.25,3898.644,7998.942,22406.0
2014,1118.208,688.128,7526.4,3677.184,8494.08,21504.0
2013,839.16,923.076,8286.705,3524.472,7405.587,20979.0
2012,939.55,920.759,6952.67,3100.515,6896.297,18809.791


### Reset index on combined dataframe

In [16]:
# Move the year labels out of the index into a regular 'year' column
SCHOOL_ENROLLMENT = SCHOOL_ENROLLMENT.reset_index().rename(columns={'index': 'year'})
SCHOOL_ENROLLMENT

Label,year,nursery_preschool,kindergarten,elementary_grades_1_8,highSchool_grades_9_12,college_grad_school,pop_3_plus_school_enrolled
0,2022,1834.56,687.96,9860.76,3967.236,6581.484,22932.0
1,2021,1389.3,856.735,8405.265,3982.66,8521.04,23155.0
2,2019,1053.855,1288.045,9016.315,3723.621,8337.164,23419.0
3,2018,1254.855,2025.38,7286.965,3918.67,7507.115,21992.985
4,2017,1837.8,816.8,8351.78,2511.66,6901.96,20420.0
5,2016,940.7,1147.654,6039.294,2934.984,7751.368,18814.0
6,2015,1389.172,716.992,8402.25,3898.644,7998.942,22406.0
7,2014,1118.208,688.128,7526.4,3677.184,8494.08,21504.0
8,2013,839.16,923.076,8286.705,3524.472,7405.587,20979.0
9,2012,939.55,920.759,6952.67,3100.515,6896.297,18809.791


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [17]:
# Write the combined table without the positional index column
SCHOOL_ENROLLMENT.to_csv(path_or_buf="data/final/SCHOOL_ENROLLMENT.CSV", index=False)

### Check that dataframe was saved properly as CSV

In [18]:
# Load the saved file back in and display it to confirm the write succeeded
df = pd.read_csv(filepath_or_buffer='data/final/SCHOOL_ENROLLMENT.CSV')
df

Unnamed: 0,year,nursery_preschool,kindergarten,elementary_grades_1_8,highSchool_grades_9_12,college_grad_school,pop_3_plus_school_enrolled
0,2022,1834.56,687.96,9860.76,3967.236,6581.484,22932.0
1,2021,1389.3,856.735,8405.265,3982.66,8521.04,23155.0
2,2019,1053.855,1288.045,9016.315,3723.621,8337.164,23419.0
3,2018,1254.855,2025.38,7286.965,3918.67,7507.115,21992.985
4,2017,1837.8,816.8,8351.78,2511.66,6901.96,20420.0
5,2016,940.7,1147.654,6039.294,2934.984,7751.368,18814.0
6,2015,1389.172,716.992,8402.25,3898.644,7998.942,22406.0
7,2014,1118.208,688.128,7526.4,3677.184,8494.08,21504.0
8,2013,839.16,923.076,8286.705,3524.472,7405.587,20979.0
9,2012,939.55,920.759,6952.67,3100.515,6896.297,18809.791
