In [3]:
import pandas as pd
import numpy as np
import util

enrollment_path = './data/mn/enrollment/'

# Cleaned per-year enrollment extracts, listed in chronological order.
enrollment_files = [
    'mn_enrollment_2014_cleaned.csv',
    'mn_enrollment_2015_cleaned.csv',
    'mn_enrollment_2016_cleaned.csv',
    'mn_enrollment_2017_cleaned.csv',
    'mn_enrollment_2018_cleaned.csv',
    'mn_enrollment_2019_cleaned.csv',
]

# File names follow 'mn_enrollment_<year>_cleaned.csv'; take the <year> token
# instead of relying on fixed character offsets. Years are kept as strings.
years = [file_name.split('_')[2] for file_name in enrollment_files]

# Read each file, tag its rows with the year, and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.
yearly_frames = []
for file_name, year in zip(enrollment_files, years):
    yearly = pd.read_csv(enrollment_path + file_name)
    # Scalar assignment broadcasts to every row (and works on an empty frame),
    # unlike a row-wise apply that returns the same constant.
    yearly['year'] = year
    yearly_frames.append(yearly)

# sort=True orders the union of columns when the yearly files do not share
# exactly the same header, matching the previous append(..., sort=True) calls.
df = pd.concat(yearly_frames, ignore_index=True, sort=True)

In [5]:
# Rename fields: identifier columns get short snake_case names, and each
# 'Total <group> Count' column is reduced to the bare group label.
# NOTE(review): the column listing printed below this cell shows both
# 'English Learner' / 'English learner' and 'Reduced Priced' / 'Reduced-Priced'
# spellings for the suppression columns. Confirm the 'Total ... Count' columns
# do not have the same variants, since keys absent from the frame are ignored.
column_renames = {
    'District Name': 'district',
    'District Number': 'district_id',
    'Grade': 'grade',
    'School Name': 'school',
    'School Number': 'school_id',
    'Total American Indian or Alaska Native Count': 'American Indian or Alaska Native',
    'Total Asian Count': 'Asian',
    'Total Black or African American Count': 'Black or African American',
    'Total English learner Identified Count': 'English Learner Identified',
    'Total Enrollment': 'All Groups',
    'Total Hispanic or Latino Count': 'Hispanic or Latino',
    'Total Native Hawaiian or Pacific Islander Count': 'Native Hawaiian or Pacific Islander',
    'Total Students Eligible for Free or Reduced Priced Meals Count': 'Students Eligible for Free or Reduced Priced Meals',
    'Total Students Experiencing Homelessness Count': 'Students Experiencing Homelessness',
    'Total Students Receiving Special Education Services Count': 'Students Receiving Special Education Services',
    'Total Students of Color or American Indian Count': 'Students of Color or American Indian',
    'Total Two or More Races Count': 'Two or More Races',
    'Total White Count': 'White',
}
df = df.rename(columns=column_renames)

Index(['Additional Suppression English Learner Identified Applied',
       'Additional Suppression English learner Identified Applied',
       'Additional Suppression Free or Reduced Priced Meals Applied',
       'Additional Suppression Free or Reduced-Priced Meals Applied',
       'Additional Suppression Homeless Applied',
       'Additional Suppression Special Education Applied',
       'American Indian or Alaska Native Female Count',
       'American Indian or Alaska Native Male Count', 'Asian Female Count',
       'Asian Male Count', 'Black or African American Female Count',
       'Black or African American Male Count', 'County Name', 'County Number',
       'Data Year', 'district', 'district_id', 'District Type', 'ECSU Number',
       'Economic Development Region', 'Filter Groups', 'grade',
       'Hispanic or Latino Female Count', 'Hispanic or Latino Male Count',
       'Native Hawaiian or Pacific Islander Female Count',
       'Native Hawaiian or Pacific Islander Male Count',
 

In [7]:
# Reshape from wide (one column per student group) to long: one row per
# year / district / school / grade / group, with the count stored in 'num'.
melt_id_columns = ['year', 'district_id', 'district', 'school_id', 'school', 'grade']
melt_group_columns = [
    'American Indian or Alaska Native',
    'Asian',
    'Black or African American',
    'English Learner Identified',
    'All Groups',
    'Hispanic or Latino',
    'Native Hawaiian or Pacific Islander',
    'Students Eligible for Free or Reduced Priced Meals',
    'Students Experiencing Homelessness',
    'Students Receiving Special Education Services',
    'Students of Color or American Indian',
    'Two or More Races',
    'White',
]
df = df.melt(
    id_vars=melt_id_columns,
    value_vars=melt_group_columns,
    var_name='group_state',
    value_name='num',
)
In [11]:
# Import 2013 data, which is split across two files (special populations and
# gender/ethnicity). The rows of both files are stacked into one frame.
df_2013 = pd.read_csv(enrollment_path + 'mn_enrollment_2013_cleaned_specialpop.csv')
df2 = pd.read_csv(enrollment_path + 'mn_enrollment_2013_cleaned_gendereth.csv')
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# sort=True orders the union of columns, as the previous append call did.
df_2013 = pd.concat([df_2013, df2], ignore_index=True, sort=True)
# Scalar assignment broadcasts the (string) year to every row without a
# row-wise apply.
df_2013['year'] = '2013'

In [12]:
# Column prefixes that are split into '<prefix>_Male' / '<prefix>_Female'
# columns in the 2013 data.
gender_groups = [
    'AMI',
    'API',
    'HIS',
    'BLK',
    'WHT',
]

# Combine gender columns: add the male and female columns for each prefix into
# a single '<prefix>' column. Column-wise addition gives the same values as the
# former row-wise apply (missing values still propagate) but is vectorised and
# also works when the frame has no rows.
for group in gender_groups:
    df_2013[group] = df_2013[group + '_Male'] + df_2013[group + '_Female']

In [13]:
# Reshape 2013 data from wide to long, using the same identifier columns and
# the same 'group_state' / 'num' output columns as the later years.
id_columns_2013 = ['year', 'district_id', 'district', 'school_id', 'school', 'grade']
group_columns_2013 = [
    'FreeK12',
    'RedK12',
    'LEPIdentifiedK12',
    'LEPServedK12',
    'SPEK12',
    'EC12Enr',
    'Free-EC12',
    'Red-EC12',
    'LEPIdentifiedEC12',
    'LEPServedEC12',
    'SPE-EC12',
    'Minority',
    'All Groups',
    'AMI',
    'API',
    'HIS',
    'BLK',
    'WHT',
]
df_2013 = df_2013.melt(
    id_vars=id_columns_2013,
    value_vars=group_columns_2013,
    var_name='group_state',
    value_name='num',
)

In [15]:
# Append 2013 results to all other results. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0; sort=True keeps the
# column ordering behaviour of the previous append call.
df = pd.concat([df, df_2013], ignore_index=True, sort=True)

In [16]:
# Export results: write the combined long-format frame to CSV, leaving out the
# positional row index.
output_csv = './data/finalized/mn_enrollment.csv'
df.to_csv(output_csv, index=False)