In [26]:
import pandas as pd
import numpy as np
import util

# Directory the enrollment CSVs are read from.
enrollment_path = './data/co/enrollment/'

# Years covered by the main and FRL file lists, oldest first.
years = list(range(2015, 2020))

# One main enrollment file per entry in `years`, in the same order.
enrollment_files = [
    'co_enrollment_{}_cleaned.csv'.format(year) for year in years
]

# One FRL file per entry in `years`, in the same order.
enrollment_frl_files = [
    'co_enrollment_frl_{}_cleaned.csv'.format(year) for year in years
]

# Only the 2018 and 2019 IPST files are listed.
enrollment_ipst_files = [
    'co_enrollment_ipst_{}_cleaned.csv'.format(year) for year in (2018, 2019)
]

def load_enrollment_years(file_names, file_years, base_path=enrollment_path):
    """Read one CSV per year and stack them into a single DataFrame.

    Parameters
    ----------
    file_names : sequence of str
        CSV file names, relative to ``base_path``.
    file_years : sequence of int
        Year to stamp on the rows of the file at the same position in
        ``file_names``.
    base_path : str
        Prefix prepended to every file name.

    Returns
    -------
    pandas.DataFrame
        All rows of all files with a fresh 0..n-1 index, the union of the
        files' columns, and an added ``year`` column.

    Raises
    ------
    ValueError
        If the two sequences differ in length (a silent ``zip`` truncation
        would otherwise drop files or mislabel years), or if no files are
        given (raised by ``pd.concat``).
    """
    if len(file_names) != len(file_years):
        raise ValueError(
            'got {} files but {} years'.format(len(file_names), len(file_years))
        )

    yearly_frames = []
    for file_name, year in zip(file_names, file_years):
        frame = pd.read_csv(base_path + file_name)
        # A scalar broadcasts to every row; no per-row apply is needed.
        frame['year'] = year
        yearly_frames.append(frame)

    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same stacked result without re-copying the accumulated frame each time.
    return pd.concat(yearly_frames, ignore_index=True, sort=True)


# import main enrollment files
df = load_enrollment_years(enrollment_files, years)

# import frl files
df_frl = load_enrollment_years(enrollment_frl_files, years)

# import ipst files (only 2018 and 2019 are listed)
df_ipst = load_enrollment_years(enrollment_ipst_files, [2018, 2019])

In [31]:
# Collapse every spelling of the all-grades total row into 'All Grades'.
# Rows whose grade is missing (NaN) are relabelled 'All Grades' as well --
# they are kept, not removed.
grade_total_labels = ['Sch Total', 'ALL GRADES TOTAL', 'ALL GRADE LEVELS']
is_total_row = df['grade'].isin(grade_total_labels) | df['grade'].isna()
# One vectorised pass replaces the four temp-column rewrites; every other
# grade value is left exactly as it was.
df['grade'] = df['grade'].mask(is_total_row, 'All Grades')

In [32]:
# keep only rows that carry a school_id; rows with a null school_id are the
# district-level results
has_school_id = df['school_id'].notna()
df = df[has_school_id]

In [41]:
# Inspect one F/M column pair before combining genders.
# NOTE(review): util.fieldTypes is defined outside this notebook; judging by
# the recorded output it reports one type per listed column -- confirm.
native_gender_columns = [
    'American Indian or Alaskan Native F',
    'American Indian or Alaskan Native M',
]
util.fieldTypes(df, native_gender_columns)

<class 'numpy.int64'>
<class 'numpy.int64'>


In [42]:
# Subgroup names; each one has a '<name> F' and a '<name> M' column in df
# that get summed into a single '<name>' column.
subgroups = [
    'American Indian or Alaskan Native',
    'Asian',
    'Black or African American',
    'Hispanic or Latino',
    'Native Hawaiian or Other Pacific Islander',
    'Two or More Races',
    'White',
]

# create function to combine gender columns
def _parse_count(value):
    """Convert one cell to an int, accepting thousands separators.

    Accepts ints, strings such as '1,234', and whole-number floats or float
    strings such as 12.0 / '1,005.0' (which appear when a count column has
    been upcast to float). Raises ValueError for NaN, infinities, fractional
    values and non-numeric text.
    """
    text = str(value).replace(',', '')
    try:
        # Fast path, identical to the previous behaviour for integer text.
        return int(text)
    except ValueError:
        number = float(text)  # ValueError here for non-numeric text
        if not number.is_integer():  # also False for NaN and +/-inf
            raise ValueError(
                'expected a whole-number count, got {!r}'.format(value)
            )
        return int(number)


def combineGenders(row, column1, column2):
    """Return the sum of two count cells of ``row`` as an int.

    Parameters
    ----------
    row : mapping or pandas.Series
        Anything indexable by ``column1`` and ``column2``.
    column1, column2 : hashable
        Keys of the two cells to add.

    Raises
    ------
    ValueError
        If either cell is not a whole number (including NaN).
    """
    return _parse_count(row[column1]) + _parse_count(row[column2])

# add one combined column per subgroup: '<subgroup> F' + '<subgroup> M'
for subgroup in subgroups:
    female_column = subgroup + ' F'
    male_column = subgroup + ' M'
    # apply passes each row first, then the two column names from `args`
    df[subgroup] = df.apply(
        combineGenders, axis=1, args=(female_column, male_column)
    )

In [44]:
# reshape data: wide (one column per group) -> long (one row per group)
id_columns = ['year', 'district_id', 'district', 'school_id', 'school', 'grade']
group_columns = [
    'American Indian or Alaskan Native',
    'Asian',
    'Black or African American',
    'Hispanic or Latino',
    'Native Hawaiian or Other Pacific Islander',
    'Two or More Races',
    'White',
    'All Groups',
]
# NOTE(review): 'All Groups' is melted as-is, without the comma stripping
# that combineGenders applies to the F/M columns -- confirm it is numeric.
df = df.melt(
    id_vars=id_columns,
    value_vars=group_columns,
    var_name='group_state',
    value_name='num',
)

In [46]:
# add grade column: every FRL row is labelled 'All Grades' (this overwrites
# any existing grade column). A scalar broadcasts to all rows, so no
# per-row apply is needed.
df_frl['grade'] = 'All Grades'

# reshape frl: one row per (school, year, lunch category) with the count in 'num'
df_frl = df_frl.melt(
    id_vars=['year', 'district_id', 'district', 'school_id', 'school', 'grade'],
    value_vars=[
        'FREE AND REDUCED',
        'FREE LUNCH',
        'REDUCED LUNCH',
    ],
    var_name='group_state',
    value_name='num',
)

   year  district_id    district  school_id  \
0  2015         10.0  MAPLETON 1        0.0   
1  2015         10.0  MAPLETON 1      187.0   
2  2015         10.0  MAPLETON 1      212.0   
3  2015         10.0  MAPLETON 1      263.0   
4  2015         10.0  MAPLETON 1      309.0   

                                      school       grade       group_state  \
0                               Not a school  All Grades  FREE AND REDUCED   
1  MAPLETON EXPEDITIONARY SCHOOL OF THE ARTS  All Grades  FREE AND REDUCED   
2         MAPLETON EARLY COLLEGE HIGH SCHOOL  All Grades  FREE AND REDUCED   
3                  GLOBAL LEADERSHIP ACADEMY  All Grades  FREE AND REDUCED   
4                        ACADEMY HIGH SCHOOL  All Grades  FREE AND REDUCED   

   num  
0    0  
1  366  
2  145  
3  491  
4  233  


In [47]:
# add grade column: every IPST row is labelled 'All Grades' (this overwrites
# any existing grade column). A scalar broadcasts to all rows, so no
# per-row apply is needed.
df_ipst['grade'] = 'All Grades'

# reshape ipst: one row per (school, year, category) with the count in 'num'
df_ipst = df_ipst.melt(
    id_vars=['year', 'district_id', 'district', 'school_id', 'school', 'grade'],
    value_vars=[
        'Special Education',
        'Section 504',
        'Online',
        'Minority',
        'Migrant',
        'Immigrant',
        'Homeless',
        'Gifted and Talented',
        'EL',
    ],
    var_name='group_state',
    value_name='num',
)
print(df_ipst.head())

   year  district_id    district  school_id  \
0  2018           10  MAPLETON 1        187   
1  2018           10  MAPLETON 1        212   
2  2018           10  MAPLETON 1        263   
3  2018           10  MAPLETON 1        309   
4  2018           10  MAPLETON 1        501   

                                      school       grade        group_state  \
0  MAPLETON EXPEDITIONARY SCHOOL OF THE ARTS  All Grades  Special Education   
1         MAPLETON EARLY COLLEGE HIGH SCHOOL  All Grades  Special Education   
2                  GLOBAL LEADERSHIP ACADEMY  All Grades  Special Education   
3                        ACADEMY HIGH SCHOOL  All Grades  Special Education   
4                  MONTEREY COMMUNITY SCHOOL  All Grades  Special Education   

    num  
0  71.0  
1  34.0  
2  36.0  
3  52.0  
4  66.0  


In [48]:
# append dfs: stack the main, FRL and IPST long tables into one frame.
# DataFrame.append was removed in pandas 2.0; one concat yields the same
# rows in the same order with a fresh index and sorted column union.
# NOTE: re-running this cell stacks df_frl / df_ipst onto df again.
df = pd.concat([df, df_frl, df_ipst], ignore_index=True, sort=True)

In [49]:
# export data: write the combined table without the positional index
finalized_path = './data/finalized/co_enrollment.csv'
df.to_csv(finalized_path, index=False)