In [15]:
import pandas as pd
import numpy as np

## Year 2021

In [16]:
# Load the 2021 dropout file and discard rows with any missing value.
# RCDTS is read as text: later cells index into it as a string and test for a
# leading '0', which type inference would destroy if the column looked numeric.
d21 = pd.read_csv('dropout21.csv', dtype={'RCDTS': str}).dropna()

## Year 2019

In [17]:
# Load the 2019 dropout file and discard rows with any missing value.
# RCDTS is read as text: later cells index into it as a string and test for a
# leading '0', which type inference would destroy if the column looked numeric.
d19 = pd.read_csv('dropout19.csv', dtype={'RCDTS': str}).dropna()

## Year 2018

In [18]:
# Load the 2018 dropout file and discard rows with any missing value.
# RCDTS is read as text: later cells index into it as a string and test for a
# leading '0', which type inference would destroy if the column looked numeric.
d18 = pd.read_csv('dropout18.csv', dtype={'RCDTS': str}).dropna()

## Year 2017

In [19]:
# Load the 2017 dropout file and discard rows with any missing value.
# RCDTS is read as text: later cells index into it as a string and test for a
# leading '0', which type inference would destroy if the column looked numeric.
d17 = pd.read_csv('dropout17.csv', dtype={'RCDTS': str}).dropna()

## Merge Four Years of Drop-outs

In [20]:
# Tag each yearly frame with the year it was loaded for.
d21['year'] = 2021
d19['year'] = 2019
d18['year'] = 2018
d17['year'] = 2017

# Keep only the schools that appear in all four years.
# sorted() pins the row order: iterating a set of strings can differ from one
# interpreter run to the next, which made the exported row order unstable.
school_code = sorted(
    set(d21['RCDTS']) & set(d19['RCDTS']) & set(d18['RCDTS']) & set(d17['RCDTS'])
)

# Re-index every frame by RCDTS and restrict it to the common schools.
# NOTE: this replaces the frames in place, so the cell cannot be re-run
# without first re-running the load cells (RCDTS is no longer a column).
d21 = d21.set_index('RCDTS').loc[school_code, :]
d19 = d19.set_index('RCDTS').loc[school_code, :]
d18 = d18.set_index('RCDTS').loc[school_code, :]
d17 = d17.set_index('RCDTS').loc[school_code, :]

In [21]:
# Stack the four yearly frames; reset_index() turns the RCDTS index back into
# an ordinary column.
final_drop = pd.concat([d21, d19, d18, d17]).reset_index()

# Charter indicator: 1 for charter schools, 0 for the listed non-charter types.
# Labels are stripped before the lookup so that any padded label (not only the
# explicitly listed 'ELEMENTARY ') resolves; labels absent from the map still
# become NaN.
type_map = {'HIGH SCHOOL': 0, 'ELEMENTARY': 0, 'ELEMENTARY ': 0, 'CHARTER SCH': 1}
final_drop['charter'] = final_drop['School Type'].str.strip().map(type_map)

## Schooling Mode

In [22]:
# Load the learning-mode records, keeping only the columns listed here.
mode = pd.read_csv('schoolmode.csv')[['Charter', 'StateAssignedSchoolID', 'EnrollmentTotal',
                     'TimePeriodStart', 'TimePeriodEnd', 'LearningModel']]

# Share of records per learning model within each school.
# The Series itself is renamed (rather than a column after to_frame()), because
# the name value_counts() gives its result differs between pandas versions
# ('LearningModel' before 2.0, 'proportion' from 2.0 on); renaming the Series
# yields a 'normalized' column either way.
illi_mode = (
    mode.groupby(['StateAssignedSchoolID'])['LearningModel']
    .value_counts(normalize=True)
    .rename('normalized')
    .reset_index()
)

# Score assigned to each learning model; models missing from the map get NaN.
score_map = {'Virtual': 0, 'Hybrid': 50, 'Closed': 0, 'In-person': 100}
illi_mode['score'] = illi_mode['LearningModel'].map(score_map)

def weighted_average(df):
    """Return the sum of ``normalized * score`` over the rows of ``df``.

    ``df`` must provide 'normalized' and 'score' columns. The products are
    accumulated one by one starting from 0, so a NaN in either column makes
    the result NaN and an empty frame yields 0.
    """
    weighted_terms = df['normalized'] * df['score']
    total = 0
    for term in weighted_terms:
        total = total + term
    return total

# One weighted score per school, computed by weighted_average over that
# school's rows of illi_mode.
per_school = illi_mode.groupby(['StateAssignedSchoolID']).apply(weighted_average)

# Sort ascending, convert to a frame, and name the value column 'schoolmode'.
column_names = {0: 'schoolmode',
                'DistrictName': 'District Name',
                'SchoolName': 'School Name'}
school_score = per_school.sort_values().to_frame().reset_index().rename(columns=column_names)

# Divide by 100, then take the complement (1 - x), so a score of 100 becomes 0
# and a score of 0 becomes 1.
school_score['schoolmode'] = 1 - school_score['schoolmode'] / 100

In [23]:
# Share of records per learning model within each school.
# The Series itself is renamed because the name value_counts() gives its result
# differs between pandas versions ('LearningModel' before 2.0, 'proportion'
# from 2.0 on); renaming the Series yields a 'normalized' column either way.
norm_mode = (
    mode.groupby(['StateAssignedSchoolID'])['LearningModel']
    .value_counts(normalize=True)
    .rename('normalized')
    .reset_index()
)

# One row per school, one column per learning model; a model a school never
# used has no entry after the pivot and is filled with 0.
pivot_df = pd.pivot(norm_mode, index=['StateAssignedSchoolID'],
                    columns='LearningModel', values='normalized').reset_index()
pivot_df = pivot_df.fillna(0)

# Attach the per-school 'schoolmode' score and give the share columns their
# final names.
new_illi_mode = pivot_df.merge(school_score, on='StateAssignedSchoolID')
new_illi_mode = new_illi_mode.rename(columns={
    'Closed': 'closedper', 'Hybrid': 'hybridper', 'In-person': 'inpersonper',
    'Virtual': 'virtualper'})

# The row index is written as the first CSV column on purpose: the cell that
# reloads this file discards its first column.
new_illi_mode.to_csv('schoolmode_il.csv')

## Merge drop-out and school mode

In [24]:
# Reload the per-school mode table; the first CSV column is the saved row index
# and is discarded. The school id is read as text so it is compared with RCDTS
# as a string instead of depending on type inference.
mode = pd.read_csv('schoolmode_il.csv', dtype={'StateAssignedSchoolID': str}).iloc[:, 1:]

# Remove exactly one leading '0' from every RCDTS that starts with one.
# Done in a single vectorised pass: the previous row-by-row loop rescanned the
# whole frame for each row and could strip a code twice when a shortened code
# coincided with another school's original code.
final_drop['RCDTS'] = final_drop['RCDTS'].str.replace(r'^0', '', regex=True)

# Inner join on the school code. rename_axis() names the joined index
# explicitly so reset_index() always produces an 'RCDTS' column, whatever name
# the merge left on the index.
drop_mode = (
    final_drop.set_index('RCDTS')
    .merge(mode.set_index('StateAssignedSchoolID'), left_index=True, right_index=True)
    .rename_axis('RCDTS')
    .reset_index()
)

# Every year other than 2021 is set to fully in-person:
# in-person share 1, every other share and the schoolmode score 0.
pre_2021 = drop_mode['year'] != 2021
baseline_values = {'schoolmode': 0, 'virtualper': 0, 'inpersonper': 1,
                   'hybridper': 0, 'closedper': 0}
for column, value in baseline_values.items():
    drop_mode.loc[pre_2021, column] = value


In [50]:
# Build state-suffixed identifiers from RCDTS: the full code, its first five
# characters, and its first eight characters.
# NOTE(review): an earlier cell removes one leading '0' from RCDTS where
# present, so these fixed-width prefixes may not cover the same fields for
# every school - confirm against the RCDTS layout.
rcdts = drop_mode['RCDTS']
drop_mode['schoolcode'] = rcdts.astype('str') + 'il'
drop_mode['countycode'] = rcdts.str[:5] + 'il'
drop_mode['districtcode'] = rcdts.str[:8] + 'il'

# Store the year as text.
drop_mode['year'] = drop_mode['year'].astype('str')

# Round the demographic/outcome columns to 2 decimals and the mode columns to 4.
two_decimal_cols = ['dropout', 'white', 'black', 'hispanic', 'lowincome']
four_decimal_cols = ['hybridper', 'virtualper', 'schoolmode']
drop_mode.loc[:, two_decimal_cols] = drop_mode.loc[:, two_decimal_cols].round(2)
drop_mode.loc[:, four_decimal_cols] = drop_mode.loc[:, four_decimal_cols].round(4)


In [52]:
# Display the column labels of the merged frame.
drop_mode.columns

Index(['RCDTS', 'School Name', 'District', 'City', 'County', 'School Type',
       'totalenrolled', 'white', 'black', 'hispanic', 'lowincome', 'dropout',
       'year', 'charter', 'closedper', 'hybridper', 'inpersonper',
       'virtualper', 'schoolmode', 'schoolcode', 'countycode', 'districtcode'],
      dtype='object')

In [57]:
# Columns left out of the exported file: the raw identifier, the descriptive
# school fields, and two of the four mode-share columns.
columns_to_remove = [
    'RCDTS', 'School Name', 'District', 'City', 'County', 'School Type',
    'closedper', 'inpersonper',
]

# Drop them, add a constant state label, and write the final dataset
# (the row index is written too).
drop_mode = drop_mode.drop(columns=columns_to_remove).assign(state='il')
drop_mode.to_csv('final_data_drop_il.csv')