# Enrollment_and_Forecast_History

E. Quinn 4/29/2021

Enrollment data and projections from various sources

### Import standard Python data science packages

In [None]:
import sys
import math
import re
import copy as cp
import numpy as np
import scipy as sc
import pandas as pd
import matplotlib.pyplot as plt
#plt.switch_backend('WebAgg')
import seaborn as sns
import pickle
%matplotlib inline

In [None]:
from datetime import datetime, timedelta, date
from datascience import *
import uuid
import random

In [None]:
from scipy.stats import nbinom
from scipy.stats import poisson
from scipy.stats import binom
from scipy.stats import betabinom
import numpy.random as npr

### Set path to data files

In [None]:
# Prefix prepended to input file names by the read calls that use data_path
data_path = '../'
# IPython shell escape: print the current working directory
!pwd

### Define enrollment dictionary

In [None]:
# Master enrollment dictionary: one entry per starting year 1990..2049.
# Each entry carries a 'school_year' label of the form 'YYYY-YYYY' (the year
# and the year after it) and an initially empty 'grades' dictionary that the
# loading cells fill in.
edict = {
    year: {
        'school_year': '-'.join((str(year), str(year + 1))),
        'grades': {},
    }
    for year in np.arange(1990, 2050)
}

#edict

### SBS Births data 1990-2005

In [None]:
sbsb = pd.read_csv('../SBS_2006_Demographics.csv')
sbsbd = sbsb.to_dict()

# Store each row's birth count under edict[year]['grades']['Birth']['actual']['SBS'],
# creating the intermediate dictionaries the first time a year is seen.
for row, year in sbsbd['Year'].items():
    births = sbsbd['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    birth_rec.setdefault('actual', {})['SBS'] = births

### SBS Total Enrollment Forecasts

In [None]:
sbst = pd.read_csv('../SBS_2006_Forecasts.csv')
sbstd = sbst.to_dict()

# Store each row's 'Total' under edict[year]['Total_enrollment'][Type]['SBS'],
# where Type is taken verbatim from the row's 'Type' column.
for row, syear in sbstd['Syear'].items():
    year = int(syear[:4])        # first four characters of 'Syear' give the year key
    count = sbstd['Total'][row]
    Type = sbstd['Type'][row]
    totals = edict[year].setdefault('Total_enrollment', {})
    totals.setdefault(Type, {})['SBS'] = count

### NESDEC 2020-2021

In [None]:
nesd2021 = pd.read_csv('../NESDEC_2020_2021.csv')
nesd2021d = nesd2021.to_dict()

# Births: values for birth years through 2019 are stored as 'actual',
# later birth years as 'forecast', both under the source key 'NESD2021'.
for row, year in nesd2021d['Byear'].items():
    births = nesd2021d['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= 2019 else 'forecast'
    birth_rec.setdefault(status, {})['NESD2021'] = births

# Enrollment: every column other than 'Year', 'Byear' and 'Births' is used
# as a grade key. Years through 2020 are 'actual', later years 'forecast'.
grade_columns = [col for col in nesd2021d if col not in ('Year', 'Byear', 'Births')]
for row, year in nesd2021d['Year'].items():
    for grade in grade_columns:
        count = nesd2021d[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= 2020 else 'forecast'
        grade_rec.setdefault(status, {})['NESD2021'] = count

### NESDEC 2018-2019

In [None]:
nesd1819 = pd.read_csv('../NESDEC_2018_2019.csv')
nesd1819d = nesd1819.to_dict()

# Last years whose values are stored as 'actual'; anything later is 'forecast'
actual_births_through = 2016
actual_enrollment_through = 2018

# Births, stored under the source key 'NESD1819'
for row, year in nesd1819d['Byear'].items():
    births = nesd1819d['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= actual_births_through else 'forecast'
    birth_rec.setdefault(status, {})['NESD1819'] = births

# Enrollment: every column other than 'Year', 'Byear' and 'Births' is used
# as a grade key.
grade_columns = [col for col in nesd1819d if col not in ('Year', 'Byear', 'Births')]
for row, year in nesd1819d['Year'].items():
    for grade in grade_columns:
        count = nesd1819d[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= actual_enrollment_through else 'forecast'
        grade_rec.setdefault(status, {})['NESD1819'] = count

### RIDE October Enrollment

In [None]:
od = {}

October = pd.read_csv(data_path + 'RIDE_October_enrollment.csv')

October.head()

ocd = October.to_dict()

# Grade codes that are stored under a different label:
#   'KF' -> 'K'     full day kindergarten
#   'PK' -> 'PKG'   not full day Pre-K
#   'PF' -> 'PK'    full day pre-K
# Any other code is kept as it appears in the file.
recode = {'KF': 'K', 'PK': 'PKG', 'PF': 'PK'}

for row, year in ocd['Year'].items():
    grade = ocd['Grade'][row]
    count = ocd['Total'][row]
    grade = recode.get(grade, grade)

    if grade == 'TO':
        # 'TO' rows are filed under the year's 'Total_enrollment' entry
        totals = edict[year].setdefault('Total_enrollment', {})
        totals.setdefault('actual', {})['RIDE_Oct'] = count
    else:
        # every other row is filed under its (recoded) grade
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        grade_rec.setdefault('actual', {})['RIDE_Oct'] = count

### Read 2019 Milone and MacBroom Historical Data

In [None]:
mmbhist = pd.read_csv(data_path + 'Milone_MacBroom2019_Hist.csv')
mmbhistd = mmbhist.to_dict()

# Last year whose values are stored as 'actual'; anything later is 'forecast'
actuals_through = 2018

# Births, stored under the source key 'M_and_M'
for row, year in mmbhistd['Byear'].items():
    births = mmbhistd['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= actuals_through else 'forecast'
    birth_rec.setdefault(status, {})['M_and_M'] = births

# Enrollment: every column other than 'Year', 'Byear' and 'Births' is used
# as a grade key.
grade_columns = [col for col in mmbhistd if col not in ('Year', 'Byear', 'Births')]
for row, year in mmbhistd['Year'].items():
    for grade in grade_columns:
        count = mmbhistd[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= actuals_through else 'forecast'
        grade_rec.setdefault(status, {})['M_and_M'] = count

### Read 2019 Milone and MacBroom Projections

In [None]:
# NOTE: this cell rebinds mmbhist / mmbhistd to the projection file.
mmbhist = pd.read_csv(data_path + 'Milone_MacBroom2019_Proj.csv')
mmbhistd = mmbhist.to_dict()

# Last year whose values are stored as 'actual'; anything later is 'forecast'
actuals_through = 2018

# Births, stored under the source key 'M_and_M'
for row, year in mmbhistd['Byear'].items():
    births = mmbhistd['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= actuals_through else 'forecast'
    birth_rec.setdefault(status, {})['M_and_M'] = births

# Enrollment: the year column is looked up as 'Year ' (with a trailing
# space); every column other than 'Year ', 'Byear' and 'Births' is used as a
# grade key.
grade_columns = [col for col in mmbhistd if col not in ('Year ', 'Byear', 'Births')]
for row, year in mmbhistd['Year '].items():
    for grade in grade_columns:
        count = mmbhistd[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= actuals_through else 'forecast'
        grade_rec.setdefault(status, {})['M_and_M'] = count

### Read 2019 Milone and MacBroom Low End Projections

In [None]:
mmbp_Low = pd.read_csv(data_path + 'Milone_MacBroom2019_Proj_L.csv')
mmbp_Lowd = mmbp_Low.to_dict()

# Last year whose values are stored as 'actual'; anything later is 'forecast'
actuals_through = 2018

# Births, stored under the source key 'M_and_M_low'
for row, year in mmbp_Lowd['Byear'].items():
    births = mmbp_Lowd['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= actuals_through else 'forecast'
    birth_rec.setdefault(status, {})['M_and_M_low'] = births

# Enrollment: every column other than 'Year', 'Byear' and 'Births' is used
# as a grade key.
grade_columns = [col for col in mmbp_Lowd if col not in ('Year', 'Byear', 'Births')]
for row, year in mmbp_Lowd['Year'].items():
    for grade in grade_columns:
        count = mmbp_Lowd[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= actuals_through else 'forecast'
        grade_rec.setdefault(status, {})['M_and_M_low'] = count

### Read 2019 Milone and MacBroom High End Projections

In [None]:
mmbp_High = pd.read_csv(data_path + 'Milone_MacBroom2019_Proj_H.csv')
mmbp_Highd = mmbp_High.to_dict()

# Last year whose values are stored as 'actual'; anything later is 'forecast'
actuals_through = 2018

# Births, stored under the source key 'M_and_M_high'
for row, year in mmbp_Highd['Byear'].items():
    births = mmbp_Highd['Births'][row]
    birth_rec = edict[year]['grades'].setdefault('Birth', {})
    status = 'actual' if year <= actuals_through else 'forecast'
    birth_rec.setdefault(status, {})['M_and_M_high'] = births

# Enrollment: every column other than 'Year', 'Byear' and 'Births' is used
# as a grade key.
grade_columns = [col for col in mmbp_Highd if col not in ('Year', 'Byear', 'Births')]
for row, year in mmbp_Highd['Year'].items():
    for grade in grade_columns:
        count = mmbp_Highd[grade][row]
        grade_rec = edict[year]['grades'].setdefault(grade, {})
        status = 'actual' if year <= actuals_through else 'forecast'
        grade_rec.setdefault(status, {})['M_and_M_high'] = count

### Save to pickle Enrollment_and_Forecast_History_m_d_yyyy.pkl (month and day are not zero-padded)

In [None]:
# Write the whole enrollment dictionary to a pickle whose name carries
# today's month, day and year (no zero padding), separated by underscores.
current_date = date.today()
stamp = '_'.join(
    str(part)
    for part in (current_date.month, current_date.day, current_date.year)
)
fname = '../Enrollment_and_Forecast_History_' + stamp + '.pkl'
with open(fname, 'wb') as handle:
    pickle.dump(edict, handle)