# Calculating the close rate of each LEAID in each year

In [1]:
import pandas as pd
import numpy as np

In [39]:
# Load the two input files from the shared data directory:
#   source    - contents of openAndClose.csv; later cells read its LEAID,
#               SURVYEAR, YEAR_OPENED and YEAR_CLOSED columns
#   sourcenew - contents of charters_2015.pkl
# NOTE(review): sourcenew is not referenced by any later cell shown in this notebook.
source = pd.read_csv(
    "../../../nowdata/backups/openAndClose.csv",
    encoding="latin1",
    low_memory=False,
)
sourcenew = pd.read_pickle("../../../nowdata/charters_2015.pkl")

In [40]:
# Row count per survey-year label (missing values are excluded from the tally).
source["SURVYEAR"].value_counts(dropna=True)

2014-2015    102799
Name: SURVYEAR, dtype: int64

In [41]:
# Row count per YEAR_OPENED value (rows with no opening year are excluded).
source["YEAR_OPENED"].value_counts(dropna=True)

1998.0    92522
2006.0     3455
2005.0     3373
2001.0     3254
2000.0     2588
2004.0     2506
2003.0     2488
2007.0     2447
1999.0     2435
2002.0     2279
2009.0     2202
2010.0     2129
2014.0     2000
2008.0     1983
2016.0     1897
2013.0     1705
2011.0     1703
2012.0     1670
2015.0     1400
Name: YEAR_OPENED, dtype: int64

In [42]:
# Row count per YEAR_CLOSED value (rows with no closing year are excluded).
source["YEAR_CLOSED"].value_counts(dropna=True)

2007.0    2255
2003.0    2199
2009.0    2116
2011.0    1975
2010.0    1975
2006.0    1912
2004.0    1877
2008.0    1815
2013.0    1766
2012.0    1575
2005.0    1507
2014.0    1493
2001.0    1415
2002.0    1390
2016.0    1267
1998.0    1226
2000.0    1204
1999.0    1191
2015.0    1190
Name: YEAR_CLOSED, dtype: int64

In [25]:
#Create two mappings
#  1. LEAID - list of number of schools open (operating) in each year
#  2. LEAID - list of number of schools closed in each year
# Each list has 18 slots: index 0 is 1999 and index 17 is 2016.

numSch_map = {} #{LEAID: [year99open, year00open, ..., year16open]}
closed_map = {} #{LEAID: [year99closed, year00closed,..., year16closed]}
for index, row in source.iterrows():
    thisid = row['LEAID']
    open_year = row['YEAR_OPENED'] if not np.isnan(row['YEAR_OPENED']) else 0  #let year be 0 if not found
    close_year = row['YEAR_CLOSED'] if not np.isnan(row['YEAR_CLOSED']) else 0
    if np.isnan(thisid):
        # Rows without an LEAID cannot be attributed to any district.
        continue

    # Create the zeroed per-year counters the first time an LEAID is seen and
    # then fall through to the counting loop, so the first row of an LEAID is
    # counted exactly like every later row of that LEAID.
    open_counts = numSch_map.setdefault(thisid, [0] * 18)
    closed_counts = closed_map.setdefault(thisid, [0] * 18)

    for i in range(0, 18):
        year = 1999 + i
        # A school counts as open in `year` if it opened on or before that year
        # and has either no closing year (0) or a closing year not before it.
        if open_year <= year and (close_year == 0 or close_year >= year):
            open_counts[i] += 1
        if close_year == year:
            closed_counts[i] += 1

    # Progress indicator; rows skipped above for a missing LEAID do not print.
    if index % 10000 == 0:
        print(index)

10000


In [26]:
# Per-LEAID close rate for every year slot: schools closed in that year divided
# by schools open in that year. A slot with no open schools gets a rate of 0.
close_rate = {}
for leaid, open_counts in numSch_map.items():
    for slot, n_open in enumerate(open_counts):
        rate = closed_map[leaid][slot] / n_open if n_open != 0 else 0
        # The rate list for an LEAID is created on its first slot.
        close_rate.setdefault(leaid, []).append(rate)

In [27]:
# Build one DataFrame per mapping. from_dict puts each LEAID in its own column,
# so transpose to get one row per LEAID and one column per year slot.
df_closeSchool = pd.DataFrame.from_dict(closed_map).transpose()
df_closeRate = pd.DataFrame.from_dict(close_rate).transpose()

In [28]:
# Preview the first rows only instead of rendering the whole frame.
df_closeRate.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
200001.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200180.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200210.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200390.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200510.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200570.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
200600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
400001.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
400016.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [29]:
# Build the two column-rename dictionaries (position -> column name).
# Position 0 corresponds to 1999 ('99') and position 17 to 2016 ('16').
year_labels = ['%02d' % (year % 100) for year in range(1999, 2017)]
dic1 = {pos: 'close' + label for pos, label in enumerate(year_labels)}
dic2 = {pos: 'close_rate' + label for pos, label in enumerate(year_labels)}

In [30]:
# Replace the positional column labels with the year-based names built above.
df_closeSchool = df_closeSchool.rename(dic1, axis='columns')
df_closeRate = df_closeRate.rename(dic2, axis='columns')

In [31]:
# Copy each frame's index (the LEAID) into a regular 'LEAID' column so the
# two frames can be merged on it.
for frame in (df_closeSchool, df_closeRate):
    frame['LEAID'] = frame.index

In [32]:
# Join closed-school counts and close rates on LEAID (inner join, the default).
merged_close = pd.merge(
    left=df_closeSchool,
    right=df_closeRate,
    on='LEAID',
    how='inner',
)

In [33]:
# Move the LEAID column to the first position: keep a copy of the column,
# drop it from the frame, then re-insert it at position 0.
mid = merged_close['LEAID']
merged_close.drop(columns=['LEAID'], inplace=True)
merged_close.insert(0, 'LEAID', mid)

In [34]:
# Preview the first rows only instead of rendering the whole frame.
merged_close.head(10)

Unnamed: 0,LEAID,close99,close00,close01,close02,close03,close04,close05,close06,close07,...,close_rate07,close_rate08,close_rate09,close_rate10,close_rate11,close_rate12,close_rate13,close_rate14,close_rate15,close_rate16
0,200001.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,200150.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,200180.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,200210.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,200390.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
5,200510.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
6,200570.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
7,200600.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
8,400001.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
9,400016.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [None]:
# Write the merged counts and rates to close_rate.csv, omitting the row index.
merged_close.to_csv("close_rate.csv", index=False)