# Calculating close rate of each LEAID in each Year

In [2]:
import pandas as pd
import numpy as np

In [3]:
#read the source file
source = pd.read_csv("openAndClose.csv", encoding = "latin1")

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
#Create two mappings
#  1. LEAID - list of number of schools opened in each year
#  2. LEAID - list of number of schools closed in each year

numSch_map = {} #{LEAID: [year99opened, year00opened, ..., year2016opened]}
closed_map = {} #{LEAID: [year99closed, year00closed,..., year16closed]}
for index, row in source.iterrows():
    thisid = row['LEAID']
    open_year = row['YEAR_OPENED'] if not np.isnan(row['YEAR_OPENED']) else 0  #let year be 0 if not found
    close_year = row['YEAR_CLOSED'] if not np.isnan(row['YEAR_CLOSED']) else 0
    if np.isnan(thisid):
        continue
        
    if thisid in numSch_map:
        for i in range(0, 18):
            #if i is in the range of open years for some school, add it into the corresponding map
            if open_year <= 1999 + i and (close_year == 0 or close_year >= 1999 + i):
                numSch_map[thisid][i] += 1
            if close_year == 1999 + i:
                closed_map[thisid][i] += 1
    else:
        numSch_map[thisid] = []
        closed_map[thisid] = []
        for i in range(0, 18):
            numSch_map[thisid].append(0)
            closed_map[thisid].append(0)
            
    if index % 10000 == 0:
        print(index)

0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


In [5]:
# Calculating the close rate for each LEAID using the mapping we did above
close_rate = {}
for key in numSch_map.keys():
    for i in range(0, len(numSch_map[key])):
        denom = numSch_map[key][i]
        if key not in close_rate:
            #create a list of close rate values
            close_rate[key] = []
            if denom == 0:
                close_rate[key].append(0)
            else:
                close_rate[key].append(closed_map[key][i] / denom)
        else:
            if denom == 0:
                close_rate[key].append(0)
            else:
                close_rate[key].append(closed_map[key][i] / denom)

In [6]:
# Turn the close school mapping and close rate mapping into pandas dataframe
df_closeSchool = pd.DataFrame.from_dict(closed_map)
df_closeRate = pd.DataFrame.from_dict(close_rate)
df_closeSchool = df_closeSchool.transpose()
df_closeRate = df_closeRate.transpose()

In [7]:
# Create two dictionary to rename the columns
dic1 = {0:'close99', 1:'close00', 2:'close01', 3:'close02', 4:'close03', 5:'close04', 6:'close05', 7:'close06', \
       8:'close07',9:'close08', 10:'close09', 11:'close10', 12:'close11', 13:'close12', 14:'close13', 15:'close14', \
       16:'close15', 17:'close16'}
dic2 = {0:'close_rate99', 1:'close_rate00', 2:'close_rate01', 3:'close_rate02', 4:'close_rate03', 5:'close_rate04', 6:'close_rate05', 7:'close_rate06', \
       8:'close_rate07',9:'close_rate08', 10:'close_rate09', 11:'close_rate10', 12:'close_rate11', 13:'close_rate12', 14:'close_rate13', 15:'close_rate14', \
       16:'close_rate15', 17:'close_rate16'}

In [8]:
# Rename the two dataframe using the dictionaries created above
df_closeSchool = df_closeSchool.rename(columns = dic1)
df_closeRate = df_closeRate.rename(columns = dic2)

In [9]:
# Turn the LEAID from index to a new column
df_closeSchool['LEAID'] = df_closeSchool.index
df_closeRate['LEAID'] = df_closeRate.index

In [10]:
# Merge the closed school dataframe and the close rate dataframe
merged_close = pd.merge(df_closeSchool, df_closeRate, on=['LEAID'])

In [11]:
# Let the LEAID column appears at the front
mid = merged_close['LEAID']
merged_close.drop(labels=['LEAID'], axis=1,inplace = True)
merged_close.insert(0, 'LEAID', mid)

In [12]:
merged_close

Unnamed: 0,LEAID,close99,close00,close01,close02,close03,close04,close05,close06,close07,...,close_rate07,close_rate08,close_rate09,close_rate10,close_rate11,close_rate12,close_rate13,close_rate14,close_rate15,close_rate16
0,100002.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.166667,0.000000,0.000000
1,100005.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2,100006.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
3,100007.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.058824,0.000000,0.000000
4,100008.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
5,100009.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
6,100011.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
7,100012.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
8,100013.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.200000
9,100015.0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000


In [None]:
# Turn the merged dataframe to csv
merged_close.to_csv("close_rate.csv", index = False)