#### School Closure Visualization and Descriptive Statistics

In [10]:
import numpy as np
import pandas as pd

import seaborn as sns; sns.set()
from matplotlib import pyplot as plt
%matplotlib inline

In [11]:
# Read the school ID plus state/county identifiers from the raw NCES CCD file.
# `usecols` keeps the parser from materialising columns that are never used;
# the trailing selection restores the column order relied on below.
id_columns = ["NCESSCH", "FIPST", "CONUM09", "LSTATE09", "CONAME09"]
ids = pd.read_table("data/ccd/2009-10/sc092a.txt", usecols=id_columns)[id_columns]
ids = ids.rename(columns={"CONUM09": "CONUM", "LSTATE09": "STATE", "CONAME09": "COUNTY"})

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
school = pd.read_pickle("data/school_closure_clean.pkl")

# Right join: every row of `school` is kept; ID columns are NaN where NCESSCH
# has no match in `ids`.
merged = pd.merge(ids, school, how="right", on="NCESSCH")

print(merged.shape)

(90713, 61)


#### Create TSV File for d3 Map

In [12]:
## Convert County IDs to Numeric (for D3 TopoJSON Map)
# Values that cannot be parsed as numbers become NaN ...
merged["CONUM"] = pd.to_numeric(merged["CONUM"], errors="coerce")
# ... and those rows are dropped. The explicit .copy() makes the filtered frame
# independent of the original, so the column assignment below does not raise
# SettingWithCopyWarning.
merged = merged[merged["CONUM"].notnull()].copy()
merged["CONUM"] = merged["CONUM"].astype(np.int32)

In [13]:
# Build a "County Name, ST" label: title-case the county name and append the
# state code, using vectorised string operations instead of a Python lambda
# evaluated once per row.
# NOTE: COUNTY is overwritten in place, so re-running this cell would title-case
# the already-appended state code and append the state a second time.
merged["COUNTY"] = merged["COUNTY"].str.title() + ", " + merged["STATE"]
merged.head(5)

Unnamed: 0,NCESSCH,FIPST,CONUM,STATE,COUNTY,LEAID,LEANM,SCHNAM,LSTATE,LEVEL,...,MATH_RANK,ELA_RANK,in_ranks,intercept,CLOSED10,CLOSED11,CLOSED12,CLOSED13,CLOSED14,CLOSED
0,10000201876,1,1117,AL,"Shelby County, AL",100002,ALABAMA YOUTH SERVICES,ALABAMA YOUTH SERVICES,AL,N,...,,,0,1,0,0,0,0,0,0
1,10000500870,1,1095,AL,"Marshall County, AL",100005,ALBERTVILLE CITY,ALA AVENUE MIDDLE SCH,AL,2,...,0.520302,0.57429,1,1,0,0,0,0,0,0
2,10000500871,1,1095,AL,"Marshall County, AL",100005,ALBERTVILLE CITY,ALBERTVILLE HIGH SCH,AL,3,...,0.850174,0.713511,1,1,0,0,0,0,0,0
3,10000500879,1,1095,AL,"Marshall County, AL",100005,ALBERTVILLE CITY,EVANS ELEM SCH,AL,2,...,0.65091,0.608922,1,1,0,0,0,0,0,0
4,10000500889,1,1095,AL,"Marshall County, AL",100005,ALBERTVILLE CITY,ALBERTVILLE ELEM SCH,AL,1,...,0.831373,0.855913,1,1,0,0,0,0,0,0


In [14]:
# Per-county sums of `intercept` and `CLOSED`, relabelled for the map.
# NOTE(review): `intercept` looks like a constant 1 per school in the preview
# above, which would make its sum a school count -- confirm.
bycounty = (
    merged
    .groupby(by=["CONUM", "COUNTY"], as_index=False)[["intercept", "CLOSED"]]
    .sum()
    .rename(columns={
        "CONUM": "id",
        "COUNTY": "county_name",
        "intercept": "schools",
        "CLOSED": "closures",
    })
)
bycounty["close_rate"] = bycounty["closures"] / bycounty["schools"]
# Export currently disabled:
#bycounty.to_csv("graphics/d3-map/closures.tsv")
bycounty.head(25)

Unnamed: 0,id,county_name,schools,closures,close_rate
0,1001,"Autauga County, AL",12,0,0.0
1,1003,"Baldwin County, AL",43,1,0.023256
2,1005,"Barbour County, AL",9,1,0.111111
3,1007,"Bibb County, AL",10,1,0.1
4,1009,"Blount County, AL",16,0,0.0
5,1011,"Bullock County, AL",4,1,0.25
6,1013,"Butler County, AL",6,0,0.0
7,1015,"Calhoun County, AL",34,0,0.0
8,1017,"Chambers County, AL",13,0,0.0
9,1019,"Cherokee County, AL",7,0,0.0


In [None]:
# County closure rate keyed by the numeric county id, written out as a
# tab-separated file for the D3 map.
# NOTE: this rebinds `bycounty` with a different set of columns
# (id, total, closed, rate) than the previous cell produced.
bycounty = (
    merged
    .groupby(by="CONUM", as_index=False)[["intercept", "CLOSED"]]
    .sum()
    # All three columns are renamed in a single pass.
    .rename(columns={"CONUM": "id", "intercept": "total", "CLOSED": "closed"})
)
bycounty["rate"] = bycounty["closed"] / bycounty["total"]

bycounty[["id", "rate"]].to_csv("graphics/d3-map/close_rate_bycnty.tsv", sep="\t", index=False)
bycounty.head(10)

#### Look at Counties with Highest School Closure Rates

In [None]:
# Sum `intercept` and `CLOSED` per (state, county), derive closed / total,
# and show the ten counties with the highest rate.
view = (
    merged
    .groupby(by=["STATE", "COUNTY"], as_index=False)[["intercept", "CLOSED"]]
    .sum()
    .rename(columns={"intercept": "total", "CLOSED": "closed"})
)
view["rate"] = view["closed"] / view["total"]
view.sort_values(by="rate", ascending=False).head(10)

#### Look at Closure Rates by Different Categorical Features

In [None]:
# Show the available columns, then the share of rows whose CLOSED value is 1.
print(merged.columns)
print(float(merged["CLOSED"].value_counts()[1]) / len(merged))

In [None]:
def closure_rate(variables, df=None):
    """Print the mean of ``CLOSED`` within each level of the given columns.

    When ``CLOSED`` is a 0/1 flag, the printed mean is the closure rate of
    each group.

    Parameters
    ----------
    variables : iterable of str
        Column names to group by; one table is printed per column.
    df : pandas.DataFrame, optional
        Frame containing the grouping columns and a ``CLOSED`` column.
        Defaults to the notebook-level ``merged`` frame.

    Returns
    -------
    None
    """
    if df is None:
        # Backward-compatible default: fall back to the notebook's global frame.
        df = merged
    for variable in variables:
        print(df.groupby(by=variable, as_index=False)["CLOSED"].mean())

# Mean of CLOSED for each value of the MAGNET, CHARTR and SHARED columns.
closure_rate([
    "MAGNET",
    "CHARTR",
    "SHARED",
])

In [None]:
# Mean of CLOSED for each value of the regional columns.
# NOTE(review): the names suggest U.S. Census division indicators -- confirm.
closure_rate([
    "NEW_ENGLAND",
    "MID_ATLANTIC",
    "EAST_NORTH_CENTL",
    "WEST_NORTH_CENTL",
    "SOUTH_ATLANTIC",
    "EAST_SOUTH_CENTL",
    "WEST_SOUTH_CENTL",
    "MOUNTAIN",
    "PACIFIC",
])

In [None]:
# Mean of CLOSED for each value of the CITY, SUBURB, TOWN and RURAL columns.
closure_rate([
    "CITY",
    "SUBURB",
    "TOWN",
    "RURAL",
])

In [None]:
# Mean of CLOSED for each value of the ELEM, MIDDLE, HIGH and OTHER columns.
closure_rate([
    "ELEM",
    "MIDDLE",
    "HIGH",
    "OTHER",
])

In [None]:
# Mean of CLOSED for each value of the T1_ALL, T1_SOME and T1_NONE columns.
# NOTE(review): presumably Title I status indicators -- confirm.
closure_rate([
    "T1_ALL",
    "T1_SOME",
    "T1_NONE",
])

#### Paired Box Plots of Continuous Characteristics by Open vs. Closed

In [None]:
def by_closure(variables, df=None):
    """Print the mean of each given column, split by the value of ``CLOSED``.

    Parameters
    ----------
    variables : iterable of str
        Column names to average; one table is printed per column.
    df : pandas.DataFrame, optional
        Frame containing a ``CLOSED`` column and every column in
        ``variables``. Defaults to the notebook-level ``merged`` frame.

    Returns
    -------
    None
    """
    if df is None:
        # Backward-compatible default: fall back to the notebook's global frame.
        df = merged
    for variable in variables:
        print(df.groupby(by="CLOSED", as_index=False)[variable].mean())

# Mean of each listed column for every value of CLOSED.
by_closure([
    "MEMBER",
    "PCT_MINORITY",
    "PCT_FRL",
    "ST_RATIO",
    "MATH_RANK",
    "ELA_RANK",
])

In [None]:
def graph_by_closure(df, variables, palette=None):
    """Draw grouped box plots of each variable, split by closure status.

    Every column named in ``variables`` is multiplied by 100 and stacked into
    a long-form frame with columns ``var``, ``closed``, ``value`` and
    ``closed_txt`` ("Open"/"Closed"), which is handed to ``sns.boxplot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``CLOSED`` column and every column in ``variables``.
    variables : iterable of str
        Columns to plot, one group of boxes per column, in the given order.
    palette : sequence of colors, optional
        Colors for the "Open" and "Closed" boxes, in that order. Defaults to
        the notebook-level ``[green, red]``.

    Returns
    -------
    The object returned by ``sns.boxplot``.

    Raises
    ------
    ValueError
        If ``variables`` is empty.
    """
    variables = list(variables)
    if not variables:
        raise ValueError("graph_by_closure needs at least one variable to plot")
    if palette is None:
        # Looked up at call time, so the globals only need to exist before the call.
        palette = [green, red]

    # Build one long-form piece per variable and concatenate once, rather than
    # re-copying a growing frame on every loop iteration.
    pieces = [
        pd.DataFrame({"var": variable, "closed": df["CLOSED"], "value": df[variable] * 100})
        for variable in variables
    ]
    # ignore_index avoids the repeated row labels that stacking would otherwise leave.
    long_df = pd.concat(pieces, axis=0, ignore_index=True)

    long_df["closed_txt"] = "Open"
    # .loc replaces the removed .ix indexer.
    long_df.loc[long_df["closed"] == 1, "closed_txt"] = "Closed"

    # hue_order pins which status gets which palette entry, independent of
    # which status happens to appear first in the data.
    return sns.boxplot(
        x="var",
        y="value",
        hue="closed_txt",
        hue_order=["Open", "Closed"],
        data=long_df,
        palette=palette,
    )


# Open a wide figure; the box plot below is drawn on its current axes.
plt.figure(figsize=(20, 10))

# Entries 1 and 2 of seaborn's "deep" palette; graph_by_closure reads these
# two globals when it builds its palette.
green, red = sns.color_palette("deep")[1:3]

# NOTE: despite its name, `fig` holds the object returned by sns.boxplot.
fig = graph_by_closure(
    merged,
    ["PCT_MINORITY", "PCT_FRL", "MATH_RANK", "ELA_RANK"],
)
fig.legend(loc="best", fontsize=24)

# Axis labels: the x label is a single space, the y axis is in percent.
fig.set_xlabel(" ")
fig.set_ylabel("(%)", fontsize=28)
fig.set_xticklabels([
    "Percent \nMinority",
    "Percent Free/ \nReduced Price Lunch",
    "Math \nPercentile Rank",
    "ELA \nPercentile Rank",
])
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)

fig.set_title("School Characteristics by Closure Status", fontsize=32)
plt.savefig("graphics/school-char-by-closure.png")