In [71]:
import pandas as pd, re
import getschools as gs


In [72]:
def clean_race_cols(df):
    """Fold the differently named race/ethnicity columns into five canonical ones.

    The input is left untouched; a modified copy is returned.

    For each canonical column ("black", "white", "asian", "hispanic",
    "other") the result holds the numeric sum of that column (if present)
    plus every alias column listed for it that is present in ``df``.  Values
    that cannot be parsed as numbers are counted as 0.  Alias columns that
    exist are converted to numeric in the returned frame as well.

    Extra columns added to the result:
      * ``tot_check``     - numeric ``total`` minus the five canonical columns.
      * ``oth_total``     - numeric ``total`` minus black, white, hispanic and
                            asian (i.e. an "other" figure derived from the total).
      * ``oth_tot_check`` - ``total`` minus black, white, hispanic, asian and
                            ``oth_total``.

    Progress is printed to stdout for each canonical column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``total`` column (a KeyError is raised otherwise).

    Returns
    -------
    pandas.DataFrame
    """
    race_aliases = {
        "black":["black_not_of_hispanic_origin","black_or_african_american"],
        "white":["white_not_of_hispanic_origin"],
        "asian":[],
        "hispanic":["hispanic_latino","hispanic_latino_of_any_race"],
        "other":["american_indian","american_indian_or_alaska_native","native_hawaiian_or_other_pacific_islander",
                 "not_reported","two_or_more_races"]
    }

    ret = df.copy()

    # Parse the reported total once; unparseable entries count as 0.
    total = pd.to_numeric(ret["total"], errors="coerce").fillna(0)
    ret["tot_check"] = total.copy()

    for k in race_aliases:
        # Single-argument print(...) calls produce the same text on Python 2 and 3.
        print(k)
        if k not in ret.columns:
            ret[k] = 0
        else:
            ret[k] = pd.to_numeric(ret[k], errors="coerce").fillna(0)
            print("starting  %s with %s" % (k, k))

        for alias in race_aliases[k]:
            # Not every alias column is present; skip the absent ones
            # explicitly instead of swallowing every exception.
            if alias not in ret.columns:
                continue
            print("Adding  %s  to  %s" % (alias, k))
            ret[alias] = pd.to_numeric(ret[alias], errors="coerce").fillna(0)
            ret[k] += ret[alias]

        ret["tot_check"] = ret["tot_check"] - ret[k]
        print("tot_check sum %s" % ret["tot_check"].sum())

    ret["oth_total"] = total - ret["black"] - ret["white"] - ret["hispanic"] - ret["asian"]

    # NOTE(review): this is zero by construction (it subtracts oth_total from
    # the very expression that defines oth_total).  Kept so the output columns
    # stay the same; confirm whether a comparison against "other" was intended.
    ret["oth_tot_check"] = total - ret["black"] - ret["white"] - ret["hispanic"] - ret["asian"] \
        - ret["oth_total"]

    return ret
    

In [73]:
def get_sheet(yearstr, subgroup):
    """Load one enrollment export CSV and normalise its identifier columns.

    Reads ``../scripts/brute/csv/EnrollmentYearExport-<yearstr>-<subgroup>.csv``
    (relative to the current working directory), adds a ``year`` column set to
    ``yearstr``, and rewrites ``district_code`` and ``school_code`` as strings
    with double quotes removed and left-padded with zeros to 7 characters.

    Parameters
    ----------
    yearstr : str
        Year label used in the file name, e.g. "2013-14".
    subgroup : str
        Subgroup token used in the file name, e.g. "ELL".

    Returns
    -------
    pandas.DataFrame
    """
    code_cols = ["district_code", "school_code"]
    path = "../scripts/brute/csv/EnrollmentYearExport-" + yearstr + "-" + subgroup + ".csv"

    # Read the codes as text.  With type inference a code column containing a
    # blank cell becomes float, and the codes then stringify as "10111.0".
    ret = pd.read_csv(path, dtype=dict((col, str) for col in code_cols))
    ret["year"] = yearstr

    for col in code_cols:
        ret[col] = ret[col].astype(str).str.replace("\"", "").str.zfill(7)

    return ret

# Preview the first rows of one loaded sheet (year 2013-14, subgroup ELL).
get_sheet("2013-14","ELL").head()


Unnamed: 0,school_code,district_code,district,school,no,yes,total,year
0,2880113,2880013,Achievement First Hartford Academy Inc. District,Achievement First Hartford Academy Inc.,840,30,870,2013-14
1,2795113,2790013,Amistad Academy District,Amistad Academy,835,102,937,2013-14
2,10111,10011,Andover School District,Andover Elementary School,*,*,298,2013-14
3,26111,20011,Ansonia School District,Ansonia High School,589,13,602,2013-14
4,25111,20011,Ansonia School District,Ansonia Middle School,484,9,493,2013-14


In [76]:
# Year labels "2007-08" through "2016-17", matching the export file names.
# Built as a list (not map) because make_trend iterates it once per subgroup;
# on Python 3 a map object would be exhausted after the first pass.
years = ["20" + str(start_yy).zfill(2) + "-" + str(start_yy + 1).zfill(2)
         for start_yy in range(7, 17)]

# Key: lowercase label used in the output file names.
# Value: subgroup token used in the export file names read by get_sheet.
groups = {
    "ell":"ELL",
    "gender":"Gender",
    "grade":"Grade",
    "lunch":"Lunch",
    "race":"Race",
    "sped":"Special+Education+"
}

def make_trend(subgroup):
    """Stack the yearly sheets for one subgroup into a single DataFrame.

    Calls get_sheet(year, subgroup) for every entry of the module-level
    ``years`` sequence, in order, and concatenates the results with
    pd.concat.
    """
    yearly_frames = [get_sheet(school_year, subgroup) for school_year in years]
    return pd.concat(yearly_frames)

# Build the multi-year trend for every subgroup and write it to disk.
for g in groups:
    
    df = make_trend(groups[g])

    if g == "race":
        # Race data is passed through clean_race_cols first; the cleaned frame
        # is written to the for_db folder in addition to the regular output below.
        df = clean_race_cols(df)
        df.to_csv("../clean/for_db/enrollment_trend_race.csv", index=False)
    
    # Written for every subgroup (for race this is the cleaned frame).
    df.to_csv("../clean/csv/enrollment-trend-" + g + ".csv",
                                 index=False)

hispanic
Adding  hispanic_latino  to  hispanic
Adding  hispanic_latino_of_any_race  to  hispanic
tot_check sum 4431770.0
white
starting  white with white
Adding  white_not_of_hispanic_origin  to  white
tot_check sum 1113415.0
other
Adding  american_indian  to  other
Adding  american_indian_or_alaska_native  to  other
Adding  native_hawaiian_or_other_pacific_islander  to  other
Adding  not_reported  to  other
Adding  two_or_more_races  to  other
tot_check sum 1039746.0
black
Adding  black_not_of_hispanic_origin  to  black
Adding  black_or_african_american  to  black
tot_check sum 332328.0
asian
starting  asian with asian
tot_check sum 117012.0


#### Check: distribution of `tot_check` for the race trend

In [77]:
# Count how often each tot_check value (reported total minus the summed race
# columns) occurs.  The subgroup token is taken from `groups` so the file name
# matches the one used by the export loop ("Race"); the literal "race" only
# resolves on a case-insensitive filesystem.
clean_race_cols(make_trend(groups["race"]))["tot_check"].value_counts()

hispanic
Adding  hispanic_latino  to  hispanic
Adding  hispanic_latino_of_any_race  to  hispanic
tot_check sum 4431770.0
white
starting  white with white
Adding  white_not_of_hispanic_origin  to  white
tot_check sum 1113415.0
other
Adding  american_indian  to  other
Adding  american_indian_or_alaska_native  to  other
Adding  native_hawaiian_or_other_pacific_islander  to  other
Adding  not_reported  to  other
Adding  two_or_more_races  to  other
tot_check sum 1039746.0
black
Adding  black_not_of_hispanic_origin  to  black
Adding  black_or_african_american  to  black
tot_check sum 332328.0
asian
starting  asian with asian
tot_check sum 117012.0


0.0      2811
7.0       764
6.0       718
8.0       674
5.0       640
9.0       609
4.0       604
3.0       552
10.0      544
11.0      470
2.0       417
12.0      347
13.0      294
14.0      278
15.0      242
16.0      218
17.0      172
18.0      150
19.0      138
20.0      119
21.0      112
22.0      106
23.0       98
25.0       63
27.0       62
24.0       59
26.0       52
28.0       52
29.0       47
31.0       44
         ... 
94.0        1
75.0        1
145.0       1
100.0       1
72.0        1
122.0       1
81.0        1
159.0       1
141.0       1
132.0       1
113.0       1
245.0       1
88.0        1
229.0       1
232.0       1
82.0        1
107.0       1
160.0       1
111.0       1
133.0       1
102.0       1
98.0        1
96.0        1
99.0        1
109.0       1
116.0       1
108.0       1
110.0       1
148.0       1
114.0       1
Name: tot_check, dtype: int64