# Calculating the district level PVI

In [1]:
import pandas as pd, numpy as np, scipy as sp

In [3]:
#Read the source file
source = pd.read_csv("pubschools_merged_2015.csv", encoding = "latin1")

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
#pring all the columns that the source has
print(list(source.columns))

['SURVYEAR', 'FIPST', 'STABR', 'SEANAME', 'LEAID', 'ST_LEAID', 'SCHID', 'ST_SCHID', 'NCESSCH', 'MSTREET1', 'MSTREET2', 'MSTREET3', 'MCITY', 'MSTATE', 'MZIP', 'MZIP4', 'PHONE', 'LSTREET1', 'LSTREET2', 'LSTREET3', 'LCITY', 'LSTATE', 'LZIP', 'LZIP4', 'UNION', 'OUT_OF_STATE_FLAG', 'SCH_TYPE_TEXT', 'SCH_TYPE', 'RECON_STATUS', 'GSLO', 'GSHI', 'LEVEL', 'VIRTUAL', 'BIES', 'SY_STATUS_TEXT', 'SY_STATUS', 'UPDATED_STATUS_TEXT', 'UPDATED_STATUS', 'EFFECTIVE_DATE', 'CHARTER_TEXT', 'PKOFFERED', 'KGOFFERED', 'G1OFFERED', 'G2OFFERED', 'G3OFFERED', 'G4OFFERED', 'G5OFFERED', 'G6OFFERED', 'G7OFFERED', 'G8OFFERED', 'G9OFFERED', 'G10OFFERED', 'G11OFFERED', 'G12OFFERED', 'G13OFFERED', 'AEOFFERED', 'UGOFFERED', 'NOGRADES', 'CHARTAUTH1', 'CHARTAUTHN1', 'CHARTAUTH2', 'CHARTAUTHN2', 'IGOFFERED', 'WEBSITE', 'FRELCH', 'REDLCH', 'PK', 'KG', 'G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08', 'G09', 'G10', 'G11', 'G12', 'G13', 'UG', 'AE', 'TOTAL', 'AMPKM', 'AMPKF', 'ASPKM', 'ASPKF', 'HIPKM', 'HIPKF', 'BLPKM', '

In [5]:
# Create three mapping for the columns in source
#  1. LEAID - the number of schools
#  2. LEAID - the sum of all PVI score in 2014 
#  3. LEAID - the sum of all PVI score in 2017
numsch_map = {}
pvi2014_map = {}
pvi2017_map = {}

#iterate through the source
for index, row in source.iterrows():
    thisid = row['LEAID']
    pvi2014 = row['PVI2014'] if not np.isnan(row['PVI2014']) else 0 #set PVI to 0 if not found
    pvi2017 = row['PVI2017'] if not np.isnan(row['PVI2017']) else 0
    if np.isnan(thisid):
        continue
    if thisid in numsch_map:
        pvi2014_map[thisid] += pvi2014   #summing the PVIs 
        pvi2017_map[thisid] += pvi2017
        numsch_map[thisid] += 1
    else:
        pvi2014_map[thisid] = pvi2014
        pvi2017_map[thisid] = pvi2017
        numsch_map[thisid] = 1
    if index % 10000 == 0:
        print(index)                     #showing the progress

0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


In [6]:
#check the mapping for a random LEAID

print(pvi2014_map[100007.0])
print(pvi2017_map[100007.0])
print(numsch_map[100007.0])

-100.0
-110.0
17


In [7]:
# create two dictionaries to match 
#  1.LEAID - the average PVI in 2014 in that school district
#  2.LEAID - the average PVI in 2017 in that school district
average14 = {}
average17 = {}
for element in numsch_map.keys():
    average14[element] = pvi2014_map[element] / numsch_map[element]
    average17[element] = pvi2017_map[element] / numsch_map[element]

In [8]:
# check the two dictionaries
print(average14[100007.0])
print(average17[100007.0])

-5.882352941176471
-6.470588235294118


In [9]:
# turn the two dictionaries into pandas dataframe
# data is dictionary used to specify the key name and corresponding values in map
ave_14_df = pd.DataFrame(data = {'LEAID': list(average14.keys()), 'SD_lv_PVI_2014': list(average14.values())})
ave_17_df = pd.DataFrame(data = {'LEAID': list(average17.keys()), 'SD_lv_PVI_2017': list(average17.values())})

In [10]:
#check some random values
ave_14_df[ave_14_df.LEAID == 100007.0]

Unnamed: 0,LEAID,SD_lv_PVI_2014
3,100007.0,-5.882353


In [11]:
ave_17_df[ave_17_df.LEAID == 100007.0]

Unnamed: 0,LEAID,SD_lv_PVI_2017
3,100007.0,-6.470588


In [13]:
ave_14_df[ave_14_df.LEAID == 100002.0]

Unnamed: 0,LEAID,SD_lv_PVI_2014
0,100002.0,-3.833333


In [14]:
#merge the mapped dataframe PVI2014 to our source
merged1 = pd.merge(source, ave_14_df, how = 'left', on = ['LEAID'])

In [15]:
merged1

Unnamed: 0,SURVYEAR,FIPST,STABR,SEANAME,LEAID,ST_LEAID,SCHID,ST_SCHID,NCESSCH,MSTREET1,...,LSTATE16,LZIP16,ADDRESS16,ALL_RLA00NUMVALID_1516,ALL_RLA00PCTPROF_1516,ALL_MTH00NUMVALID_1516,ALL_MTH00PCTPROF_1516,ess_strength,prog_strength,SD_lv_PVI_2014
0,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1876.0,210-0001,1.000020e+10,1299 Hillsboro Parkway,...,AL,36057.0,"Central Office Bldg , Mt Meigs AL 36057.0",,,,,,,-3.833333
1,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1706.0,210-0040,1.000020e+10,8950 Roebuck Blvd,...,AL,35206.0,"8950 Roebuck Blvd , Birmingham AL 35206.0",,,,,,,-3.833333
2,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1670.0,210-0060,1.000020e+10,P O Box 66,...,AL,36784.0,"2109 Bashi Rd Bldg 509 , Thomasville AL 36784.0",,,,,,,-3.833333
3,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,277.0,210-0020,1.000020e+10,P O Box 9486,...,AL,35220.0,"1000 Industrial School Road , Birmingham AL 3...",,,,,,,-3.833333
4,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1705.0,210-0030,1.000020e+10,P O Box 66,...,AL,36057.0,"1000 Industrial School Road , Mount Meigs AL ...",,,,,,,-3.833333
5,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1667.0,210-0050,1.000020e+10,P O Box 66,...,AL,36067.0,"1601 County Rd. 57 , Prattville AL 36067.0",,,,,,,-3.833333
6,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,879.0,101-0110,1.000050e+10,901 W McKinney Ave,...,AL,35950.0,"901 W McKinney Ave , Albertville AL 35950.0",759.0,30,772.0,43,,,28.000000
7,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,1616.0,101-0035,1.000050e+10,257 Country Club Rd,...,AL,35951.0,"257 Country Club Rd , Albertville AL 35951.0",,,,,,,28.000000
8,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,871.0,101-0020,1.000050e+10,402 E McCord Ave,...,AL,35950.0,"402 E McCord Ave , Albertville AL 35950.0",307.0,31,307.0,15,,,28.000000
9,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,870.0,101-0010,1.000050e+10,600 E Alabama Ave,...,AL,35950.0,"600 E Alabama Ave , Albertville AL 35950.0",687.0,33,707.0,36,,,28.000000


In [16]:
#merge PVI2017 to our source
final = pd.merge(merged1, ave_17_df, how = 'left', on = ['LEAID'])

In [17]:
final

Unnamed: 0,SURVYEAR,FIPST,STABR,SEANAME,LEAID,ST_LEAID,SCHID,ST_SCHID,NCESSCH,MSTREET1,...,LZIP16,ADDRESS16,ALL_RLA00NUMVALID_1516,ALL_RLA00PCTPROF_1516,ALL_MTH00NUMVALID_1516,ALL_MTH00PCTPROF_1516,ess_strength,prog_strength,SD_lv_PVI_2014,SD_lv_PVI_2017
0,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1876.0,210-0001,1.000020e+10,1299 Hillsboro Parkway,...,36057.0,"Central Office Bldg , Mt Meigs AL 36057.0",,,,,,,-3.833333,-4.000000
1,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1706.0,210-0040,1.000020e+10,8950 Roebuck Blvd,...,35206.0,"8950 Roebuck Blvd , Birmingham AL 35206.0",,,,,,,-3.833333,-4.000000
2,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1670.0,210-0060,1.000020e+10,P O Box 66,...,36784.0,"2109 Bashi Rd Bldg 509 , Thomasville AL 36784.0",,,,,,,-3.833333,-4.000000
3,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,277.0,210-0020,1.000020e+10,P O Box 9486,...,35220.0,"1000 Industrial School Road , Birmingham AL 3...",,,,,,,-3.833333,-4.000000
4,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1705.0,210-0030,1.000020e+10,P O Box 66,...,36057.0,"1000 Industrial School Road , Mount Meigs AL ...",,,,,,,-3.833333,-4.000000
5,2015-2016,1.0,AL,Alabama Department Of Education,100002.0,210,1667.0,210-0050,1.000020e+10,P O Box 66,...,36067.0,"1601 County Rd. 57 , Prattville AL 36067.0",,,,,,,-3.833333,-4.000000
6,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,879.0,101-0110,1.000050e+10,901 W McKinney Ave,...,35950.0,"901 W McKinney Ave , Albertville AL 35950.0",759.0,30,772.0,43,,,28.000000,30.000000
7,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,1616.0,101-0035,1.000050e+10,257 Country Club Rd,...,35951.0,"257 Country Club Rd , Albertville AL 35951.0",,,,,,,28.000000,30.000000
8,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,871.0,101-0020,1.000050e+10,402 E McCord Ave,...,35950.0,"402 E McCord Ave , Albertville AL 35950.0",307.0,31,307.0,15,,,28.000000,30.000000
9,2015-2016,1.0,AL,Alabama Department Of Education,100005.0,101,870.0,101-0010,1.000050e+10,600 E Alabama Ave,...,35950.0,"600 E Alabama Ave , Albertville AL 35950.0",687.0,33,707.0,36,,,28.000000,30.000000


In [66]:
# turn the merged dataframe into csv
final.to_csv('pub_merged_2015_withSDlvPVI.csv', index = False)