# (prototype) Assemble schools

In this kernel I assemble school information from the 2016-2017 and 2017-2018 school years for the class that took the SHSAT in October 2017.

![](../data/keep/timeline.png)

In [61]:
import pandas as pd

# Display settings: show up to 100 rows and never truncate the column list.
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
):
    pd.set_option(option_name, option_value)

In [62]:
# List the preprocessed pickle files available as inputs.
!ls ../data/pre

middle_school_base.pkl		  school_locations.pkl	    shsat_table.pkl
nyt_table.pkl			  schools2016.pkl	    test_results.pkl
school_demographics_20162017.pkl  schools_demographics.pkl


In [63]:
# Load the four preprocessed tables; they are read in the order listed and
# bound, in that same order, to the names on the left.
locations, tests, shsat, demographics = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../data/pre/school_locations.pkl',  # School Locations (2017-2018)
        '../data/pre/test_results.pkl',  # NYS Test Results (from 2013 to 2017)
        '../data/pre/shsat_table.pkl',  # SHSAT Testers and Offers (2017-18)
        '../data/pre/school_demographics_20162017.pkl',  # School Quality Report (2016-17)
    )
)

In [64]:
locations.shape  # (rows, columns) of the school locations table

(1822, 3)

In [65]:
tests.shape  # (rows, columns) before any filtering by grade or year

(46305, 25)

In [66]:
shsat.shape  # (rows, columns); these SHSAT testers/offers are our targets

(594, 5)

In [67]:
demographics.shape  # (rows, columns) of the 2016-17 demographics table

(1269, 9)

# Prepare

## Tests DataFrame

In [78]:
# Reload the NYS test results from disk (so the cell can be re-run safely)
# and keep only the Grade 7 rows for Year 2017, indexed by DBN.
tests = (
    pd.read_pickle('../data/pre/test_results.pkl')  # NYS Test Results
    .reset_index()
    .loc[lambda frame: (frame['Grade'] == 7) & (frame['Year'] == 2017)]
    .set_index('DBN')
)
tests.head()

Unnamed: 0_level_0,Grade,Year,Charter School?,Number Tested - ELA,Mean Scale Score - ELA,# Level 1 - ELA,% Level 1 - ELA,# Level 2 - ELA,% Level 2 - ELA,# Level 3 - ELA,% Level 3 - ELA,# Level 4 - ELA,% Level 4 - ELA,# Level 3+4 - ELA,% Level 3+4 - ELA,Number Tested - Math,Mean Scale Score - Math,# Level 1 - Math,% Level 1 - Math,# Level 2 - Math,% Level 2 - Math,# Level 3 - Math,% Level 3 - Math,# Level 4 - Math,% Level 4 - Math,# Level 3+4 - Math,% Level 3+4 - Math
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
01M015,7,2017,0,0.0,,,,,,,,,,,,0.0,,,,,,,,,,,
01M019,7,2017,0,0.0,,,,,,,,,,,,0.0,,,,,,,,,,,
01M020,7,2017,0,0.0,,,,,,,,,,,,0.0,,,,,,,,,,,
01M034,7,2017,0,54.0,298.2778,9.0,16.66667,32.0,59.25926,12.0,22.22222,1.0,1.851852,13.0,24.07407,52.0,285.9423,27.0,51.92308,18.0,34.61538,7.0,13.46154,0.0,0.0,7.0,13.46154
01M063,7,2017,0,0.0,,,,,,,,,,,,0.0,,,,,,,,,,,


In [107]:
# Test-result columns carried into the joined table: the charter flag plus,
# for ELA and Math, the mean scale score and the share at levels 2, 3 and 4.
test_columns = [
    'Charter School?',
    'Mean Scale Score - ELA',
    '% Level 2 - ELA',
    '% Level 3 - ELA',
    '% Level 4 - ELA',
    'Mean Scale Score - Math',
    '% Level 2 - Math',
    '% Level 3 - Math',
    '% Level 4 - Math',
]

# Reload from disk so the cell is re-runnable, keep Grade 7 / Year 2017 rows,
# index by DBN and narrow to the columns listed above.
tests = (
    pd.read_pickle('../data/pre/test_results.pkl')  # NYS Test Results
    .reset_index()
    .loc[lambda frame: (frame['Grade'] == 7) & (frame['Year'] == 2017)]
    .set_index('DBN')
    .loc[:, test_columns]
)

tests.head()

Unnamed: 0_level_0,Charter School?,Mean Scale Score - ELA,% Level 2 - ELA,% Level 3 - ELA,% Level 4 - ELA,Mean Scale Score - Math,% Level 2 - Math,% Level 3 - Math,% Level 4 - Math
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
01M015,0,,,,,,,,
01M019,0,,,,,,,,
01M020,0,,,,,,,,
01M034,0,298.2778,59.25926,22.22222,1.851852,285.9423,34.61538,13.46154,0.0
01M063,0,,,,,,,,


In [80]:
tests.shape  # (rows, columns) after the Grade 7 / Year 2017 filter and column selection

(1323, 9)

# Join

In [126]:
# Left-join each feature table onto the SHSAT targets by index, so the result
# keeps exactly the schools we have targets for.
joined = shsat.join(locations).join(tests).join(demographics)

# A left join silently repeats rows when a right-hand table has duplicate
# index keys; fail loudly here instead of letting duplicated schools through.
assert len(joined) == len(shsat), (
    'join changed the row count: {} -> {}'.format(len(shsat), len(joined))
)

# Share of students outside the four reported groups. The columns are named
# explicitly (rather than sliced by label) so the result does not depend on
# the column order of the demographics table. skipna=False keeps the result
# NaN whenever any of the four percentages is missing.
ethnicity_columns = [
    'Percent Asian',
    'Percent Black',
    'Percent Hispanic',
    'Percent White',
]
joined['Percent Other'] = 1 - joined[ethnicity_columns].sum(axis=1, skipna=False)
joined.head()

Unnamed: 0_level_0,# Students in HS Admissions,# SHSAT Testers,# SHSAT Offers,% SHSAT Testers,% SHSAT Offers,Latitude,Longitude,Borough,Charter School?,Mean Scale Score - ELA,% Level 2 - ELA,% Level 3 - ELA,% Level 4 - ELA,Mean Scale Score - Math,% Level 2 - Math,% Level 3 - Math,% Level 4 - Math,School Name,Percent Asian,Percent Black,Percent Hispanic,Percent White,Percent English Language Learners,Percent Students with Disabilities,Percent of Students Chronically Absent,Economic Need Index,Percent Other
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
01M034,58.0,6.0,,0.103448,,40.726008,-73.975058,manhattan,0.0,298.2778,59.25926,22.22222,1.851852,285.9423,34.61538,13.46154,0.0,P.S. 034 Franklin D. Roosevelt,0.053,0.297,0.614,0.03,0.08,0.38,0.305,0.872,0.006
01M140,67.0,6.0,,0.089552,,40.719148,-73.983769,manhattan,0.0,297.4545,60.0,21.81818,1.818182,291.2456,38.59649,14.03509,0.0,P.S. 140 Nathan Straus,0.044,0.121,0.794,0.03,0.124,0.338,0.395,0.853,0.011
01M184,88.0,67.0,23.0,0.761364,0.261364,40.711125,-73.985438,manhattan,0.0,330.1358,20.98765,39.50617,34.5679,339.6988,16.86747,28.91566,44.57831,P.S. 184m Shuang Wen,0.697,0.044,0.124,0.085,0.171,0.192,0.041,0.56,0.05
01M188,59.0,,,,,40.719888,-73.97738,manhattan,0.0,295.6667,45.09804,15.68627,1.960784,294.5,46.15385,21.15385,0.0,P.S. 188 The Island School,0.019,0.32,0.625,0.036,0.154,0.332,0.369,0.938,0.0
01M301,51.0,11.0,,0.215686,,40.730009,-73.984496,manhattan,0.0,304.7073,46.34146,31.70732,4.878049,288.1951,36.58537,12.19512,2.439024,"Technology, Arts, and Sciences Studio",0.087,0.323,0.496,0.055,0.047,0.441,0.382,0.746,0.039


In [128]:
# Final column order, built from thematic groups: school identity and
# location, ethnic composition, student-need indicators, state test results,
# and finally the SHSAT columns.
identity_columns = [
    'School Name',
    'Charter School?',
    'Borough',
    'Latitude',
    'Longitude',
]
ethnicity_columns = [
    'Percent Asian',
    'Percent Black',
    'Percent Hispanic',
    'Percent White',
    'Percent Other',
]
need_columns = [
    'Percent English Language Learners',
    'Percent Students with Disabilities',
    'Percent of Students Chronically Absent',
    'Economic Need Index',
]
score_columns = [
    'Mean Scale Score - ELA',
    '% Level 2 - ELA',
    '% Level 3 - ELA',
    '% Level 4 - ELA',
    'Mean Scale Score - Math',
    '% Level 2 - Math',
    '% Level 3 - Math',
    '% Level 4 - Math',
]
shsat_columns = [
    '# Students in HS Admissions',
    '# SHSAT Testers',
    '# SHSAT Offers',
    '% SHSAT Testers',
    '% SHSAT Offers',
]

columns = (
    identity_columns
    + ethnicity_columns
    + need_columns
    + score_columns
    + shsat_columns
)
joined = joined[columns]
joined.head()

Unnamed: 0_level_0,School Name,Charter School?,Borough,Latitude,Longitude,Percent Asian,Percent Black,Percent Hispanic,Percent White,Percent Other,Percent English Language Learners,Percent Students with Disabilities,Percent of Students Chronically Absent,Economic Need Index,Mean Scale Score - ELA,% Level 2 - ELA,% Level 3 - ELA,% Level 4 - ELA,Mean Scale Score - Math,% Level 2 - Math,% Level 3 - Math,% Level 4 - Math,# Students in HS Admissions,# SHSAT Testers,# SHSAT Offers,% SHSAT Testers,% SHSAT Offers
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
01M034,P.S. 034 Franklin D. Roosevelt,0.0,manhattan,40.726008,-73.975058,0.053,0.297,0.614,0.03,0.006,0.08,0.38,0.305,0.872,298.2778,59.25926,22.22222,1.851852,285.9423,34.61538,13.46154,0.0,58.0,6.0,,0.103448,
01M140,P.S. 140 Nathan Straus,0.0,manhattan,40.719148,-73.983769,0.044,0.121,0.794,0.03,0.011,0.124,0.338,0.395,0.853,297.4545,60.0,21.81818,1.818182,291.2456,38.59649,14.03509,0.0,67.0,6.0,,0.089552,
01M184,P.S. 184m Shuang Wen,0.0,manhattan,40.711125,-73.985438,0.697,0.044,0.124,0.085,0.05,0.171,0.192,0.041,0.56,330.1358,20.98765,39.50617,34.5679,339.6988,16.86747,28.91566,44.57831,88.0,67.0,23.0,0.761364,0.261364
01M188,P.S. 188 The Island School,0.0,manhattan,40.719888,-73.97738,0.019,0.32,0.625,0.036,0.0,0.154,0.332,0.369,0.938,295.6667,45.09804,15.68627,1.960784,294.5,46.15385,21.15385,0.0,59.0,,,,
01M301,"Technology, Arts, and Sciences Studio",0.0,manhattan,40.730009,-73.984496,0.087,0.323,0.496,0.055,0.039,0.047,0.441,0.382,0.746,304.7073,46.34146,31.70732,4.878049,288.1951,36.58537,12.19512,2.439024,51.0,11.0,,0.215686,


In [117]:
# Fraction of missing values in each column.
joined.isna().mean()

School Name                               0.003367
Charter School?                           0.003367
Borough                                   0.000000
Latitude                                  0.000000
Longitude                                 0.000000
Percent Asian                             0.003367
Percent Black                             0.003367
Percent Hispanic                          0.003367
Percent White                             0.003367
Percent Other                             0.000000
Percent English Language Learners         0.003367
Percent Students with Disabilities        0.003367
Percent of Students Chronically Absent    0.020202
Economic Need Index                       0.003367
Mean Scale Score - ELA                    0.005051
% Level 2 - ELA                           0.005051
% Level 3 - ELA                           0.005051
% Level 4 - ELA                           0.005051
Mean Scale Score - Math                   0.010101
% Level 2 - Math               

In [131]:
shsat.shape  # row count to compare against the joined table

(594, 5)

In [130]:
joined.shape  # the left joins should leave the same number of rows as shsat

(594, 27)

# Missing info

Which schools have missing information?

In [137]:
# Flag rows with a missing value in any column up to and including
# '% Level 4 - Math'; the columns after it are left out of the check.
feature_is_missing = joined.loc[:, :'% Level 4 - Math'].isnull()
has_missing_feature = feature_is_missing.any(axis=1)
joined[has_missing_feature].sort_values('Mean Scale Score - ELA', ascending=False)

Unnamed: 0_level_0,School Name,Charter School?,Borough,Latitude,Longitude,Percent Asian,Percent Black,Percent Hispanic,Percent White,Percent Other,Percent English Language Learners,Percent Students with Disabilities,Percent of Students Chronically Absent,Economic Need Index,Mean Scale Score - ELA,% Level 2 - ELA,% Level 3 - ELA,% Level 4 - ELA,Mean Scale Score - Math,% Level 2 - Math,% Level 3 - Math,% Level 4 - Math,# Students in HS Admissions,# SHSAT Testers,# SHSAT Offers,% SHSAT Testers,% SHSAT Offers
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
84M482,Success Academy Charter School - Harlem 5,1.0,manhattan,40.820216,-73.944551,0.009,0.675,0.281,0.009,0.026,0.059,0.211,,0.721,344.0862,6.896552,44.82759,48.27586,353.3276,1.724138,43.10345,55.17241,59.0,40.0,,0.677966,
17K590,Medgar Evers College Preparatory School,0.0,brooklyn,40.66679,-73.951823,0.036,0.87,0.039,0.003,0.052,0.0,0.01,0.062,0.44,342.7,14.44444,31.11111,54.44444,,,,,88.0,15.0,,0.170455,
84X494,Success Academy Charter School - Bronx 2,1.0,bronx,40.835934,-73.90497,0.011,0.634,0.339,0.011,0.005,0.061,0.142,,0.781,341.8485,3.030303,56.06061,40.90909,359.6269,0.0,28.35821,71.64179,65.0,36.0,6.0,0.553846,0.092308
84M384,Success Academy Charter School - Harlem 2,1.0,manhattan,40.805653,-73.935461,0.016,0.694,0.263,0.019,0.008,0.026,0.192,,0.727,337.8611,9.722222,52.77778,37.5,346.3611,8.333333,43.05556,48.61111,69.0,37.0,,0.536232,
84X493,Success Academy Charter School - Bronx 1,1.0,bronx,40.813681,-73.925995,0.013,0.545,0.42,0.004,0.018,0.052,0.144,,0.786,336.6909,10.90909,58.18182,30.90909,351.1273,3.636364,41.81818,54.54545,47.0,30.0,,0.638298,
84M351,Success Academy Charter School - Harlem 1,1.0,manhattan,40.802649,-73.948088,0.01,0.737,0.204,0.014,0.035,0.037,0.183,,0.701,331.1308,15.88785,62.61682,21.49533,339.7664,9.345794,49.53271,37.38318,101.0,35.0,,0.346535,
84M385,Success Academy Charter School - Harlem 3,1.0,manhattan,40.785107,-73.942154,0.013,0.601,0.343,0.017,0.026,0.034,0.164,,0.723,330.061,17.07317,64.63415,18.29268,342.6173,12.34568,48.14815,38.2716,69.0,29.0,,0.42029,
84M386,Success Academy Charter School - Harlem 4,1.0,manhattan,40.801573,-73.955583,0.009,0.66,0.281,0.019,0.031,0.041,0.226,,0.692,328.3676,22.05882,52.94118,23.52941,339.2647,13.23529,36.76471,44.11765,66.0,20.0,,0.30303,
02M347,The 47 American Sign Language & English Lower ...,0.0,manhattan,40.738374,-73.981329,0.014,0.236,0.557,0.143,0.05,0.064,0.593,0.268,0.724,319.8333,33.33333,50.0,16.66667,,,,,9.0,,,,
29Q283,Preparatory Academy for Writers: A College Boa...,0.0,queens,40.66903,-73.757744,0.022,0.868,0.071,0.005,0.034,0.027,0.181,0.09,0.381,312.9556,40.0,42.22222,6.666667,,,,,43.0,10.0,,0.232558,


We will examine them later. For now, we drop them.