In [1]:
import geopandas as gpd
import pandas as pd
import requests

In [2]:
# https://data.cityofnewyork.us/Education/2021-DOE-Middle-School-Directory/f6s7-vytj/about_data
# Download the middle school directory as a list of JSON records.
# - timeout: without one, requests waits indefinitely on a stalled connection.
# - raise_for_status(): surface HTTP errors here, so an error payload is never
#   parsed and counted below as if it were school records.
# NOTE(review): no `$limit` is sent, so the API's default page size applies --
# confirm that the record count shown below is the complete dataset.
r = requests.get(
    'https://data.cityofnewyork.us/resource/f6s7-vytj.json',
    timeout=30,
)
r.raise_for_status()
school_directory = r.json()
len(school_directory)

474

In [3]:
# source: https://data.cityofnewyork.us/Education/School-Point-Locations/jfju-ynrr/about_data
# Load the school point shapefile, discard the columns that are not used,
# then key the remaining rows by DBN (the shapefile's 'ATS' field).
gdf = (
    gpd.read_file('data/SchoolPoints_APS_2024_08_28/SchoolPoints_APS_2024_08_28.shp')
    .drop(columns=['Building_C', 'Location_C', 'Name'])
    .rename(columns={'ATS': 'DBN', 'Geographic': 'District'})
    .set_index('DBN')
)
# Preview the last five rows (tail's default count).
gdf.tail()

Unnamed: 0_level_0,District,Latitude,Longitude,geometry
DBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
88X966,8,40.816494,-73.890278,POINT (-8225428.122 4985312.134)
88X994,12,40.829306,-73.892243,POINT (-8225646.864 4987196.846)
88X995,10,40.857248,-73.903165,POINT (-8226862.696 4991308.524)
88X996,8,40.821218,-73.85593,POINT (-8221604.520 4986007.017)
93M359,2,40.747398,-73.992832,POINT (-8236844.381 4975154.021)


In [4]:
# https://infohub.nyced.org/reports/students-and-schools/school-quality/school-quality-reports-and-resources
excel_path = 'data/2022-23 NYC Schools Data.xlsx'


def read_sheet(sheet_name):
    """Read one worksheet of the workbook at ``excel_path`` in long form.

    The sheet is loaded as-is and then unpivoted around its ``DBN`` column,
    so every other column becomes a (``Variable``, ``Value``) pair.

    Parameters
    ----------
    sheet_name
        Worksheet selector, passed straight through to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        Columns ``DBN``, ``Variable`` and ``Value``: one row per original
        row and non-``DBN`` column.
    """
    wide = pd.read_excel(excel_path, sheet_name=sheet_name)
    return pd.melt(
        wide,
        id_vars='DBN',
        var_name='Variable',
        value_name='Value',
    )

In [5]:
# Stack the long-form version of each report sheet into a single frame and
# discard rows that are exact repeats across sheets.
norm_df = pd.concat(
    [
        read_sheet(sheet)
        for sheet in ('Summary', 'Student Achievement', 'Framework', 'Additional Info')
    ],
    axis=0,
    ignore_index=True,
).drop_duplicates()

In [7]:
# DBNs of every school whose 'School Type' variable has the value 'Middle'.
middle_schools = norm_df.loc[
    norm_df['Variable'].eq('School Type') & norm_df['Value'].eq('Middle'),
    'DBN',
].values

# Build one wide row per middle school: pivot the long-form variables back into
# columns, then left-join the directory records and the school points on DBN.
df_ms = (
    norm_df.loc[norm_df['DBN'].isin(middle_schools)]
    .reset_index(drop=True)
    .pivot(index='DBN', columns='Variable', values='Value')
    .join(pd.DataFrame(school_directory).set_index('schooldbn'))
    .join(gdf)
    # Keep DBN as the first index level and add the school name as the second.
    .set_index('School Name', append=True)
)
df_ms

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Incoming ELA Proficiency (Based on 5th Grade),Average Incoming Math Proficiency (Based on 5th Grade),Average Student Attendance,Collaborative Teachers - City Positive Responses,Collaborative Teachers - District Positive Responses,Collaborative Teachers - Element Score,Collaborative Teachers - Percent Positive,Collaborative Teachers Rating,Comparison Group - 9th Grade Adjusted Credit Accumulation of Former 8th Graders,"Comparison Group - Average Student Proficiency, ELA",...,swdfilled_prog15,eligibility_prog15,priority1_prog7,priority2_prog7,priority1_prog8,priority2_prog8,District,Latitude,Longitude,geometry
DBN,School Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01M332,University Neighborhood Middle School,2.73,2.21,0.839,84%,86%,3.44,0.75,Meeting Target,0.801192,2.754322,...,,,,,,,1.0,40.713362,-73.986051,POINT (-8236089.523 4970154.116)
01M378,School for Global Leaders,2.59,2.35,0.878,84%,86%,4.32,0.94,Exceeding Target,0.876439,2.900428,...,,,,,,,1.0,40.720040,-73.986038,POINT (-8236088.076 4971134.916)
01M450,East Side Community School,2.73,2.47,0.934,79%,86%,4.59,0.89,Exceeding Target,0.937712,2.940531,...,,,,,,,1.0,40.729152,-73.982472,POINT (-8235691.111 4972473.356)
01M839,Tompkins Square Middle School,2.81,2.67,0.926,84%,86%,3.63,0.87,Meeting Target,0.925021,3.03625,...,,,,,,,1.0,40.723130,-73.981597,POINT (-8235593.706 4971588.778)
02M104,J.H.S. 104 Simon Baruch,3.41,3.41,0.927,84%,81%,3.59,0.8,Meeting Target,0.966661,3.578007,...,,,,,,,2.0,40.735879,-73.981034,POINT (-8235531.033 4973461.588)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84X612,Creo College Preparatory Charter School,2.35,2.08,0.897,84%,83%,,,,,2.856616,...,,,,,,,7.0,40.816145,-73.919432,POINT (-8228673.530 4985260.800)
84X615,University Prep Charter Middle School,2.81,2.45,0.913,84%,83%,3.56,0.83,Meeting Target,0.91671,2.956732,...,,,,,,,7.0,40.811051,-73.909363,POINT (-8227552.654 4984511.551)
84X616,KIPP Bronx Charter School II,2.77,2.47,0.916,82%,83%,,0.84,,,2.906387,...,,,,,,,10.0,40.868781,-73.896376,POINT (-8226106.948 4993006.115)
84X627,Capital Preparatory Bronx Charter School,2.76,2.51,0.961,79%,83%,3.36,0.75,Meeting Target,0.89765,3.091544,...,,,,,,,11.0,40.879852,-73.826791,POINT (-8218360.781 4994635.981)


In [10]:
# Parquet export is left disabled; the CSV below is the only file written.
#df_ms.to_parquet('data/nyc_schools.parquet')
# Leave the 'geometry' column out of the flat-file export.
df_ms.drop('geometry', axis='columns').to_csv('data/nyc_schools.csv')