In [2]:
# autoreload libraries
%load_ext autoreload
%autoreload 2

import pandas as pd

import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
import numpy as np
import folium
from folium.map import Element
from shapely import affinity
import seaborn as sns
from IPython.display import Markdown as md
from IPython.display import display, HTML

import xyzservices.providers as xyz

from nycschools import schools, geo, snapshot as snap
from maptools import ui

import warnings
warnings.filterwarnings('ignore')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# load all of the data and clean it up
loc = geo.load_school_locations()
districts = geo.load_districts()
demos = schools.load_school_demographics()
feet = geo.load_school_footprints()

# join demographics to our locations for the most recent year
demos = demos[demos.ay == demos.ay.max()]
loc.drop(columns=["geo_district", "district", "zip", "beds"], inplace=True)
loc = loc.merge(demos, on='dbn', how='inner')

In [6]:
# let's find all the colocated schools by matching schools with the same point
campuses = pd.DataFrame()
# give each location an id
campuses["geometry"] = loc.geometry.unique()
campuses["campus_id"] = campuses.index + 1
# give each location a campus id
df = loc.merge(campuses, on="geometry", how="inner")
# sort locations by size (descending) so the largest school on each campus appears "first"
df = df.sort_values(by=["campus_id", "open_year"], ascending=True)
campuses = df.groupby("campus_id").agg(
    num_schools=("dbn", "count"),
    campus=("school_name", "first"),
    open_year=("open_year", "first"),
    total_enrollment=("total_enrollment", "sum"),
    female_n=("female_n", "sum"),
    male_n=("male_n", "sum"),
    asian_n=("asian_n", "sum"),
    black_n=("black_n", "sum"),
    hispanic_n=("hispanic_n", "sum"),
    white_n=("white_n", "sum"),
    max_white_pct=("white_pct", "max"),
    min_white_pct=("white_pct", "min"),
    swd_n=("swd_n", "sum"),
    ell_n=("ell_n", "sum"),
    max_ell_pct=("ell_pct", "max"),
    min_ell_pct=("ell_pct", "min"),
    poverty_n=("poverty_n", "sum"),
    max_poverty_pct=("poverty_pct", "max"),
    min_poverty_pct=("poverty_pct", "min"),
    geometry=("geometry", "first")
).reset_index()

campuses["white_diff"] = campuses.max_white_pct - campuses.min_white_pct
campuses["ell_diff"] = campuses.max_white_pct - campuses.min_white_pct
campuses["poverty_diff"] = campuses.max_white_pct - campuses.min_white_pct


campuses['female_pct'] = campuses['female_n'] / campuses['total_enrollment']
campuses['male_pct'] = campuses['male_n'] / campuses['total_enrollment']
campuses['asian_pct'] = campuses['asian_n'] / campuses['total_enrollment']
campuses['black_pct'] = campuses['black_n'] / campuses['total_enrollment']
campuses['hispanic_pct'] = campuses['hispanic_n'] / campuses['total_enrollment']
campuses['white_pct'] = campuses['white_n'] / campuses['total_enrollment']
campuses['swd_pct'] = campuses['swd_n'] / campuses['total_enrollment']
campuses['ell_pct'] = campuses['ell_n'] / campuses['total_enrollment']
campuses['poverty_pct'] = campuses['poverty_n'] / campuses['total_enrollment']

campuses["plurality"] = campuses[['asian_n', 'black_n', 'hispanic_n', 'white_n']].idxmax(axis=1)
campuses.plurality = campuses.plurality.str.replace("_n", "")

campuses = gpd.GeoDataFrame(campuses, geometry="geometry")
_ = campuses.set_crs(loc.crs, inplace=True)

df["colocated"] = df.campus_id.isin(campuses[campuses.num_schools > 1].campus_id)
df["plurality"] = df[['asian_n', 'black_n', 'hispanic_n', 'white_n']].idxmax(axis=1)
df.plurality = df.plurality.str.replace("_n", "")


In [57]:

murry = df[df.campus_id == 43]
cols = ["dbn", "school_name", "total_enrollment", "open_year"]
x = murry[cols].apply(lambda x: f"""{x.school_name} ({x.dbn})
Opened: {x.open_year}
Total enrollment: {x.total_enrollment:,}""", axis=1)
murry[["dbn", "school_name", "total_enrollment", "open_year"]]

Unnamed: 0,dbn,school_name,total_enrollment,open_year
120,02M520,Murry Bergtraum High School for Business Careers,130,1975
48,02M135,The Urban Assembly School for Emergency Manage...,297,2013
65,02M280,Manhattan Early College School for Advertising,405,2014
67,02M282,Urban Assembly Maker Academy,530,2014


In [5]:
m = ui.base_map(districts, zoom=14)
m = districts.explore(m=m,
                      tooltip=False,
                      popup=False,
                      style_kwds={"color": "darkblue",
                                  "weight": .8, "fillOpacity": 0},
                      highlight=False)

_ = ui.label_shapes(m, districts, "district", {
                    "color": "darkgray", "weight": 2, "fillOpacity": 0, "font-size": "18px"})


def school_marker(school, m):

    marker = folium.Circle(
        location=[school.geometry.centroid.y, school.geometry.centroid.x],
        radius=4,
        color=school.color,
        fill=True,
        fill_color=school.color,
        fill_opacity=1,
        opacity=1,
)
    marker.add_to(m)

cluster_df = ui.cluster_radial(df[df.colocated], "campus_id", 40)


# create a new col "radius" which is a log scale of total_enrollment with max 8, min 3
# where the scale range is from the smallest school total_enrollment.min() to the largest school total_enrollment.max()
# logs = np.log(df.total_enrollment)
# logs_norm = (logs - logs.min()) / (logs.max() - logs.min())
# campuses["radius"] = logs_norm * (8 - 3) + 3

colos = campuses[campuses.num_schools > 1].copy()

# campus buildings

colos.explore(m=m, popup_kwds={"labels": False},
              tooltip_kwds={"labels": False},
              style_kwds={"color": "darkgray", "fillColor": "purple",
                          "weight": 1, "opacity": 1, "fillOpacity": 1},
              marker_kwds={"radius": 4},)



# # cluster edges
# cluster_df.set_geometry("spoke_geom").explore(m=m, style_kwds={ "color": "darkgray", "weight": .8}, highlight=False, tooltip=False)





# df[df.colocated == False].apply( lambda school: school_marker(school, m), axis=1)
# # cluster nodes
# cluster_df.apply(lambda school: school_marker(school, m), axis=1)


m

AttributeError: 'GeoDataFrame' object has no attribute 'colocated'

In [63]:
campuses

Unnamed: 0,campus_id,num_schools,campus,open_year,total_enrollment,female_n,male_n,asian_n,black_n,hispanic_n,white_n,max_white_pct,min_white_pct,swd_n,ell_n,max_ell_pct,min_ell_pct,poverty_n,max_poverty_pct,min_poverty_pct,geometry,white_diff,ell_diff,poverty_diff,female_pct,male_pct,asian_pct,black_pct,hispanic_pct,white_pct,swd_pct,ell_pct,poverty_pct,plurality
0,1,1,P.S. 015 Roberto Clemente,1904,189,95.0,94.0,19,44,107,9,0.047619,0.047619,44.0,24.0,0.126984,0.126984,163,0.862434,0.862434,POINT (-73.97875 40.72208),0.000000,0.000000,0.000000,0.502646,0.497354,0.100529,0.232804,0.566138,0.047619,0.232804,0.126984,0.862434,hispanic
1,2,1,P.S. 020 Anna Silver,1963,380,194.0,186.0,60,57,221,26,0.068421,0.068421,63.0,119.0,0.313158,0.313158,326,0.857895,0.857895,POINT (-73.98631 40.72131),0.000000,0.000000,0.000000,0.510526,0.489474,0.157895,0.150000,0.581579,0.068421,0.165789,0.313158,0.857895,hispanic
2,3,1,P.S. 034 Franklin D. Roosevelt,1955,217,98.0,119.0,5,81,122,7,0.032258,0.032258,57.0,39.0,0.179724,0.179724,209,0.960000,0.960000,POINT (-73.97506 40.72601),0.000000,0.000000,0.000000,0.451613,0.548387,0.023041,0.373272,0.562212,0.032258,0.262673,0.179724,0.963134,hispanic
3,4,2,The STAR Academy - P.S.63,1905,432,214.0,217.0,16,55,225,108,0.362832,0.126214,125.0,33.0,0.150485,0.008850,267,0.796117,0.455752,POINT (-73.98621 40.72444),0.236618,0.236618,0.236618,0.495370,0.502315,0.037037,0.127315,0.520833,0.250000,0.289352,0.076389,0.618056,hispanic
4,5,3,P.S. 064 Robert Simon,1954,709,358.0,351.0,40,111,405,122,0.266667,0.048387,233.0,44.0,0.134409,0.033537,504,0.903226,0.584615,POINT (-73.9816 40.72313),0.218280,0.218280,0.218280,0.504937,0.495063,0.056417,0.156559,0.571227,0.172073,0.328632,0.062059,0.710860,hispanic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1344,1345,1,Bronx Preparatory Charter School,2000,895,476.0,419.0,5,452,423,6,0.006704,0.006704,135.0,46.0,0.051397,0.051397,761,0.850279,0.850279,POINT (-73.90012 40.83907),0.000000,0.000000,0.000000,0.531844,0.468156,0.005587,0.505028,0.472626,0.006704,0.150838,0.051397,0.850279,black
1345,1346,1,Family Life Academy Charter School,2000,409,207.0,202.0,2,65,337,0,0.000000,0.000000,50.0,100.0,0.244499,0.244499,393,0.960000,0.960000,POINT (-73.91866 40.84026),0.000000,0.000000,0.000000,0.506112,0.493888,0.004890,0.158924,0.823961,0.000000,0.122249,0.244499,0.960880,hispanic
1346,1347,1,Harriet Tubman Charter School,2000,491,247.0,244.0,0,290,186,5,0.010183,0.010183,57.0,63.0,0.128310,0.128310,431,0.877800,0.877800,POINT (-73.90577 40.83233),0.000000,0.000000,0.000000,0.503055,0.496945,0.000000,0.590631,0.378819,0.010183,0.116090,0.128310,0.877800,black
1347,1348,1,Icahn Charter School 1,2000,337,187.0,150.0,8,184,138,1,0.002967,0.002967,26.0,13.0,0.038576,0.038576,309,0.916914,0.916914,POINT (-73.9047 40.83865),0.000000,0.000000,0.000000,0.554896,0.445104,0.023739,0.545994,0.409496,0.002967,0.077151,0.038576,0.916914,black


In [64]:
m = ui.base_map(campuses, zoom=12)

m = districts.explore(m=m,
                      tooltip=False,
                      popup=False,
                      style_kwds={"color": "darkblue", "weight": .8, "fillOpacity": 0},
                      highlight=False)

_ = ui.label_shapes(m, districts, "district", { "color": "darkgray", "weight": .8, "fillOpacity": 0, "font-size": "18px"})

demo_colors = {
    "asian": "darkred",
    "black": "darkblue",
    "hispanic": "darkgreen",
    "white": "darkorange",
    "poverty": "purple",
}


def demo_popup(row):
    name = ""
    if "dbn" in row:
        name = f"<b>{row.school_name}</b> ({row.dbn})"
    else:
        name = f"<b>{row.campus}</b> (Open {row.open_year})<br>{row.num_schools} schools"
        
    
    demo_labels = {
        "asian_pct": "Asian Students (%)",
        "black_pct": "Black Students (%)",
        "hispanic_pct": "Latinx Students (%)",
        "white_pct": "White Students (%)",
        "poverty_pct": "Poverty Rate (%)"
    }
    bars = []
    for k, v in demo_labels.items():
        color_key = k.split("_")[0]
        color = demo_colors.get(color_key, "purple")
        width = row[k] * 120
        pct = row[k]
        label = demo_labels[k]
        bar = f"""
  <div style="display: flex; margin-bottom: 2px; height: 16px; align-items: center">
    <div style="width: 120px; text-align: right; padding-right: 5px; font-weight: bold; font-size: 12px;">{label}</div>
    <div title="{pct:.1%}" style="flex-grow: 1;cursor: pointer; background-color:gray"><div style="background-color: {color}; height: 10px; width: {width}px;"></div></div>
  </div>"""
        bars.append(bar)

    html = f"""
<div style="width: 250px; padding: .5em; background-color:rgba(255,255,255,.9); color: black; border-radius: 8px">
  <h3 style="font-weight: bold; border-bottom: thin solid black;padding-top: 0; padding-bottom: .25em;">
    {name}
  </h3>
  <b>No. Students: {row.total_enrollment:,}</b>
  <div style="display: flex; flex-direction: column; width: 100%;">
    {"".join(bars)}
  </div>
</div>
"""
    return html



In [65]:


def school_marker(school, m):
    
    point = school.geometry.centroid

    marker = folium.Circle(
        location=[school.geometry.centroid.y, school.geometry.centroid.x],
        radius=10,
        color=school.color,
        fill=True,
        fill_color=school.color,
        fill_opacity=1,
        opacity=1,
        popup=school.popup,
        tooltip=school.school_name)
    marker.add_to(m)


df["color"] = df.plurality.map(demo_colors)
df["popup"] = df.apply(demo_popup, axis=1)
cluster_df = ui.cluster_radial(df[df.colocated], "campus_id", 80)



In [66]:


colos = campuses[campuses.num_schools > 1].copy()
colos["color"] = colos.plurality.map(demo_colors)
colos["popup"] = colos.apply(demo_popup, axis=1)
df[df.colocated == False].apply(lambda school: school_marker(school, m), axis=1)
# cluster edges
cluster_df.set_geometry("spoke_geom").explore(m=m, style_kwds={ "color": "darkgray", "weight": .8}, highlight=False, tooltip=False)


# colos.explore(m=m, popup_kwds={"labels": False}, tooltip_kwds={"labels": False},
#               style_kwds={"weight": .8, "fillOpacity": 1},
#               marker_kwds={"radius": "6" }, 
#               tooltip="campus", popup="popup")
colos.apply(lambda campus: school_marker(campus, m), axis=1)

# cluster nodes
cluster_df.apply(lambda school: school_marker(school, m), axis=1)

legend = {
    "Asian Student Plurality": demo_colors["asian"],
    "Black Student Plurality": demo_colors["black"],
    "Latinx Student Plurality": demo_colors["hispanic"],
    "White Student Plurality": demo_colors["white"],
}

m = ui.map_legend(m, list(legend.items()), "NYC Schools by Racial Plurality")

m

AttributeError: 'Series' object has no attribute 'school_name'

In [None]:

# m.save("nyc-colac-plurality.html")


In [None]:

from maptools import ui, census_vars
from IPython.display import Markdown as md
from census import Census
import us
import pandas as pd
import geopandas as gpd

import os
api_key = os.getenv('CENSUS_API_KEY')

# from google.colab import userdata
# api_key = userdata.get('CENSUS_API_KEY')
c = Census(api_key)


field_names = {
    'C16001_001E': 'total',
    'C16001_002E': 'only_english',
    'C16001_003E': 'spanish',
    'C16001_006E': 'french_haitian_or_cajun',
    'C16001_009E': 'german_or_other_west_germanic_languages',
    'C16001_012E': 'russian_polish_or_other_slavic_languages',
    'C16001_015E': 'other_indo_european_languages',
    'C16001_018E': 'korean',
    'C16001_021E': 'chinese',
    'C16001_024E': 'vietnamese',
    'C16001_027E': 'tagalog',
    'C16001_030E': 'other_asian_and_pacific_island_languages',
    'C16001_033E': 'arabic',
    'C16001_036E': 'other_languages'
}
fields = list(field_names.keys())
data = c.acs5.get(fields=fields, geo={'for': 'state:*'}, year=2022)

df = pd.DataFrame(data)
df.rename(columns=field_names, inplace=True)
df.sort_values("total", ascending=False, inplace=True)

df.head(10)

Unnamed: 0,total,only_english,spanish,french_haitian_or_cajun,german_or_other_west_germanic_languages,russian_polish_or_other_slavic_languages,other_indo_european_languages,korean,chinese,vietnamese,tagalog,other_asian_and_pacific_island_languages,arabic,other_languages,state
4,37097796.0,20809671.0,10478088.0,134920.0,118844.0,256925.0,1210840.0,359747.0,1269524.0,559059.0,772833.0,713292.0,204651.0,209402.0,6
43,27319920.0,17737503.0,7790925.0,90641.0,74492.0,59500.0,426039.0,66818.0,183619.0,231673.0,86800.0,271935.0,110884.0,189091.0,48
9,20529964.0,14384847.0,4549382.0,538169.0,80397.0,128712.0,367930.0,17768.0,76455.0,69570.0,70430.0,94225.0,70511.0,81568.0,12
32,18872507.0,13097954.0,2784045.0,281762.0,261221.0,388155.0,742066.0,91429.0,602162.0,22091.0,78814.0,177857.0,117917.0,227034.0,36
38,12300637.0,10848115.0,649164.0,54924.0,110679.0,86863.0,196453.0,27226.0,94107.0,35343.0,17035.0,84835.0,40409.0,55484.0,42
13,12036469.0,9218963.0,1634951.0,42258.0,38322.0,296204.0,301817.0,41039.0,107876.0,23942.0,82943.0,111259.0,69061.0,67834.0,17
35,11098280.0,10271569.0,261624.0,31865.0,100200.0,47081.0,124174.0,9656.0,47571.0,11681.0,13018.0,57201.0,45552.0,77088.0,39
10,10087026.0,8625820.0,810575.0,58914.0,29550.0,30147.0,153567.0,50008.0,56041.0,57421.0,15068.0,83126.0,20651.0,96138.0,13
33,9880447.0,8663829.0,774886.0,38024.0,25773.0,27594.0,102777.0,16130.0,38296.0,24822.0,15442.0,82015.0,30931.0,39928.0,37
22,9505118.0,8561525.0,274677.0,23036.0,44277.0,54395.0,161374.0,16691.0,47174.0,15412.0,15746.0,68333.0,161723.0,60755.0,26
