NYC Shared Location Maps
=========================================

In [31]:
# autoreload libraries
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np

import geopandas as gpd
import folium

from nycschools import dataloader as dl, geo, schools
from maptools import ui, tiger

import warnings
warnings.filterwarnings('ignore')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
import plotly.express as px

# Line chart: number of schools reporting demographics in each academic year.
df = schools.load_school_demographics()

# Rows are counted per year; this equals the number of schools only if the
# frame has one row per school (dbn) per year — TODO confirm, otherwise
# switch `.count()` to `.nunique()`.
data = (
    df[["ay", "dbn"]]
    .groupby("ay")
    .count()
    .reset_index()
    .rename(columns={"ay": "Academic Year", "dbn": "Num. Schools"})
)

fig = px.line(
    data,
    x="Academic Year",
    y="Num. Schools",
    markers=True,
    title="<b>Number of Public Schools in NYC (2005-2024)</b>",
)

fig.update_layout(
    xaxis=dict(
        title="<b>Academic Year</b>",
        tickmode="array",
        tickvals=[2005, 2010, 2015, 2020, 2023]
    ),
    yaxis_title="<b>Number of Schools</b>",
    margin=dict(l=40, r=20, t=60, b=40)
)
fig.show()

# Resolve the export path against the current user's home directory rather
# than one machine's absolute path, so the notebook runs for other users too.
fig.write_image(os.path.expanduser("~/Downloads/num-schools-nyc.png"), width=1920, height=1080)

In [33]:
import plotly.express as px

# Line chart comparing all schools against community (non-charter) schools.
CHARTER_DISTRICT = 84  # rows in this district are counted as charter schools

df = schools.load_school_demographics()
# 0/1 flags, so a per-year sum is a per-year count of schools of that type.
df["Community + Charter Schools"] = 1
df["Community Schools"] = 1
df.loc[df.district == CHARTER_DISTRICT, "Community Schools"] = 0

data = (
    df[["ay", "Community + Charter Schools", "Community Schools"]]
    .groupby("ay")
    .sum()
    .reset_index()
    .rename(columns={"ay": "Academic Year"})
    # long format: one row per (year, school type) so plotly can color by type
    .melt(id_vars="Academic Year", var_name="School Type", value_name="Count")
)
# the chart starts at 2010
data = data[data["Academic Year"] > 2009]

fig = px.line(
    data,
    x="Academic Year",
    y="Count",
    color="School Type",
    markers=True
)

fig.update_layout(
    xaxis=dict(
        title="<b>Academic Year</b>",
        tickmode="array",
        # every second year, plus the final year of the series
        tickvals=list(range(2010, 2023, 2)) +  [2023]
    ),
    yaxis_title="<b>Number of Schools</b>",
    margin=dict(l=40, r=20, t=60, b=40)
)
fig.show()

# Resolve the export path against the current user's home directory rather
# than one machine's absolute path, so the notebook runs for other users too.
fig.write_image(os.path.expanduser("~/Downloads/num-schools-by-type.svg"), width=1600, height=1080)

In [34]:
# Compare school counts by type between the first and last year of interest.
CHARTER_DISTRICT = 84  # rows in this district are counted as charter schools
BASE_YEAR, LAST_YEAR = 2010, 2023

df = schools.load_school_demographics()
# 0/1 flags, so a per-year sum is a per-year count of schools of that type.
df["Community Schools"] = 1
df["Charter Schools"] = 0
df.loc[df.district == CHARTER_DISTRICT, "Charter Schools"] = 1
df.loc[df.district == CHARTER_DISTRICT, "Community Schools"] = 0

data = df[["ay", "Charter Schools", "Community Schools"]].groupby("ay").sum().reset_index()
# .copy() so the column assignments below act on an independent frame,
# not on a filtered view of the grouped result
data = data[data.ay.isin([BASE_YEAR, LAST_YEAR])].copy()
data.columns = ["ay", "charter", "com"]
data["all"] = data.charter + data.com

# Derive the headline numbers from the table instead of typing them in, so
# they cannot drift from the data.
by_year = data.set_index("ay")
first, last = by_year.loc[BASE_YEAR], by_year.loc[LAST_YEAR]
new_all = last["all"] - first["all"]
new_com = last["com"] - first["com"]
new_charter = last["charter"] - first["charter"]

# NOTE(review): the recorded output shows 0 charter schools in 2010; if that
# reflects data coverage rather than reality, "new schools" overstates growth.
print(new_all, "new schools")
print(new_com, "new com schools")
# share of the increase that is charter schools
print(f"{new_charter / new_all:%}")
data

394 new schools
120 new com schools
69.543147%


Unnamed: 0,ay,charter,com,all
5,2010,0,1498,1498
17,2023,274,1618,1892


Load required data
------------------
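This notebook works with the following data:
- `districts` - 32 geographic school district polygons
- `campuses` - school campuses (one or more schools sharing a location)
- `buildings` - school building footprints
- `tracts` and `census_df` - NYC census tract polygons and their census demographics
- `data` - per-school student demographics with point locations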

In [35]:
# Load every dataset used by the map cells below.
# NOTE(review): the recorded output of this cell is "HTTP Error 404: Not Found",
# so at least one of these loads failed on the last saved run (the traceback
# does not show which). Any name that failed to load keeps whatever value an
# earlier cell gave it — in particular `data`, which earlier cells also assign.

# school district polygons: base-map extent, boundary layer and labels
districts = geo.load_districts()
# campus records: joined onto `buildings` and drawn as gray center nodes
# NOTE(review): loaded without gdf=True (unlike `data`), yet later code reads
# `.geometry` from each row — confirm this comes back as a GeoDataFrame.
campuses = dl.load("nyc-campuses.feather")
# school building footprints, shaded by poverty rate on the map
buildings = dl.load("school-buildings.geojson")
# census tract polygons and per-tract census figures (joined on geoid later)
tracts = dl.load("nyc-census-tracts.geojson")
census_df = dl.load("nyc-census-demo-24.csv")
# per-school demographics; gdf=True presumably returns a GeoDataFrame — verify
data = dl.load("radial-map-school-demographics.feather", gdf=True)

HTTPError: HTTP Error 404: Not Found

Get some basic demographic statistics
-------------------------------------
Calculate the counts and percentages for our basic race/ethnic demographic categories for all of the schools in our data set.

In [74]:
import plotly.graph_objects as go

# Bar chart of each racial/ethnic group's share of citywide enrollment.
# NOTE(review): `data` must be the per-school demographics frame from the
# loading cell; earlier cells reuse the name `data` for yearly summaries.

# bar color per group key
chart_colors = {
    "asian": "darkred",
    "black": "darkblue",
    "hispanic": "darkgreen",
    "white": "tomato",
    "native": "slateblue",
    "multi": "goldenrod",
    "missing": "gray"
}

# display label per group key
labels = {
    "asian": "Asian Students",
    "black": "Black/African Am. Students",
    "hispanic": "Latine Students",
    "white": "White Students",
    "native": "Native American/American Indian Students",
    "multi": "Multiracial Students",
    "missing": "Missing Data"
}

cols = ["asian_n", "black_n", "hispanic_n",
        "white_n", "native_american_n", "multi_racial_n",
        "missing_race_ethnicity_data_n", "total_enrollment"]

# Citywide head counts, then each group's share of total enrollment. The
# group key is the first "_"-separated token of the count column
# ("native_american_n" -> "native"). Missing data is left out of the chart.
totals = data[cols].sum()
shares = {
    c.split("_")[0]: totals[c] / totals["total_enrollment"]
    for c in cols
    if c not in ("missing_race_ethnicity_data_n", "total_enrollment")
}
city = pd.Series(shares).sort_values(ascending=False)

# Everything below follows the sorted order of `city`. The display labels get
# their own name so the `labels` lookup dict above is not overwritten.
values = city
bar_labels = [labels[k] for k in city.index]
colors = [chart_colors[k] for k in city.index]
text = [f"{v:.0%}" for v in values]


fig = go.Figure(go.Bar(
    x=bar_labels,
    y=values,
    text=text,
    textposition="auto",
    marker_color=colors,
))

fig.update_layout(
    title="<b>Citywide Racial/Ethnic Enrollment Breakdown</b>",
    yaxis_title="Percent of Total Enrollment",
    yaxis_tickformat=".0%",
    plot_bgcolor="white",
    margin=dict(l=40, r=20, t=60, b=40),
)

fig.show()

# Resolve the export path against the current user's home directory rather
# than one machine's absolute path, so the notebook runs for other users too.
fig.write_image(os.path.expanduser("~/Downloads/nyc-student-demo.png"), width=1920, height=1080)

Discover majority ethnicities
-----------------------------
In our data set (2023-2024 school year), there are 997,141 students in NYC public schools.
The City records the following ethnic/racial categories when students enroll in school:

-  **43%** Hispanic/Latine students
-  **23%** Black/African American students
-  **16%** Asian/Pacific Islander students
-  **14%** White students
- **1.7%** Native American/American Indian students
- **1.2%** Multi-racial students
- **0.4%** Missing race/ethnicity data

We know that individual schools often don't mirror this citywide mix. In the next section
we calculate the 2 largest ethnic groups in each school so that we can use this data on our map.

For every school in our set, the 2 largest demographic groups make up the majority
of students in the school.

We will plot circles for each "school" (not campus/building) where the size of the radius indicates
how large a majority the top two ethnic groups have, where the inner color indicates the size
and identity of the largest group, and the outer ring the size and identity of the second largest group.

We write functions here to format mouse over tooltips and formatted popups for each item on the map.



Prepare for map
---------------
- make sure all of our data is in the same CRS
- write functions we can `apply` to GIS data for the graph
- create user-friendly tooltips and popups

In [24]:
def income_label(x):
    "Format a median income as e.g. `$85k`; returns `NA` when missing or not positive"
    # `x > 0` is False for NaN as well as for zero/negative values, so a
    # missing income becomes "NA" instead of "$nank".
    return f"${x/1000:,.0f}k" if x > 0 else "NA"


# Join census figures onto the tract polygons by geoid (compared as strings).
census_df["geoid"] = census_df["geoid"].astype(str)

# After a first run `tracts` only carries the lower-case key, so accept either
# spelling: this keeps the cell re-runnable without reloading the raw tracts.
geoid_col = "GEOID" if "GEOID" in tracts.columns else "geoid"
tract_shapes = tracts[[geoid_col, "geometry"]].rename(columns={geoid_col: "geoid"})
tract_shapes["geoid"] = tract_shapes["geoid"].astype(str)

# inner join: tracts without census figures are dropped
tracts = tract_shapes.merge(census_df, on="geoid", how="inner")
tracts["label"] = tracts.median_inc.apply(income_label)


In [25]:

# add any fields from campus into buildings if we want to use in popups or tooltips
# Copies left by a previous run of this cell are dropped first; otherwise a
# re-run would merge again and produce `_x`/`_y` suffixed columns, breaking the
# `poverty_pct` / `total_enrollment` lookups used for the building tooltip.
campus_fields = ["poverty_pct", "total_enrollment"]
buildings = buildings.drop(columns=campus_fields, errors="ignore").merge(
    campuses[["campus_id"] + campus_fields], on="campus_id", how="inner"
)
def plot_campus(m):
    """Build a row callback that draws one campus as a small gray dot on `m`.

    The returned function takes a row exposing `geometry`, places a 5-unit
    radius, borderless, dark gray circle at the geometry's centroid, and
    attaches it to `m`. It has no tooltip or popup and returns nothing.
    """
    def draw_node(campus):
        center = campus.geometry.centroid
        folium.Circle(
            location=[center.y, center.x],  # folium expects [lat, lon]
            radius=5,
            stroke=False,
            fill_color="darkgray",
            fill_opacity=1,
            tooltip=None,
            popup=None,
        ).add_to(m)

    return draw_node


def plot_school(m):
    """Build a row callback that draws one school as two stacked circles on `m`.

    The returned function reads `dbn`, `school_name`, `geometry`, `plurality`,
    `plurality2`, the matching `<group>_pct` fields, `inner_r`, `outer_r`,
    `color`, `color2` and `popup` from the row. The larger `outer` circle is
    added first and the `inner` circle second, so the inner one sits on top.
    Only the inner circle carries the popup.
    """
    def draw_school(school):
        center = school.geometry.centroid
        lat, lon = center.y, center.x
        heading = f"<b>{school.dbn} {school.school_name}</b><br>"

        def tip(group):
            # e.g. "<b>01M001 Some School</b><br>60% Hispanic"
            return f"{heading}{school[group + '_pct']:.0%} {group.capitalize()}"

        disc = dict(fill=True, stroke=False, fill_opacity=1)

        inner = folium.Circle(
            location=[lat, lon],
            radius=school.inner_r,
            tooltip=tip(school.plurality),
            popup=school.popup,
            fill_color=school.color,
            **disc)

        outer = folium.Circle(
            location=[lat, lon],
            radius=school.outer_r,
            tooltip=tip(school.plurality2),
            fill_color=school.color2,
            **disc)

        # draw order matters: outer ring underneath, inner disc above it
        for ring in (outer, inner):
            ring.add_to(m)

    return draw_school


def demo_popup(row, colors=None):
    """Build the HTML popup for one school: a header plus an inline SVG bar chart.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide `school_name`, `dbn`, `open_year`, `total_enrollment` and
        the `*_pct` fields listed in `demo_labels`. The percentages are
        expected to be fractions (they are formatted with `%` and scaled
        against a 120px track).
    colors : dict, optional
        Maps a group key ("asian", "black", ...) to a CSS color. Defaults to
        the notebook-level `demo_colors`; groups without an entry are purple.

    Returns
    -------
    str
        HTML fragment with one labeled bar per entry in `demo_labels`.
    """
    from html import escape

    if colors is None:
        # NOTE(review): `demo_colors` is not defined in any cell shown in this
        # notebook (only `chart_colors` is) — confirm where it comes from.
        colors = demo_colors

    # School names are data, not markup: escape so "&", "<" etc. render as text.
    name = escape(f"{row.school_name} ({row.dbn})")

    demo_labels = {
        "asian_pct": "Asian Students",
        "black_pct": "Black Students",
        "hispanic_pct": "Latine Students",
        "white_pct": "White Students",
        "poverty_pct": "Poverty Rate",
    }

    bar_max = 120  # px; same width as the gray track drawn behind every bar

    # svg bars for the graph
    bars = []
    y = 20
    for field, title in demo_labels.items():
        share = row[field]
        color = colors.get(field.split("_")[0], "purple")
        if pd.isna(share):
            # missing value: empty bar and an explicit marker, not "nan%"
            label, width = f"{title} (n/a)", 0
        else:
            label = f"{title} ({share:.0%})"
            # keep the colored bar inside its track even for bad data
            width = min(max(share, 0.0), 1.0) * bar_max
        bars.append(f"""
            <text x="120" y="{y}" font-size="10" text-anchor="end">{label}</text>
            <rect x="130" y="{y - 8}" width="120" height="10" fill="darkgray"></rect>
            <rect x="130" y="{y - 8}" width="{width}" height="10" fill="{color}"></rect>
        """)
        y += 20  # space between bars

    svg = f"""
    <svg width="300" height="{y}">
        {"".join(bars)}
    </svg>
    """

    popup_html = f"""
<div style="width: 300px; padding: .5em; background-color:rgba(255,255,255,.9); color: black; border-radius: 8px">
  <h3 style="font-weight: bold; border-bottom: thin solid black; margin: 0 0 .5em 0;">
    {name}
  </h3>
  <b>Est. {row.open_year} | No. Students: {row.total_enrollment:,}</b>
  <div>{svg}</div>
</div>
"""
    return popup_html

def building_tooltip(b):
    "Tooltip HTML for one building row: construction year, poverty rate, enrollment"
    return f"<b>Built in {b.cnstrct_yr}</b><br>Poverty: {b.poverty_pct:.0%}<br>Enrollment: {b.total_enrollment:,}"


# pre-render the popup HTML for every school row
data["popup"] = data.apply(demo_popup, axis=1)

# nice tooltip for buildings
buildings["tooltip"] = buildings.apply(building_tooltip, axis=1)

Plot districts, schools, campuses, and buildings
------------------------------------------------
We bring everything together on the map:

- **geographic district** boundaries (dark blue), labeled with their district number [1..32]
- **building footprints** for each school building
  - some schools are missing footprints
  - shaded in Purple based on percent of students living in poverty, darker purple means poorer school populations
  - mouse over tells us the year the building was built, the percent of students in poverty, and the total enrollment
- **campuses** are plotted as dark gray circles with schools at that campus in a radial pattern
  - some campuses have only one school (non-shared locations)
  - others connect to a node for each school on campus
  - campus nodes are currently drawn without tooltips or pop-ups (`plot_campus` sets both to `None`)
- **schools** are plotted as circles with an inner and outer color indicating the percent size of the majority ethnicity
  - mouse over of the inner circle gives the school name and dbn, and largest ethnic group percentage
  - mouse over of the outer ring gives the percentage of the second largest group in the majority
  - popups offer full school demographics

In [26]:
# Assemble the map. Layers are added in the order below, so later layers draw
# on top of earlier ones.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'DataFrame' object has no attribute 'unary_union'", i.e.
# some input that is expected to be a GeoDataFrame was a plain DataFrame on the
# last saved run. The traceback does not show which call raised — confirm.
m = ui.base_map(districts, zoom=14)

# district boundaries
m = districts.explore(m=m,
                      tooltip=False,
                      style_kwds={"color": "darkblue", "weight": .8, "fillOpacity": 0},
                      highlight=False)

# label each district shape with its "district" value
_ = ui.label_shapes(m, districts, "district", { "color": "darkgray", "weight": 2, "fillOpacity": 0, "font-size": "18px"})


# Optional choropleth of median income per census tract (tracts with no
# positive income are skipped). It is drawn into its own feature group.
income_layer = folium.FeatureGroup(name="Income Tracts", show=True)

tracts[tracts.median_inc > 0].explore(
    m=income_layer,
    column="median_inc",
    cmap="Reds_r",
    style_kwds={"fillOpacity": 0.4, "opacity": 1, "weight": 1},
    tooltip="label",
    tooltip_kwds={"labels": False},
    highlight=False
)

# The group is never attached while the next line stays commented out, so the
# income layer is built but does not appear on the map.
# income_layer.add_to(m)


# Group schools by campus; the result is used below for the spoke lines
# (`spoke_geom`) and the school circles.
# NOTE(review): the meaning of 30 is not visible here — presumably the spoke
# length/radius; confirm against `ui.cluster_radial`.
cluster_df = ui.cluster_radial(data, "campus_id", 30)


# building footprints
buildings.explore(m=m,
        tooltip_kwds={"labels": False},
        tooltip="tooltip",
        popup=None,
        column="poverty_pct",
        cmap="Purples",
        legend=False,
        style_kwds={"weight": 1, "opacity": 1,"color": "purple", "fillOpacity": 1})

# campus center nodes
# NOTE(review): `style` is never used below; `plot_campus` hard-codes its own look.
style = {"color": "darkgray", "fillColor": "darkgray", "weight": 1, "opacity": 1, "fillOpacity": 1, "radius": 2}
campuses.apply(plot_campus(m), axis=1)


# cluster edges
cluster_df.set_geometry("spoke_geom").explore(m=m, style_kwds={ "color": "darkgray", "weight": .8}, highlight=False, tooltip=False)

# schools as cluster nodes
cluster_df.apply(plot_school(m), axis=1)

# add a legend
# NOTE(review): `demo_colors` is not defined in any cell shown in this notebook
# (only `chart_colors` is); on a fresh kernel this raises NameError. Confirm the
# legend colors match the `color` / `color2` values stored in `data`.
legend = {
    "Asian Student Plurality": demo_colors["asian"],
    "Black Student Plurality": demo_colors["black"],
    "Latine Student Plurality": demo_colors["hispanic"],
    "White Student Plurality": demo_colors["white"],
}

m = ui.map_legend(m, list(legend.items()), "NYC Schools by Racial Majority")
# last expression: render the finished map as the cell output
m

AttributeError: 'DataFrame' object has no attribute 'unary_union'

In [61]:

# m.save("nyc-colac-plurality.html")
