In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys; sys.path.insert(0, '..')

In [None]:
from pathlib import Path

import fiona
import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
from shapely.geometry import LineString, Point
from tqdm import tqdm
import altair as alt

In [None]:
from main import prepare_data_for_place, OUTPUT_COLUMNS
from src.route import get_route_gdf

In [None]:
def is_connected(g):
    """Return networkx's connectivity verdict for the undirected version of ``g``.

    ``g`` only needs a ``to_undirected()`` method whose result
    ``networkx.is_connected`` accepts.
    """
    from networkx import is_connected as nx_is_connected

    undirected_graph = g.to_undirected()
    return nx_is_connected(undirected_graph)

## Prepare output directory

In [None]:
# NOTE(review): `os` is not referenced in the visible cells — confirm it is still needed.
import os
import shutil

# Every file this notebook writes goes under this directory. The path is
# relative, so it resolves against the kernel's current working directory.
OUT_PATH = Path("../data/out/notebook/")

# Delete the directory if it exists
# WARNING: this wipes everything under OUT_PATH, including outputs of earlier runs.
if OUT_PATH.exists():
    shutil.rmtree(OUT_PATH)

# Recreate the directory
OUT_PATH.mkdir(parents=True, exist_ok=True)

## Load bike network

In [None]:
nodes, edges = prepare_data_for_place("Somerville, MA, USA")

In [None]:
edges = edges[OUTPUT_COLUMNS]

In [None]:
G = ox.graph_from_gdfs(nodes, edges)

In [None]:
edges.sample(3)

In [None]:
is_connected(G)

## Load Schools

In [None]:
school_gdb_path = "../data/raw/SafeRoutesGISLayers.gdb.zip"

In [None]:
layers = fiona.listlayers(school_gdb_path)
layers

In [None]:
# read school data from the 'PublicSchools' layer of the geodatabase
schools_gdf = gpd.read_file(school_gdb_path, layer='PublicSchools')

# save the school geometries as read, before they are replaced by centroids below
schools_gdf.to_file((OUT_PATH / "schools_poly.gpkg"), driver="GPKG")

# overwrite the geometry column with each feature's centroid; from here on
# schools_gdf holds the centroids, not the original shapes
schools_gdf['geometry'] = schools_gdf.centroid

# save schools centroids
schools_gdf.to_file((OUT_PATH / "schools_centroid.gpkg"), driver="GPKG")

In [None]:
schools_gdf.head(3)

## Load census blocks

In [None]:
# read census blocks
census_blocks = gpd.read_file("../data/raw/Census_2020_Blocks.zip")

# keep only the Somerville blocks (TOWN attribute) and give them a fresh 0..n-1 index
somerville_census_blocks = (
    census_blocks[census_blocks['TOWN'] == "SOMERVILLE"]
    .copy()
    .reset_index(drop=True)
)

# get a sample (half); the fixed seed makes Restart & Run All pick the same
# blocks, and therefore write the same sample files, on every run
SAMPLE_SEED = 42
half_n_census_blocks = len(somerville_census_blocks) // 2
somerville_census_sample = somerville_census_blocks.sample(
    half_n_census_blocks, random_state=SAMPLE_SEED
)

# save polygon version (must happen before the geometry is replaced below)
somerville_census_blocks.to_file((OUT_PATH / "somer_blocks_poly.gpkg"), driver="GPKG")
somerville_census_sample.to_file((OUT_PATH / "somer_sample_poly.gpkg"), driver="GPKG")

# convert geometry to centroid (overwrites the geometry column in place)
somerville_census_blocks['geometry'] = somerville_census_blocks.centroid
somerville_census_sample['geometry'] = somerville_census_sample.centroid

# save centroid version
somerville_census_blocks.to_file((OUT_PATH / "somer_blocks_centroid.gpkg"), driver="GPKG")
somerville_census_sample.to_file((OUT_PATH / "somer_sample_centroid.gpkg"), driver="GPKG")

In [None]:
somerville_census_blocks.head(3)

## Make sure everything has the same CRS

- EPSG:26986 = NAD83 / Massachusetts Mainland (metres)
- EPSG:4326 = WGS 84 (geographic latitude/longitude, in degrees)

In [None]:
def crs_first_line(gdf):
    """Return only the first line of ``str(gdf.crs)``, for compact printing."""
    crs_text = str(gdf.crs)
    crs_lines = crs_text.splitlines()
    return crs_lines[0]

In [None]:
# Show the CRS of every layer, one line per layer (labels are padded to align)
for crs_label, frame in (
    ("somerville_census_blocks:", somerville_census_blocks),
    ("somerville_census_sample:", somerville_census_sample),
    ("schools_gdf             :", schools_gdf),
    ("edges                   :", edges),
    ("nodes                   :", nodes),
):
    print(crs_label, crs_first_line(frame))

In [None]:
# Reference CRS: whatever the network edges already use
use_crs = edges.crs

# Reproject every layer to the reference CRS, in the same order as before
(
    somerville_census_blocks,
    somerville_census_sample,
    schools_gdf,
    nodes,
    edges,
) = (
    frame.to_crs(use_crs)
    for frame in (
        somerville_census_blocks,
        somerville_census_sample,
        schools_gdf,
        nodes,
        edges,
    )
)

In [None]:
# Show the CRS of every layer again to confirm they now agree
for crs_label, frame in (
    ("somerville_census_blocks:", somerville_census_blocks),
    ("somerville_census_sample:", somerville_census_sample),
    ("schools_gdf             :", schools_gdf),
    ("edges                   :", edges),
    ("nodes                   :", nodes),
):
    print(crs_label, crs_first_line(frame))

## Routing

In [None]:
# Rebuild the graph from the current node/edge frames (they were reassigned by
# the to_crs step), then display whether it is connected, ignoring edge direction.
G = ox.graph_from_gdfs(nodes, edges)
is_connected(G)

In [None]:
# pick a school
dest_point = schools_gdf.loc[0, 'geometry']

In [None]:
# pick a census centroid
orig_point = somerville_census_sample.iloc[0]['geometry']

In [None]:
route_gdf = get_route_gdf(G, orig_point, dest_point)

In [None]:
route_gdf.to_file((OUT_PATH / "route_single.gpkg"), driver="GPKG")

In [None]:
route_gdf

## Route loop

In [None]:
def compute_routes(school, blocks=None, graph=None, crs=None):
    """Route every census-block point to one school and stack the results.

    Parameters
    ----------
    school : row-like (e.g. one row of ``schools_gdf``)
        Must provide ``'geometry'``, ``'Name'`` and ``'GlobalID'``.
    blocks : GeoDataFrame, optional
        Route origins, one per row, with ``'geometry'`` and ``'GEOID20'``.
        Defaults to the notebook-level ``somerville_census_blocks``.
    graph : optional
        Network handed to ``get_route_gdf``. Defaults to the notebook-level ``G``.
    crs : optional
        CRS passed to the combined GeoDataFrame. Defaults to the notebook-level
        ``use_crs``.

    Returns
    -------
    (combined_gdf, errors)
        ``combined_gdf`` is the concatenation of every non-empty route, with
        ``from_block_id``, ``to_school_name`` and ``to_school_id`` columns added.
        ``errors`` is a list of ``"Error on index <i>: <exception>"`` strings,
        one per origin whose routing raised.

    Raises
    ------
    ValueError
        If no origin produced a non-empty route. This is the exception type
        ``pd.concat`` raised before, but the message now names the school and
        carries the first collected errors instead of discarding them.
    """
    # Fall back to the notebook-level objects so one-argument calls behave as before.
    if blocks is None:
        blocks = somerville_census_blocks
    if graph is None:
        graph = G
    if crs is None:
        crs = use_crs

    errors = []
    dataframes = []

    # The destination is the same for every origin, so look it up once.
    dest_point = school['geometry']

    for i, row in tqdm(blocks.iterrows(), total=len(blocks)):
        orig_point = row['geometry']
        try:
            route_gdf = get_route_gdf(graph, orig_point, dest_point)
        except Exception as e:
            # Best effort: remember the failure and keep routing the other blocks.
            errors.append(f"Error on index {i}: {e}")
            continue

        if route_gdf.empty:
            continue

        # Tag each route row with its origin block and destination school.
        route_gdf["from_block_id"] = row["GEOID20"]
        route_gdf["to_school_name"] = school["Name"]
        route_gdf["to_school_id"] = school["GlobalID"]
        dataframes.append(route_gdf)

    if not dataframes:
        # pd.concat([]) would raise a bare "No objects to concatenate" and the
        # collected errors would be lost; surface them instead.
        detail = "; ".join(errors[:3]) if errors else "every route came back empty"
        raise ValueError(
            f"No routes found for school {school['Name']!r} "
            f"({len(errors)} routing error(s)): {detail}"
        )

    combined_gdf = gpd.GeoDataFrame(pd.concat(dataframes, ignore_index=True), crs=crs)
    return combined_gdf, errors

In [None]:
combined_gdf, errors = compute_routes(schools_gdf.iloc[0])

In [None]:
# Routing failures, then the mean composite score over all rows of the combined routes
print("errors:", errors)
print("mean composite score:", combined_gdf["composite_score"].mean())

# Persist the combined routes for the first school
combined_gdf.to_file(OUT_PATH / "routes_school1.gpkg", driver="GPKG")

### Try another school

In [None]:
combined_gdf, errors = compute_routes(schools_gdf.iloc[1])

In [None]:
# Routing failures, then the mean composite score over all rows of the combined routes
print("errors:", errors)
print("mean composite score:", combined_gdf["composite_score"].mean())

# Persist the combined routes for the second school
combined_gdf.to_file(OUT_PATH / "routes_school2.gpkg", driver="GPKG")

### Loop all schools

In [None]:
mean_scores = []          # accumulate summary rows (one dict per school)
all_routes = []           # accumulate the routes GeoDataFrame of each routed school

for school_idx, school in schools_gdf.iterrows():
    print(f"----- {school['Name']} -----")

    try:
        combined_gdf, errors = compute_routes(school)
    except ValueError as exc:
        # compute_routes raises ValueError when it has no routes to concatenate.
        # Record the school with a NaN score and keep going, so one unroutable
        # school does not throw away the results already computed for the others.
        print("skipped:", exc)
        mean_scores.append({
            "School Name": school["Name"],
            "Mean Composite Score": float("nan"),
            "Errors": [str(exc)]
        })
        continue

    print("errors:", errors)
    mean_score = combined_gdf['composite_score'].mean()
    print("mean composite score:", mean_score)

    # add summary row
    mean_scores.append({
        "School Name": school["Name"],
        "Mean Composite Score": mean_score,
        "Errors": errors
    })

    # add school name column
    combined_gdf = combined_gdf.assign(school_name=school["Name"])

    # collect for global merge
    all_routes.append(combined_gdf)

# summary results
mean_scores_df = pd.DataFrame(mean_scores)

# fail with a clear message instead of an opaque concat error when nothing was routed
if not all_routes:
    raise ValueError("No routes were computed for any school; nothing to write.")

# merge all routes into one GeoDataFrame
all_routes_gdf = gpd.GeoDataFrame(
    pd.concat(all_routes, ignore_index=True),
    crs=all_routes[0].crs
)

# save only the combined file
all_routes_gdf.to_file(
    OUT_PATH / "routes_all_schools.gpkg",
    driver="GPKG"
)

In [None]:
# x: one bar per school; sort=None turns off Altair's default sorting so the
# bars follow the row order of mean_scores_df
school_axis = alt.X(
    "School Name:N",
    sort=None,
    title="School Name",
    axis=alt.Axis(labelAngle=45),
)

# y: the per-school mean composite score
score_axis = alt.Y("Mean Composite Score:Q", title="Mean Composite Score")

chart = (
    alt.Chart(mean_scores_df)
    .mark_bar()
    .encode(
        x=school_axis,
        y=score_axis,
        tooltip=["School Name", "Mean Composite Score"],
    )
    .properties(width=600, height=400, title="Mean Composite Scores by School")
)

In [None]:
chart