In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys; sys.path.insert(0, '..')

In [3]:
from pathlib import Path

import fiona
import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
from shapely.geometry import LineString, Point
from tqdm import tqdm
import altair as alt

In [4]:
from main import prepare_data_for_place, OUTPUT_COLUMNS
from src.route import (
    get_route_gdf,
    compute_routes_from_census_blocks_to_school,
    compute_routes_from_census_blocks_to_all_schools
)

In [5]:
def is_connected(g):
    """Return whether ``g`` is connected once edge direction is ignored.

    ``g`` must provide ``to_undirected()``; the undirected result is handed to
    ``networkx.is_connected`` and that call's return value is returned as-is.
    """
    # Local import keeps networkx a call-time dependency only.
    from networkx import is_connected as nx_is_connected

    undirected_view = g.to_undirected()
    return nx_is_connected(undirected_view)

## Prepare output directory

In [6]:
import os
import shutil

# Directory that the `to_file` calls in this notebook write into.
OUT_PATH = Path("../data/out/notebook/")

# Delete the directory (and everything inside it) if it exists, so each run
# starts from an empty output folder.
if OUT_PATH.exists():
    shutil.rmtree(OUT_PATH)

# Recreate the directory, creating any missing parent directories as well
OUT_PATH.mkdir(parents=True, exist_ok=True)

## Load bike network

In [7]:
# Study area, passed as a place-name string to the project's data-preparation helper.
place = "Somerville, MA, USA"
# The helper returns two tables, used below as the node and edge tables of the bike network.
nodes, edges = prepare_data_for_place(place)

> Getting bike network for Somerville, MA, USA
> Processing network for Somerville, MA, USA
> MODEL 1: Preparing speed data for Somerville, MA, USA
> MODEL 2: Preparing separation level data for Somerville, MA, USA
> MODEL 3: Preparing street category data for Somerville, MA, USA
> MODEL 4: Preparing lanes data for Somerville, MA, USA
> MODEL: Preparing composite score for Somerville, MA, USA


In [8]:
# Keep only the columns listed in OUTPUT_COLUMNS (imported from main).
edges = edges[OUTPUT_COLUMNS]

In [9]:
# Build a graph from the node and edge tables.
G = ox.graph_from_gdfs(nodes, edges)

In [10]:
# Show three randomly drawn edge rows as a quick look at the columns (no seed, so rows vary per run).
edges.sample(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,name,maxspeed_0,maxspeed_int,maxspeed_int_score,separation_level,separation_level_score,street_0,street_classification,street_classification_score,composite_score,length,width_float,width_half,geometry
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
952,999,0,Ellsworth Street,20 mph,20.0,0.0,none,4.0,residential,residential,2,2.8,86.256827,12.2,6.1,"LINESTRING (-71.08698 42.38707, -71.08696 42.3..."
557,459,0,Broadway,25 mph,25.0,1.0,none,4.0,secondary,medium-capacity,3,3.2,74.634756,21.3,10.65,"LINESTRING (-71.12496 42.4023, -71.12508 42.40..."
12,165,0,Pearl Street,20 mph,20.0,0.0,none,4.0,tertiary,medium-capacity,3,3.0,60.304337,15.2,7.6,"LINESTRING (-71.09209 42.38703, -71.092 42.387..."


In [11]:
# Check that the graph is connected when edge direction is ignored.
is_connected(G)

True

## Load Schools

In [12]:
# Path to the zipped geodatabase from which the school layer is read below.
school_gdb_path = "../data/raw/SafeRoutesGISLayers.gdb.zip"

In [13]:
# List the layer names available in the geodatabase.
layers = fiona.listlayers(school_gdb_path)
layers

['SafetyZoneStreets', 'PublicSchools', 'Sidewalks', 'Signalized_Intersections']

In [14]:
# read the 'PublicSchools' layer from the geodatabase
schools_gdf = gpd.read_file(school_gdb_path, layer='PublicSchools')

# save the school geometries as read, before they are replaced below
schools_gdf.to_file((OUT_PATH / "schools_poly.gpkg"), driver="GPKG")

# replace each geometry with its centroid (overwrites the geometry column)
schools_gdf['geometry'] = schools_gdf.centroid

# save school centroids
schools_gdf.to_file((OUT_PATH / "schools_centroid.gpkg"), driver="GPKG")

In [15]:
# Preview the first three schools (geometry now holds the centroids).
schools_gdf.head(3)

Unnamed: 0,Name,GlobalID,Shape_Length,Shape_Area,geometry
0,West Somerville Neighborhood School,{423648E4-357B-4C51-8323-18DE5B5EF869},857.12613,20546.222891,POINT (757029.484 2973287.291)
1,Brown School,{32ED129B-38AE-4E8F-A71B-A18126973D75},511.378543,10156.639765,POINT (760400.444 2970061.762)
2,Healey School,{374CFA80-E38D-4411-AB46-7868E8DA8468},900.132189,38897.27228,POINT (765459.28 2970148.61)


## Load census blocks

In [16]:
# read census blocks
census_blocks = gpd.read_file("../data/raw/Census_2020_Blocks.zip")

# keep only the Somerville blocks (TOWN attribute); copy so the geometry
# assignment below does not touch `census_blocks`, and renumber the rows from 0
somerville_census_blocks = (
    census_blocks[census_blocks['TOWN'] == "SOMERVILLE"]
    .copy()
    .reset_index(drop=True)
)

# draw a quarter-size sample of the blocks; the fixed seed keeps the sample
# (and every result derived from it) identical across kernel restarts
SAMPLE_DIVISOR = 4
SAMPLE_SEED = 42
n_sample_blocks = len(somerville_census_blocks) // SAMPLE_DIVISOR
somerville_census_sample = somerville_census_blocks.sample(
    n_sample_blocks, random_state=SAMPLE_SEED
)

# save polygon version (must happen before the geometry column is replaced)
somerville_census_blocks.to_file(OUT_PATH / "somer_blocks_poly.gpkg", driver="GPKG")
somerville_census_sample.to_file(OUT_PATH / "somer_sample_poly.gpkg", driver="GPKG")

# convert geometry to centroid; this runs before the later `to_crs` step,
# i.e. while the blocks are still in the CRS they were read in
somerville_census_blocks['geometry'] = somerville_census_blocks.centroid
somerville_census_sample['geometry'] = somerville_census_sample.centroid

# save centroid version
somerville_census_blocks.to_file(OUT_PATH / "somer_blocks_centroid.gpkg", driver="GPKG")
somerville_census_sample.to_file(OUT_PATH / "somer_sample_centroid.gpkg", driver="GPKG")

In [17]:
# Preview the first three census blocks (geometry now holds the centroids).
somerville_census_blocks.head(3)

Unnamed: 0,OBJECTID,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,ALAND20,AWATER20,...,AREA_SQFT,AREA_ACRES,TOWN,TOWN_ID,BLKGRP20,TRACT20,COUSUBFP,SHAPEAREA,SHAPELEN,geometry
0,51243,25,17,351002,2002,250173510022002,Block 2002,G5040,18175,0,...,195621.53,4.49,SOMERVILLE,274,250173510022,25017351002,62535,18173.907063,626.570381,POINT (231829.87 904798.558)
1,51295,25,17,350400,2007,250173504002007,Block 2007,G5040,17571,0,...,189123.23,4.34,SOMERVILLE,274,250173504002,25017350400,62535,17570.193552,675.342228,POINT (231953.871 905367.479)
2,51456,25,17,350108,2003,250173501082003,Block 2003,G5040,19544,0,...,210356.28,4.83,SOMERVILLE,274,250173501082,25017350108,62535,19542.816299,707.71051,POINT (233007.237 905138.728)


## Make sure everything has the same CRS

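- EPSG:26986 = NAD83 / Massachusetts Mainland (meters)
- EPSG:6492 = NAD83(2011) / Massachusetts Mainland (US survey feet)
- EPSG:4326 = WGS 84 (geographic latitude/longitude in degrees)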

In [18]:
def crs_first_line(gdf):
    """Return the first line of the string form of ``gdf.crs``."""
    crs_lines = str(gdf.crs).splitlines()
    return crs_lines[0]

In [19]:
# Report the CRS of every layer, one aligned "label: crs" line per layer.
for crs_label, crs_frame in (
    ("somerville_census_blocks:", somerville_census_blocks),
    ("somerville_census_sample:", somerville_census_sample),
    ("schools_gdf             :", schools_gdf),
    ("edges                   :", edges),
    ("nodes                   :", nodes),
):
    print(crs_label, crs_first_line(crs_frame))

somerville_census_blocks: EPSG:26986
somerville_census_sample: EPSG:26986
schools_gdf             : EPSG:6492
edges                   : EPSG:4326
nodes                   : EPSG:4326


In [20]:
# target CRS: whatever the edge table already uses
use_crs = edges.crs

# reproject every layer to the target CRS so they all match
# (nodes and edges were already reported in this CRS by the check above)
somerville_census_blocks = somerville_census_blocks.to_crs(use_crs)
somerville_census_sample = somerville_census_sample.to_crs(use_crs)
schools_gdf = schools_gdf.to_crs(use_crs)
nodes = nodes.to_crs(use_crs)
edges = edges.to_crs(use_crs)

In [21]:
# Re-check the CRS of every layer after reprojection, one aligned line per layer.
for crs_label, crs_frame in (
    ("somerville_census_blocks:", somerville_census_blocks),
    ("somerville_census_sample:", somerville_census_sample),
    ("schools_gdf             :", schools_gdf),
    ("edges                   :", edges),
    ("nodes                   :", nodes),
):
    print(crs_label, crs_first_line(crs_frame))

somerville_census_blocks: EPSG:4326
somerville_census_sample: EPSG:4326
schools_gdf             : EPSG:4326
edges                   : EPSG:4326
nodes                   : EPSG:4326


## Routing

In [22]:
# Rebuild the graph from the node/edge tables after the CRS step, then re-check connectivity.
G = ox.graph_from_gdfs(nodes, edges)
is_connected(G)

True

In [23]:
# pick a school: geometry of the row with index label 0
dest_point = schools_gdf.loc[0, 'geometry']

In [24]:
# pick a census centroid: geometry of the first row (by position) of the sample
orig_point = somerville_census_sample.iloc[0]['geometry']

In [25]:
# route based on composite score
# (no `weight` argument is passed here; presumably the helper defaults to the
# composite score — TODO confirm in src.route)
route_gdf = get_route_gdf(G, orig_point, dest_point)
route_gdf

Unnamed: 0,geometry,mean_composite_score,median_composite_score,min_composite_score,max_composite_score,sum_length
0,"LINESTRING (-71.10862 42.39871, -71.10904 42.3...",1.898611,2.3,0.75,2.625,1746.979324


In [26]:
# route based on length
# NOTE(review): the recorded output is identical to the composite-score route in the
# previous cell (same scores and sum_length) — confirm that `weight` actually
# changes the route, or that the two routes genuinely coincide for this pair.
route_gdf = get_route_gdf(G, orig_point, dest_point, weight="length")
route_gdf

Unnamed: 0,geometry,mean_composite_score,median_composite_score,min_composite_score,max_composite_score,sum_length
0,"LINESTRING (-71.10862 42.39871, -71.10904 42.3...",1.898611,2.3,0.75,2.625,1746.979324


## Route loop

In [32]:
# loop over all census blocks, computing routes to one school (row 0 of schools_gdf);
# the helper returns the combined routes plus the error messages it collected
combined_gdf, errors = compute_routes_from_census_blocks_to_school(
    G, somerville_census_blocks,
    schools_gdf.loc[0],
    weight="composite_score"
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 702/702 [00:05<00:00, 117.04it/s]


In [39]:
# list the routing errors, one message per line
print("\n".join(errors))
# NOTE: both figures are means taken across routes — the second one is the mean of
# the per-route medians, not a median across routes.
print("\nmean composite score  :", combined_gdf['mean_composite_score'].mean())
print("median composite score:", combined_gdf['median_composite_score'].mean())

# save to file
# combined_gdf.to_file((OUT_PATH / "routes_school1.gpkg"), driver="GPKG")

Error on index 401: No route found between start and end
Error on index 558: No route found between start and end
Error on index 618: No route found between start and end
Error on index 625: No route found between start and end

mean composite score  : 1.826470300291281
median composite score: 2.0798352435530085


In [35]:
# Preview the first two routes.
combined_gdf.head(2)

Unnamed: 0,geometry,mean_composite_score,median_composite_score,min_composite_score,max_composite_score,sum_length,from_block_id,to_school_name,to_school_id
0,"LINESTRING (-71.114 42.39291, -71.11489 42.393...",2.12037,2.625,0.0,3.5,2500.611909,250173510022002,West Somerville Neighborhood School,{423648E4-357B-4C51-8323-18DE5B5EF869}
1,"LINESTRING (-71.11089 42.39896, -71.11136 42.3...",2.048437,2.3,0.75,2.8,1564.153473,250173504002007,West Somerville Neighborhood School,{423648E4-357B-4C51-8323-18DE5B5EF869}


### Loop all schools

In [42]:
# Compute routes to every school using the project helper.
# Note: the quarter-size *sample* of blocks is what gets passed through the
# `somerville_census_blocks` keyword here, not the full block table.
all_routes_gdf, errors = compute_routes_from_census_blocks_to_all_schools(
    G,
    somerville_census_blocks=somerville_census_sample,
    schools_gdf=schools_gdf,
    weight="composite_score"
)

----- West Somerville Neighborhood School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 115.46it/s]


----- Brown School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 123.54it/s]


----- Healey School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 120.24it/s]


----- Kennedy School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 121.73it/s]


----- East Somerville Community School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 107.42it/s]


----- Argenziano School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 121.62it/s]


----- Capuano Early Childhood Center -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 123.43it/s]


----- Somerville High School -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 124.83it/s]


----- Winter Hill at Edgerly -----


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:01<00:00, 123.12it/s]


In [None]:
# save only the combined file
# NOTE: the per-school loop cell that follows writes to the same file name.
all_routes_gdf.to_file(OUT_PATH / "routes_all_schools.gpkg", driver="GPKG")

In [33]:
# Route every census block to every school, keep a per-school summary for the
# chart below, and save all routes in one file.
mean_scores = []          # one summary record per school
all_routes = []           # per-school route GeoDataFrames, concatenated after the loop

for school_idx, school in schools_gdf.iterrows():
    print(f"----- {school['Name']} -----")

    # Loop-local names, so the single-school results stored in `combined_gdf` /
    # `errors` by the earlier "Route loop" cell are not overwritten.
    school_routes_gdf, school_errors = compute_routes_from_census_blocks_to_school(
        G, somerville_census_blocks,
        school,
        weight="composite_score"
    )

    print("errors:", school_errors)
    # The routes table carries per-route aggregates (`mean_composite_score`,
    # `median_composite_score`, ...) and has no plain `composite_score` column,
    # so the school-level figure is the average of the per-route means.
    mean_score = school_routes_gdf['mean_composite_score'].mean()
    print("mean composite score:", mean_score)

    # add summary row
    mean_scores.append({
        "School Name": school["Name"],
        "Mean Composite Score": mean_score,
        "Errors": school_errors
    })

    # add school name column, then collect for the global merge
    all_routes.append(school_routes_gdf.assign(school_name=school["Name"]))

# summary results
mean_scores_df = pd.DataFrame(mean_scores)

# merge all routes into one GeoDataFrame, reusing the CRS of the first per-school result
all_routes_gdf = gpd.GeoDataFrame(
    pd.concat(all_routes, ignore_index=True),
    crs=all_routes[0].crs
)

# save only the combined file
all_routes_gdf.to_file(
    OUT_PATH / "routes_all_schools.gpkg",
    driver="GPKG"
)

----- West Somerville Neighborhood School -----


NameError: name 'compute_routes' is not defined

In [None]:
# Bar chart of the mean composite score per school, built from `mean_scores_df`.
school_axis = alt.X(
    "School Name:N",
    sort=None,
    title="School Name",
    axis=alt.Axis(labelAngle=45),
)
score_axis = alt.Y("Mean Composite Score:Q", title="Mean Composite Score")

school_bars = alt.Chart(mean_scores_df).mark_bar()
chart = school_bars.encode(
    x=school_axis,
    y=score_axis,
    tooltip=["School Name", "Mean Composite Score"],
).properties(
    width=600,
    height=400,
    title="Mean Composite Scores by School",
)

In [None]:
# Display the chart built in the previous cell.
chart