In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Notebook-wide plotting defaults: seaborn "whitegrid" theme at "talk" scale,
# plus wide, higher-resolution matplotlib figures.
sns.set_theme(context="talk", style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6), "figure.dpi": 120})

In [None]:
# Load the sidewalk accessibility observations and preview the first five rows.
df = pd.read_csv("Access_to_Everyday_Life_Dataset.csv")
df.head()

In [None]:
# Map the flattened GeoJSON column names (old, new) to the short names used
# by every later cell.
df = df.rename(
    columns=dict(
        [
            ("geometry/coordinates/0", "lon"),
            ("geometry/coordinates/1", "lat"),
            ("properties/attribute_id", "attribute_id"),
            ("properties/label_type", "label_type"),
            ("properties/neighborhood", "neighborhood"),
            ("properties/severity", "severity"),
            ("properties/is_temporary", "is_temporary"),
            ("geometry/type", "geometry_type"),
            ("type", "feature_type"),
        ]
    )
)

df.head()


In [None]:
# Severity must be numeric; anything that cannot be parsed becomes NaN.
df["severity"] = pd.to_numeric(df["severity"], errors="coerce")

# `is_temporary` sometimes arrives as TRUE/FALSE text instead of real booleans.
# Test for both object and dedicated string dtypes (a plain `== "object"` check
# misses the latter), and strip padding so " TRUE" is not silently turned into
# NaN. Any value other than TRUE/FALSE (case-insensitive) maps to NaN.
if pd.api.types.is_object_dtype(df["is_temporary"]) or pd.api.types.is_string_dtype(df["is_temporary"]):
    df["is_temporary"] = (
        df["is_temporary"]
        .astype(str)
        .str.strip()
        .str.upper()
        .map({"TRUE": True, "FALSE": False})
    )

df[["severity", "is_temporary"]].dtypes


In [None]:
# Share of missing values per column, as a percentage, most incomplete first.
df.isna().mean().sort_values(ascending=False).mul(100).round(2)


In [None]:
import numpy as np

def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance in meters between two points given in degrees.

    Uses the haversine formula with a spherical Earth of radius 6,371,000 m.
    Arguments go straight through numpy ufuncs, so scalars and arrays
    (with numpy broadcasting) are both accepted.
    """
    earth_radius_m = 6371000

    lat1_rad, lat2_rad = np.radians(lat1), np.radians(lat2)
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2

    # Haversine of the central angle between the two points.
    hav = (
        np.sin(half_dlat) ** 2
        + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(half_dlon) ** 2
    )
    return 2 * earth_radius_m * np.arcsin(np.sqrt(hav))


In [None]:
import folium
from folium.plugins import HeatMap, MarkerCluster, MiniMap, Fullscreen, LocateControl, MeasureControl, Draw, Geocoder

# Map center as [lat, lon]: the mean position of all observations.
center = [df[axis].mean() for axis in ("lat", "lon")]


In [None]:
# Load the emergency-food sites and inspect their size and column names.
food_raw = pd.read_csv("Seattle_Emergency_Food.csv")
print(f"Shape: {food_raw.shape}")
food_raw.columns

In [None]:
def pick_first_existing(cols, candidates):
    """Return the first entry of `candidates` that is in `cols`, or None if none is."""
    return next((name for name in candidates if name in cols), None)

# Locate the coordinate, name and (optional) subtype columns by trying common spellings.
lat_col = pick_first_existing(food_raw.columns, ["Latitude", "latitude", "LATITUDE", "lat", "Lat"])
lon_col = pick_first_existing(food_raw.columns, ["Longitude", "longitude", "LONGITUDE", "lon", "Lon", "lng", "LON"])

name_col = pick_first_existing(food_raw.columns, ["Agency", "AGENCY", "Name", "name", "Site Name", "SITE_NAME"])
type_col = pick_first_existing(food_raw.columns, ["Food Resource Type", "FOOD_RESOURCE_TYPE", "Type", "type"])

if lat_col is None or lon_col is None:
    raise KeyError(f"Couldn't find lat/lon columns. Columns are: {list(food_raw.columns)}")

# "Agency" is the first candidate tried above, so defaulting to it when nothing
# matched could never work: the rename would be a no-op and the dropna() below
# would fail on a missing "poi_name". Fail here with an actionable message.
if name_col is None:
    raise KeyError(f"Couldn't find a name column. Columns are: {list(food_raw.columns)}")

food = food_raw.rename(columns={
    name_col: "poi_name",
    lat_col: "lat",
    lon_col: "lon"
}).copy()

food["poi_type"] = "Emergency Food"
food["source"] = "Seattle Open Data"

# optional: keep subtype if available
if type_col is not None:
    food["poi_subtype"] = food_raw[type_col]

# Coordinates are compared numerically later on; unparseable values become NaN
# and are removed together with rows that lack a name.
food["lat"] = pd.to_numeric(food["lat"], errors="coerce")
food["lon"] = pd.to_numeric(food["lon"], errors="coerce")

food = food.dropna(subset=["lat", "lon", "poi_name"])
food[["poi_name", "poi_type", "lat", "lon"]].head()


In [None]:
# Bounding box of the sidewalk observations.
lat_min, lon_min = df["lat"].min(), df["lon"].min()
lat_max, lon_max = df["lat"].max(), df["lon"].max()

# Keep only the food sites inside that box (edges included).
food_seattle = food.loc[
    food["lat"].between(lat_min, lat_max) & food["lon"].between(lon_min, lon_max)
].copy()

print(f"Emergency food sites inside Sidewalk coverage: {len(food_seattle)}")
food_seattle[["poi_name", "lat", "lon"]].head()


In [None]:
# Requires from earlier cells: haversine(), df (sidewalk barriers) and food_seattle.
# Barriers missing a position, severity or temporary flag are excluded from scoring.
barriers = df.dropna(subset=["lat", "lon", "severity", "is_temporary"]).copy()
RADIUS_METERS = 200

# haversine() works on whole arrays, so each POI needs a single call against
# all barriers instead of one Python-level call per barrier row.
barrier_lat = barriers["lat"].to_numpy()
barrier_lon = barriers["lon"].to_numpy()

food_results = []

for _, p in food_seattle.iterrows():
    dists = haversine(p["lat"], p["lon"], barrier_lat, barrier_lon)
    nearby = barriers[dists <= RADIUS_METERS]

    food_results.append({
        "poi_name": p["poi_name"],
        "poi_type": "Emergency Food",
        "barrier_count": len(nearby),
        "avg_severity": nearby["severity"].mean() if len(nearby) > 0 else 0,
        "pct_permanent": 1 - nearby["is_temporary"].mean() if len(nearby) > 0 else 0
    })

# Name the columns explicitly so an empty result list still yields a frame
# with the expected schema (otherwise the risk_score lookup raises KeyError).
food_access = pd.DataFrame(
    food_results,
    columns=["poi_name", "poi_type", "barrier_count", "avg_severity", "pct_permanent"],
)

# Composite score: more barriers, higher severity and a larger permanent share
# all push the score up.
food_access["risk_score"] = (
    food_access["barrier_count"] *
    food_access["avg_severity"] *
    (1 + food_access["pct_permanent"])
)

food_access.sort_values("risk_score", ascending=False).head(10)


In [None]:
import pandas as pd
import numpy as np

from pyproj import Transformer


In [None]:
# ---- Helpers ----
def pick_first_existing(cols, candidates):
    """Return the earliest candidate name found in `cols`; None when there is no match."""
    present = [name for name in candidates if name in cols]
    return present[0] if present else None

def score_seattleish(lon, lat):
    """
    Count the coordinate pairs lying strictly inside the box
    -123.5 < lon < -121.5 and 47.0 < lat < 48.2.
    """
    lon_arr = np.asarray(lon)
    lat_arr = np.asarray(lat)
    lon_ok = (-123.5 < lon_arr) & (lon_arr < -121.5)
    lat_ok = (47.0 < lat_arr) & (lat_arr < 48.2)
    return (lon_ok & lat_ok).sum()

def try_convert_xy_to_lonlat(df_in, xcol, ycol):
    """
    Convert projected x/y columns to WGS84 lon/lat by trying several source CRSs.

    Each candidate EPSG code is used to transform the coordinates to EPSG:4326;
    the candidate whose output puts the most points inside the box checked by
    score_seattleish() wins (the first candidate wins ties).

    Parameters
    ----------
    df_in : DataFrame holding the projected coordinates.
    xcol, ycol : names of the easting / northing columns.

    Returns
    -------
    (lon, lat, epsg, score) : lon/lat float arrays aligned with `df_in` rows,
    the chosen EPSG code, and the number of points that landed in the box.
    Rows whose input is not numeric, or whose transform failed, are NaN.
    """
    # Coerce rather than astype(float) so one stray non-numeric cell yields a
    # NaN row instead of aborting the whole conversion.
    x = pd.to_numeric(df_in[xcol], errors="coerce").to_numpy(dtype=float)
    y = pd.to_numeric(df_in[ycol], errors="coerce").to_numpy(dtype=float)

    candidates = [3857, 2285]  # Web Mercator, WA StatePlane North (ftUS) - common for Seattle
    best = None

    for epsg in candidates:
        transformer = Transformer.from_crs(f"EPSG:{epsg}", "EPSG:4326", always_xy=True)
        lon, lat = transformer.transform(x, y)
        lon = np.asarray(lon, dtype=float)
        lat = np.asarray(lat, dtype=float)

        # pyproj reports points it could not transform as inf; turn those (and
        # their partner coordinate) into NaN so a later dropna() removes them.
        failed = ~(np.isfinite(lon) & np.isfinite(lat))
        lon = np.where(failed, np.nan, lon)
        lat = np.where(failed, np.nan, lat)

        s = score_seattleish(lon, lat)
        if best is None or s > best["score"]:
            best = {"epsg": epsg, "lon": lon, "lat": lat, "score": s}

    return best["lon"], best["lat"], best["epsg"], best["score"]

def standardize_poi(df_raw, poi_type, name_candidates=None, lat_candidates=None, lon_candidates=None):
    """
    Build a standardized POI table with columns: poi_name, poi_type, lat, lon.

    Coordinates are resolved in this order:
      1. explicit latitude/longitude columns,
      2. x/y columns whose values already look like degrees
         (more than 80% of x in [-130, -100] and of y in [40, 60]),
      3. projected x/y columns, converted via try_convert_xy_to_lonlat().

    Parameters
    ----------
    df_raw : raw POI DataFrame (not modified).
    poi_type : label written to the `poi_type` column.
    name_candidates : column names to try for the POI name, in priority order.
    lat_candidates, lon_candidates : column names to try for explicit
        latitude / longitude. When omitted, only unambiguous lat/lon spellings
        are tried, so generic X/Y columns are handled by the x/y branches.

    Returns
    -------
    DataFrame with the four standard columns; rows with any missing or
    non-finite value are dropped.

    Raises
    ------
    KeyError : no name column, or no usable lat/lon or x/y columns were found.
    """
    name_candidates = name_candidates or ["FACILITY", "Agency", "Name", "SITE_NAME", "Site Name", "SCHOOL_NM", "Common Name", "COMMON_NAME"]
    # Caller-supplied candidates are honored; the defaults are the explicit
    # spellings only, so projected X/Y columns are never mistaken for degrees.
    lat_candidates = lat_candidates or ["lat", "Lat", "LAT", "Latitude", "LATITUDE"]
    lon_candidates = lon_candidates or ["lon", "Lon", "LON", "Longitude", "LONGITUDE", "lng", "LNG"]

    cols = df_raw.columns

    # An input that already carries "poi_name" is accepted as a last resort.
    name_col = pick_first_existing(cols, list(name_candidates) + ["poi_name"])
    if name_col is None:
        raise KeyError(
            f"Could not find a name column for POI type '{poi_type}'. "
            f"Tried: {list(name_candidates)}. Columns: {list(cols)}"
        )

    def _finalize(frame):
        # Shared tail of every branch: label, force numeric finite coordinates,
        # keep the standard columns and drop incomplete rows.
        frame["poi_type"] = poi_type
        for axis in ("lat", "lon"):
            frame[axis] = pd.to_numeric(frame[axis], errors="coerce").replace([np.inf, -np.inf], np.nan)
        return frame[["poi_name", "poi_type", "lat", "lon"]].dropna()

    # Work on a copy under a name that does not shadow the notebook-level `df`.
    out = df_raw.copy()

    # First try direct lat/lon
    lat_col = pick_first_existing(cols, lat_candidates)
    lon_col = pick_first_existing(cols, lon_candidates)
    if lat_col and lon_col:
        out = out.rename(columns={name_col: "poi_name", lat_col: "lat", lon_col: "lon"})
        return _finalize(out)

    # Else try x/y
    xcol = pick_first_existing(cols, ["x", "X", "POINT_X", "EASTING"])
    ycol = pick_first_existing(cols, ["y", "Y", "POINT_Y", "NORTHING"])

    if xcol and ycol:
        x = pd.to_numeric(out[xcol], errors="coerce")
        y = pd.to_numeric(out[ycol], errors="coerce")

        # If x/y already look like lon/lat degrees, keep as-is
        if x.between(-130, -100).mean() > 0.8 and y.between(40, 60).mean() > 0.8:
            out = out.rename(columns={name_col: "poi_name", xcol: "lon", ycol: "lat"})
            return _finalize(out)

        # Otherwise convert projected x/y into WGS84 lon/lat. Hand over the
        # coerced numeric values so stray text cells cannot break the transform.
        out[xcol] = x
        out[ycol] = y
        lon, lat, epsg, score = try_convert_xy_to_lonlat(out, xcol, ycol)
        out["lon"] = lon
        out["lat"] = lat
        out = out.rename(columns={name_col: "poi_name"})
        print(f"[{poi_type}] Converted {xcol}/{ycol} using EPSG:{epsg} (Seattle-ish points: {score}/{len(out)})")
        return _finalize(out)

    raise KeyError(f"Could not find usable lat/lon or x/y columns for POI type '{poi_type}'. Columns: {list(cols)}")

def filter_to_sidewalk_coverage(pois, barriers_df):
    """
    Return a copy of the rows of `pois` that lie inside the lat/lon bounding
    box spanned by `barriers_df` (box edges included).
    """
    lat_lo, lat_hi = barriers_df["lat"].min(), barriers_df["lat"].max()
    lon_lo, lon_hi = barriers_df["lon"].min(), barriers_df["lon"].max()

    lat_ok = (pois["lat"] >= lat_lo) & (pois["lat"] <= lat_hi)
    lon_ok = (pois["lon"] >= lon_lo) & (pois["lon"] <= lon_hi)
    return pois.loc[lat_ok & lon_ok].copy()


In [None]:
# Load the transit dataset for inspection.
transit_raw = pd.read_csv("Seattle_Transit_System.csv")
# Only the last expression of a cell is rendered automatically, so the preview
# has to be displayed explicitly or it is silently discarded.
display(transit_raw.head(2))
transit_raw.columns

In [None]:
import pandas as pd

# --------------------
# 1) Load raw datasets
# --------------------
hosp_raw  = pd.read_csv("Seattle_Hospitals.csv")
food_raw  = pd.read_csv("Seattle_Emergency_Food.csv")
libs_raw  = pd.read_csv("Seattle_Libraries.csv")
parks_raw = pd.read_csv("Seattle_Parks_and_Rec.csv")
rail_raw  = pd.read_csv("Seattle_Light_Rails.csv")
pubs_raw  = pd.read_csv("Seattle_Public_Schools.csv")
pris_raw  = pd.read_csv("Seattle_Private_Schools.csv")

# -------------------------------
# 2) Dataset-specific FIXES FIRST
# -------------------------------

# Parks: X Coord / Y Coord -> x/y (so standardize_poi detects them)
parks_raw = parks_raw.rename(columns={"X Coord": "x", "Y Coord": "y"})


def _force_name_column(raw, label):
    """
    Rename the school-name column of `raw` to "Name".

    Known spellings are tried first; otherwise the first column whose name
    contains "name" (case-insensitive) is used. Returns (frame, column) where
    `column` is the detected name, or None if nothing matched (in which case
    `raw` is returned unchanged).
    """
    found = pick_first_existing(raw.columns, ["school_name", "SCHOOL_NAME", "SCHOOL_NM", "NAME", "Name"])
    if found is None:
        # heuristic: first column containing 'name'
        found = next((col for col in raw.columns if "name" in col.lower()), None)
    print(f"{label} school name column:", found)
    fixed = raw.rename(columns={found: "Name"}) if found else raw
    return fixed, found


# Public / private schools: force a usable name column -> "Name"
pubs_raw_fixed, pub_name_col = _force_name_column(pubs_raw, "Public")
pris_raw_fixed, pri_name_col = _force_name_column(pris_raw, "Private")

# --------------------
# 3) Standardize ONCE
# --------------------
hosp  = standardize_poi(hosp_raw,  "Hospital",       name_candidates=["FACILITY","Facility","NAME","Name"])
food  = standardize_poi(food_raw,  "Emergency Food", name_candidates=["Agency","AGENCY","Name","SITE_NAME","Site Name"])
libs  = standardize_poi(libs_raw,  "Library",        name_candidates=["Name","NAME","LIBRARY","Library","Common Name","COMMON_NAME"])
parks = standardize_poi(parks_raw, "Parks & Rec",    name_candidates=["Name","NAME","PARK_NAME","Park Name","COMMON_NAME","Common Name"])
rail  = standardize_poi(rail_raw,  "Light Rail",     name_candidates=["Name","NAME","STATION","Station","STOP_NAME","stop_name"])

# IMPORTANT: use the FIXED dataframes here (not pubs_raw / pris_raw)
pubs  = standardize_poi(pubs_raw_fixed, "Public School",  name_candidates=["Name","NAME","SCHOOL_NM","SCHOOL_NAME","School Name"])
pris  = standardize_poi(pris_raw_fixed, "Private School", name_candidates=["Name","NAME","SCHOOL_NM","SCHOOL_NAME","School Name"])

# --------------------
# 4) Combine
# --------------------
pois_all = pd.concat([hosp, food, libs, parks, rail, pubs, pris], ignore_index=True).dropna()

print("Total POIs (all types):", len(pois_all))
display(pois_all["poi_type"].value_counts())


In [None]:
# POI risk scoring: BallTree radius search over barriers plus a composite score (no trained ML model)
from sklearn.neighbors import BallTree
import numpy as np

def compute_poi_risk(pois, barriers, radius_m=200):
    """
    Score each POI by the sidewalk barriers found within `radius_m` meters.

    Parameters
    ----------
    pois : DataFrame with finite `lat` / `lon` columns in degrees.
    barriers : DataFrame with `lat`, `lon`, `severity` and `is_temporary`.
        Barriers without a finite position cannot be indexed and are ignored.
    radius_m : search radius in meters (great-circle distance on a sphere of
        radius 6,371,000 m).

    Returns
    -------
    `pois` (index reset) with four extra columns:
      barrier_count : number of located barriers within the radius
      avg_severity  : mean severity of those barriers (missing values skipped), 0 if none
      pct_permanent : share of those barriers flagged not temporary
                      (missing flags skipped), 0 if none
      risk_score    : barrier_count * avg_severity * (1 + pct_permanent)
    """
    earth_radius_m = 6371000
    pois = pois.reset_index(drop=True)

    # BallTree rejects NaN/inf input, so index only barriers with a usable position.
    bar_deg = barriers[["lat", "lon"]].apply(pd.to_numeric, errors="coerce").to_numpy(dtype=float)
    usable = np.isfinite(bar_deg).all(axis=1)
    located = barriers.loc[usable]
    bar_deg = bar_deg[usable]

    severity = pd.to_numeric(located["severity"], errors="coerce")
    # 1.0 = permanent, 0.0 = temporary, NaN = unknown. An explicit mapping is
    # used because `~` on an object-dtype column of Python bools yields -2/-1
    # per element and raises on NaN.
    permanent = located["is_temporary"].map({True: 0.0, False: 1.0})

    if len(pois) and len(located):
        # The haversine metric expects [lat, lon] in radians and returns
        # distances in radians, hence the meters -> radians conversion.
        tree = BallTree(np.deg2rad(bar_deg), metric="haversine")
        pois_rad = np.deg2rad(pois[["lat", "lon"]].to_numpy(dtype=float))
        hits = tree.query_radius(pois_rad, r=radius_m / earth_radius_m)
    else:
        # Nothing to search: every POI has zero nearby barriers.
        hits = [np.array([], dtype=int) for _ in range(len(pois))]

    results = []
    for idxs in hits:
        count = len(idxs)
        results.append({
            "barrier_count": count,
            "avg_severity": severity.iloc[idxs].mean() if count else 0,
            "pct_permanent": permanent.iloc[idxs].mean() if count else 0,
        })

    # Explicit columns keep the schema intact even when `pois` is empty.
    risk = pd.DataFrame(results, columns=["barrier_count", "avg_severity", "pct_permanent"])
    out = pd.concat([pois, risk], axis=1)

    # Simple composite risk score (interpretable!)
    out["risk_score"] = (
        out["barrier_count"]
        * out["avg_severity"]
        * (1 + out["pct_permanent"])
    )

    return out


In [None]:
# Score every POI against the barrier table, then list the ten highest-risk ones.
pois_risk = compute_poi_risk(pois=pois_all, barriers=df)
pois_risk.sort_values(by="risk_score", ascending=False).head(10)


In [None]:
import pandas as pd
import numpy as np
import folium

from folium.plugins import (
    HeatMap, MarkerCluster, MiniMap, Fullscreen, LocateControl,
    MeasureControl, Draw, Geocoder
)
from folium import IFrame

# -------------------------
# 0) Setup + safety cleaning
# -------------------------
# Stop early, naming the first required column that is absent.
needed = ["lat", "lon", "severity", "label_type", "neighborhood", "is_temporary"]
absent = next((col for col in needed if col not in df.columns), None)
if absent is not None:
    raise KeyError(f"Missing column '{absent}'. Current columns: {list(df.columns)}")

# Work on a copy with numeric severity and, when the flag is stored as text,
# real booleans in is_temporary.
df_clean = df.assign(severity=pd.to_numeric(df["severity"], errors="coerce"))
if df_clean["is_temporary"].dtype == "object":
    df_clean["is_temporary"] = (
        df_clean["is_temporary"].astype(str).str.upper().map({"TRUE": True, "FALSE": False})
    )

# Map center as [lat, lon]: mean position of the cleaned observations.
center = [df_clean[axis].mean() for axis in ("lat", "lon")]

# -------------------------
# 1) Base map (RENAMED)
# -------------------------
m_super = folium.Map(tiles="CartoDB positron", location=center, zoom_start=12)

# Global tools, attached in a fixed order: minimap, fullscreen, locate,
# measure, geocoder search, and a draw tool that can export the selection.
for control in (
    MiniMap(toggle_display=True),
    Fullscreen(position="topright"),
    LocateControl(auto_start=False),
    MeasureControl(position="topleft"),
    Geocoder(collapsed=True, add_marker=True),
    Draw(
        export=True,
        filename="selected_area.geojson",
        position="topleft",
        draw_options={"polyline": False, "circle": False, "circlemarker": False},
    ),
):
    control.add_to(m_super)

# ---------------------------------------------------
# 2) View A: Interactive Heatmap (severity-weighted)
# ---------------------------------------------------
heat_fg = folium.FeatureGroup(name="VIEW: Heatmap (weighted by severity)", show=False)

# One [lat, lon, weight] triple per observation that has all three values.
heat_cols = ["lat", "lon", "severity"]
heat_data = df_clean.dropna(subset=heat_cols)[heat_cols].to_numpy().tolist()

heat_fg.add_child(HeatMap(heat_data, radius=10, blur=15, max_zoom=13))
m_super.add_child(heat_fg)

# ----------------------------------------------------------------
# 3) View B: Clustered Markers (basic)
# ----------------------------------------------------------------
cluster_fg = folium.FeatureGroup(name="VIEW: Clustered Markers (basic)", show=False)
cluster = MarkerCluster().add_to(cluster_fg)

# Size the sample from the rows that survive the dropna: sizing it from
# len(df_clean) raises ValueError whenever the filter leaves fewer rows than
# the requested sample.
located_basic = df_clean.dropna(subset=["lat","lon"])
sample_basic = located_basic.sample(min(3000, len(located_basic)), random_state=7)
for _, r in sample_basic.iterrows():
    popup = (f"Type: {r['label_type']}<br>"
             f"Neighborhood: {r['neighborhood']}<br>"
             f"Severity: {r['severity']}<br>"
             f"Temporary: {r['is_temporary']}")
    folium.CircleMarker(
        location=[r["lat"], r["lon"]],
        radius=3,
        popup=popup,
        fill=True
    ).add_to(cluster)

cluster_fg.add_to(m_super)

# ---------------------------------------------------------------------
# 4) View C: Clustered Markers (rich HTML popup w/ severity badge)
# ---------------------------------------------------------------------
badge_fg = folium.FeatureGroup(name="VIEW: Clustered Markers (rich popups)", show=False)
badge_cluster = MarkerCluster().add_to(badge_fg)

# Cap the sample by the filtered row count (not len(df_clean)); otherwise
# sample() raises ValueError when fewer complete rows exist than requested.
badge_pool = df_clean.dropna(subset=["lat","lon","severity"])
sample_badge = badge_pool.sample(min(2000, len(badge_pool)), random_state=7)
for _, r in sample_badge.iterrows():
    sev = int(r["severity"]) if pd.notna(r["severity"]) else "NA"
    temp = r["is_temporary"]

    html = f"""
    <div style="font-family: Arial; font-size: 14px;">
      <div style="font-size:16px; font-weight:700;">{r['label_type']}</div>
      <div><b>Neighborhood:</b> {r['neighborhood']}</div>
      <div><b>Severity:</b>
        <span style="padding:2px 6px; border-radius:8px; border:1px solid #999;">
          {sev}
        </span>
      </div>
      <div><b>Temporary:</b> {temp}</div>
      <div style="margin-top:6px; color:#666;">
        ({r['lat']:.5f}, {r['lon']:.5f})
      </div>
    </div>
    """
    # The popup body is rendered inside a fixed-size iframe.
    iframe = IFrame(html=html, width=270, height=150)
    folium.Marker([r["lat"], r["lon"]], popup=folium.Popup(iframe)).add_to(badge_cluster)

badge_fg.add_to(m_super)

# -------------------------------------------------------------
# 5) View D: Toggles (perm/temp/severe)
# -------------------------------------------------------------
toggle_group = folium.FeatureGroup(name="VIEW: Toggle layers (perm/temp/severe)", show=False)

# NOTE(review): these three groups are added to toggle_group, not to the map
# itself - confirm they get their own entries in the layer control.
fg_perm = folium.FeatureGroup(name="Permanent (not temporary)")
fg_temp = folium.FeatureGroup(name="Temporary")
fg_severe = folium.FeatureGroup(name="Severe (severity ≥ 4)")

# Cap the sample by the number of complete rows (not len(df_clean)); otherwise
# sample() raises ValueError when the dropna leaves fewer rows than requested.
toggle_pool = df_clean.dropna(subset=["lat","lon","severity","is_temporary"])
clean = toggle_pool.sample(min(5000, len(toggle_pool)), random_state=7)

for _, r in clean.iterrows():
    popup = (f"Type: {r['label_type']}<br>"
             f"Neighborhood: {r['neighborhood']}<br>"
             f"Severity: {r['severity']}<br>"
             f"Temporary: {r['is_temporary']}")

    marker = folium.CircleMarker(
        location=[r["lat"], r["lon"]],
        radius=3,
        popup=popup,
        fill=True
    )

    # Every point goes to exactly one of the temporary / permanent groups.
    if r["is_temporary"] == True:
        marker.add_to(fg_temp)
    else:
        marker.add_to(fg_perm)

    # Severe points additionally get their own, slightly larger marker.
    if r["severity"] >= 4:
        folium.CircleMarker(
            location=[r["lat"], r["lon"]],
            radius=4,
            popup=popup,
            fill=True
        ).add_to(fg_severe)

fg_perm.add_to(toggle_group)
fg_temp.add_to(toggle_group)
fg_severe.add_to(toggle_group)

toggle_group.add_to(m_super)

# -------------------------------------------------------------
# 6) View E: Severity grid overlay
# -------------------------------------------------------------
def make_grid(df_in, cell_size=0.003):
    """
    Aggregate observations into square lat/lon cells of `cell_size` degrees.

    Rows missing lat, lon or severity are ignored. Returns one row per
    non-empty cell with columns:
      gx, gy            integer cell indices (floor of coordinate / cell_size)
      n                 number of observations in the cell
      avg_sev           mean severity in the cell
      lon_min, lon_max  west / east edge of the cell
      lat_min, lat_max  south / north edge of the cell
    """
    d = df_in.dropna(subset=["lat","lon","severity"]).copy()

    # Floor, not truncate: astype(int) alone rounds toward zero, which puts
    # negative coordinates (e.g. western longitudes) one cell too far toward
    # zero, so the drawn rectangle would not contain its own points.
    d["gx"] = np.floor(d["lon"] / cell_size).astype(int)
    d["gy"] = np.floor(d["lat"] / cell_size).astype(int)

    agg = d.groupby(["gx","gy"]).agg(n=("severity","size"), avg_sev=("severity","mean")).reset_index()
    agg["lon_min"] = agg["gx"] * cell_size
    agg["lon_max"] = (agg["gx"] + 1) * cell_size
    agg["lat_min"] = agg["gy"] * cell_size
    agg["lat_max"] = (agg["gy"] + 1) * cell_size
    return agg

grid_fg = folium.FeatureGroup(name="VIEW: Severity grid overlay", show=False)
grid = make_grid(df_clean, cell_size=0.003)

# Draw only cells with at least 5 observations; fill opacity follows the
# average severity, limited to the range 0.1 .. 0.85.
for _, r in grid[grid["n"] >= 5].iterrows():
    opacity = min(0.85, max(0.1, (r["avg_sev"] - 1) / 4))
    south_west = [r["lat_min"], r["lon_min"]]
    north_east = [r["lat_max"], r["lon_max"]]
    cell = folium.Rectangle(
        bounds=[south_west, north_east],
        fill=True, fill_opacity=opacity, weight=0,
        popup=f"Count: {int(r['n'])}<br>Avg severity: {r['avg_sev']:.2f}"
    )
    cell.add_to(grid_fg)

grid_fg.add_to(m_super)

# -------------------------------------------------------------
# 7) View F: Top barrier types
# -------------------------------------------------------------
types_view = folium.FeatureGroup(name="VIEW: Barrier types (top 6 toggles)", show=False)

clean2 = df_clean.dropna(subset=["lat","lon","label_type","severity"])
top_types = clean2["label_type"].value_counts().head(6).index
type_layers = {t: folium.FeatureGroup(name=f"Type: {t}") for t in top_types}

# Cap the sample by the rows that belong to a top-6 type. Using len(clean2)
# asks for more rows than the filtered frame holds as soon as any other type
# exists, and sample() then raises ValueError.
typed_pool = clean2[clean2["label_type"].isin(top_types)]
sample_types = typed_pool.sample(min(6000, len(typed_pool)), random_state=7)
for _, r in sample_types.iterrows():
    popup = (f"Type: {r['label_type']}<br>"
             f"Neighborhood: {r['neighborhood']}<br>"
             f"Severity: {r['severity']}")
    folium.CircleMarker(
        location=[r["lat"], r["lon"]],
        radius=3,
        popup=popup,
        fill=True
    ).add_to(type_layers[r["label_type"]])

for _, fg in type_layers.items():
    fg.add_to(types_view)

types_view.add_to(m_super)

# -------------------------------------------------------------
# 7.5) POIs: All essential services (ONE toggle)
# -------------------------------------------------------------
# Requires: pois_risk dataframe exists
required_poi_cols = ["lat", "lon", "poi_name", "poi_type", "risk_score", "barrier_count", "avg_severity"]
for c in required_poi_cols:
    if c not in pois_risk.columns:
        raise KeyError(f"pois_risk missing '{c}'. Columns: {list(pois_risk.columns)}")

# Keep only POIs with numeric coordinates.
pois_plot = pois_risk.dropna(subset=["lat","lon"]).copy()
pois_plot["lat"] = pd.to_numeric(pois_plot["lat"], errors="coerce")
pois_plot["lon"] = pd.to_numeric(pois_plot["lon"], errors="coerce")
pois_plot = pois_plot.dropna(subset=["lat","lon"])

poi_fg = folium.FeatureGroup(name="POIs: Essential Services (risk-weighted)", show=False)

# Marker color per POI type; unknown types fall back to gray below.
color_map = {
    "Hospital": "red",
    "Emergency Food": "darkred",
    "Library": "blue",
    "Public School": "green",
    "Private School": "lightgreen",
    "Parks & Rec": "darkgreen",
    "Light Rail": "purple"
}

# Optional: limit for performance
top_n = 800
pois_plot = pois_plot.sort_values("risk_score", ascending=False).head(top_n)

for _, r in pois_plot.iterrows():
    popup = f"""
    <b>{r['poi_name']}</b><br>
    Type: {r['poi_type']}<br>
    Risk score: {r['risk_score']:.2f}<br>
    Barriers (200m): {int(r['barrier_count'])}<br>
    Avg severity: {r['avg_severity']:.2f}
    """

    # Radius grows with risk, up to 6 extra pixels. A NaN score would otherwise
    # flow into the radius and produce an undrawable marker, so fall back to
    # the base size in that case.
    risk = float(r["risk_score"])
    size_bonus = 0 if np.isnan(risk) else min(risk / 10, 6)

    folium.CircleMarker(
        location=[float(r["lat"]), float(r["lon"])],
        radius=4 + size_bonus,
        color=color_map.get(r["poi_type"], "gray"),
        fill=True,
        fill_opacity=0.75,
        popup=popup
    ).add_to(poi_fg)

poi_fg.add_to(m_super)

# -------------------------
# 8) Layer control + save
# -------------------------
# Expanded layer switcher, added last so it can list the layers created above.
layer_switcher = folium.LayerControl(collapsed=False)
layer_switcher.add_to(m_super)

# Write a standalone HTML copy, then render the map as the cell output.
m_super.save("all_views_accessibility_map_with_pois.html")
m_super
