In [14]:
# trauma_plots.py
# Build interactive Plotly visuals from trauma_distances.csv

from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# === 1) INPUT / OUTPUT PATHS ===
# Point this to your CSV created by SAS (distances_final export).
# If the file sits next to this script, just use the filename.
INPUT_CSV = Path("trauma_distances.csv")

# Output HTML files (standalone; can be opened directly)
OUT_TIME_MAP         = Path("time_map.html")
OUT_SCATTER_GEO_DRIVE= Path("scatter_geo_drive.html")
OUT_STRAIGHT_LINES   = Path("straight_lines.html")

# === 2) LOAD & BASIC CHECKS ===
if not INPUT_CSV.exists():
    raise FileNotFoundError(f"Could not find {INPUT_CSV.resolve()}")

df = pd.read_csv(INPUT_CSV)
expected_cols = {
    # minimal columns we will use
    "zip","CITY","hosp_name",
    "zip_lon","zip_lat","hosp_lon","hosp_lat",
    "miles_geo","miles_drive","time_sec","http_code"
}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"CSV missing columns: {sorted(missing)}")

# Only keep successful routes (HTTP 200)
df_ok = df[df["http_code"] == 200].copy()
if df_ok.empty:
    raise ValueError("No successful routes (http_code==200) found in the file.")

# Useful derived fields
df_ok["time_min"] = df_ok["time_sec"] / 60.0

# === 3) TIME MAP (ZIP bubbles sized/colored by minutes) ===
# Uses scatter_geo (no Mapbox token needed).
time_map = px.scatter_geo(
    df_ok.rename(columns={"zip_lon":"x","zip_lat":"y"}),
    lon="x", lat="y",
    size="time_min", color="time_min",
    color_continuous_scale="Blues",
    hover_name="CITY",
    hover_data={
        "zip": True,
        "hosp_name": True,
        "time_min": ":.1f"
    },
    scope="usa", projection="albers usa",
    title="Estimated Travel Time to Nearest Trauma Center (minutes)"
)
time_map.update_layout(margin=dict(l=0, r=0, t=60, b=0))
time_map.write_html(OUT_TIME_MAP, include_plotlyjs="cdn")
print(f"Wrote {OUT_TIME_MAP.resolve()}")

# === 4) SCATTER: Straight-line vs Driving miles ===
scatter = px.scatter(
    df_ok,
    x="miles_geo", y="miles_drive",
    hover_data=["zip","CITY","hosp_name"],
    labels={"miles_geo":"Geodesic miles", "miles_drive":"Driving miles"},
    title="Straight-line vs Driving Distance to Nearest Trauma Center (GA)"
)
# Add 45° reference line (y=x)
m = float(pd.concat([df_ok["miles_geo"], df_ok["miles_drive"]]).max())
scatter.add_shape(
    type="line", x0=0, y0=0, x1=m, y1=m,
    line=dict(color="lightblue")
)
scatter.update_layout(margin=dict(l=20, r=20, t=60, b=60))
scatter.write_html(OUT_SCATTER_GEO_DRIVE, include_plotlyjs="cdn")
print(f"Wrote {OUT_SCATTER_GEO_DRIVE.resolve()}")

# === 5) STRAIGHT-LINE CONNECTIONS (ZIP → hospital) ===
# We draw one "line trace" per ZIP, plus endpoint layers for ZIPs and hospitals.
fig_lines = go.Figure()

# Lightweight, readable line layer per ZIP
for row in df_ok.itertuples(index=False):
    fig_lines.add_trace(go.Scattergeo(
        lon=[row.zip_lon, row.hosp_lon],
        lat=[row.zip_lat, row.hosp_lat],
        mode="lines",
        line=dict(width=1, color="rgba(0,90,200,0.25)"),
        hoverinfo="skip",
        showlegend=False
    ))

# Endpoints: ZIPs
fig_lines.add_trace(go.Scattergeo(
    lon=df_ok["zip_lon"], lat=df_ok["zip_lat"],
    mode="markers",
    marker=dict(size=3, color="midnightblue"),
    name="ZIP centroids",
    hovertext=df_ok["zip"].astype(str) + ": " + df_ok["CITY"].astype(str),
    hoverinfo="text"
))
# Endpoints: Trauma centers
fig_lines.add_trace(go.Scattergeo(
    lon=df_ok["hosp_lon"], lat=df_ok["hosp_lat"],
    mode="markers",
    marker=dict(size=7, color="crimson", symbol="diamond"),
    name="Trauma centers",
    hovertext=df_ok["hosp_name"],
    hoverinfo="text"
))

fig_lines.update_geos(
    scope="usa",
    projection_type="albers usa",
    showcountries=False,
    showland=True
)
fig_lines.update_layout(
    title="Straight-Line Connections: ZIP → Nearest Trauma Center (GA)",
    margin=dict(l=0, r=0, t=60, b=0)
)
fig_lines.write_html(OUT_STRAIGHT_LINES, include_plotlyjs="cdn")
print(f"Wrote {OUT_STRAIGHT_LINES.resolve()}")


Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/time_map.html
Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/scatter_geo_drive.html
Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/straight_lines.html


In [16]:
# === Road-distance bubble map (driving miles) ===
import plotly.express as px
miles_map = px.scatter_geo(
    df_ok.rename(columns={"zip_lon":"x","zip_lat":"y"}),
    lon="x", lat="y",
    size="miles_drive", color="miles_drive",
    color_continuous_scale="Viridis",
    hover_name="CITY",
    hover_data={
        "zip": True,
        "hosp_name": True,
        "miles_drive": ":.1f",
        "miles_geo": ":.1f",
        "time_min": ":.1f" if "time_min" in df_ok.columns else False
    },
    scope="usa", projection="albers usa",
    title="Driving Distance to Nearest Trauma Center (miles)"
)
miles_map.update_layout(margin=dict(l=0, r=0, t=60, b=0))
miles_map.write_html("miles_map.html", include_plotlyjs="cdn")
print("Wrote", Path("miles_map.html").resolve())


Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/miles_map.html


In [18]:
import numpy as np
import plotly.figure_factory as ff

vals = df_ok["miles_drive"].dropna().values
p50, p75, p90 = np.percentile(vals, [50,75,90])

hist = px.histogram(df_ok, x="miles_drive", nbins=40,
                    title="Distribution of Driving Distance (miles)")
for v, name in [(p50,"P50"), (p75,"P75"), (p90,"P90")]:
    hist.add_vline(v, line_dash="dash", line_color="tomato", annotation_text=name)
hist.update_layout(xaxis_title="Driving miles", yaxis_title="ZIP count")
hist.write_html("miles_histogram.html", include_plotlyjs="cdn")
print("Wrote", Path("miles_histogram.html").resolve())


Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/miles_histogram.html


In [19]:
df

Unnamed: 0,zip,CITY,COUNTY,hosp_name,hosp_lat,hosp_lon,zip_lat,zip_lon,miles_geo,miles_drive,time_text,time_sec,http_code,delta_miles,ratio_drive_to_geo
0,30002,Avondale Estates,89,Wellstar Atlanta Medical Center,33.7629,-84.3730,33.773,-84.265,6.269054,7.725632,15min,924.1,200,1.456578,1.232344
1,30003,Norcross,135,Piedmont Walton Hospital,33.7989,-83.7503,33.681,-84.007,16.889499,20.238557,34min,2012.2,200,3.349058,1.198292
2,30004,Alpharetta,121,Wellstar North Fulton Hospital,34.0632,-84.3199,34.123,-84.297,4.313298,6.093352,14min,847.3,200,1.780054,1.412690
3,30005,Alpharetta,121,Wellstar North Fulton Hospital,34.0632,-84.3199,34.081,-84.201,6.936640,8.753007,17min,1022.7,200,1.816367,1.261851
4,30006,Marietta,67,WellStar Kennestone Hospital,33.9696,-84.5516,33.954,-84.541,1.247254,1.769603,5min,320.0,200,0.522349,1.418799
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
945,39877,Parrott,273,Crisp Regional Hospital,31.9776,-83.7813,31.894,-84.507,43.028047,51.938678,1hour21min,4874.7,200,8.910631,1.207089
946,39885,Sasser,273,Crisp Regional Hospital,31.9776,-83.7813,31.713,-84.350,38.068236,48.004094,1hour11min,4239.2,200,9.935858,1.261001
947,39886,Shellman,243,Crisp Regional Hospital,31.9776,-83.7813,31.761,-84.607,50.770906,64.608810,1hour33min,5564.5,200,13.837904,1.272556
948,39897,Whigham,131,John D. Archbold Memorial Hospital,30.8252,-83.9721,30.936,-84.315,21.741835,24.635255,40min,2428.3,200,2.893420,1.133081


In [3]:
# trauma_plots_multi_winner.py
# Builds interactive visuals from the 3-winner CSV.
# v2: Adds .dropna() to handle missing API results.

from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# === 1) INPUT / OUTPUT PATHS ===
# Point this to your NEW CSV created by SAS (all_winners_by_zip export).
INPUT_CSV = Path("trauma_winners_by_zip.csv")

# Output HTML files
OUT_TIME_WINNER_MAP        = Path("time_winner_map.html")
OUT_DRIVE_MILES_WINNER_MAP = Path("drive_miles_winner_map.html")
OUT_SCATTER_GEO_DRIVE    = Path("scatter_for_geo_winner.html")
OUT_STRAIGHT_LINES       = Path("straight_lines_for_geo_winner.html")
OUT_WINNER_COMPARE_BAR   = Path("winner_comparison.html")

# === 2) LOAD & PREPARE DATA ===
if not INPUT_CSV.exists():
    raise FileNotFoundError(f"Could not find {INPUT_CSV.resolve()}")

df = pd.read_csv(INPUT_CSV)
expected_cols = {
    # minimal columns we will use
    "zip", "CITY", "hosp_name", "metric_type",
    "zip_lon", "zip_lat", "hosp_lon", "hosp_lat",
    "miles_geo", "miles_drive", "time_sec"
}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"CSV missing columns: {sorted(missing)}. Did you add the lat/lon columns to the SAS keep step?")

# Useful derived field
df["time_min"] = df["time_sec"] / 60.0

# --- Create 3 separate dataframes, one for each "winner" type ---
# 1. Winners by Driving Time (for the time bubble map)
df_time_winner = df[df["metric_type"] == "Driving Time"].copy()

# 2. Winners by Driving Distance (for the miles bubble map)
df_drive_winner = df[df["metric_type"] == "Driving Distance"].copy()

# 3. Winners by Straight-Line (for scatter & straight-line map)
df_geo_winner = df[df["metric_type"] == "Straight-Line"].copy()

if df_time_winner.empty or df_drive_winner.empty or df_geo_winner.empty:
    raise ValueError("CSV is missing one of the 'metric_type' winners. Check the file.")

# === 3) TIME MAP (WINNER by Driving Time) ===
print(f"Generating {OUT_TIME_WINNER_MAP}...")

# --- FIX: Filter out NaN values that cause Plotly to crash ---
plot_df_time = df_time_winner.dropna(subset=["time_min"])

if plot_df_time.empty:
    print(f"Skipping {OUT_TIME_WINNER_MAP}, no valid time data found.")
else:
    time_map = px.scatter_geo(
        plot_df_time.rename(columns={"zip_lon": "x", "zip_lat": "y"}), # Use filtered data
        lon="x", lat="y",
        size="time_min", color="time_min",
        color_continuous_scale="Blues",
        hover_name="CITY",
        hover_data={
            "zip": True,
            "hosp_name": True,
            "time_min": ":.1f",
            "metric_type": True
        },
        scope="usa", projection="albers usa",
        title="WINNER by Driving Time (minutes)"
    )
    time_map.update_layout(margin=dict(l=0, r=0, t=60, b=0))
    time_map.write_html(OUT_TIME_WINNER_MAP, include_plotlyjs="cdn")
    print(f"Wrote {OUT_TIME_WINNER_MAP.resolve()}")

# === 4) SCATTER: Straight-line vs Driving (for GEO WINNER) ===
print(f"Generating {OUT_SCATTER_GEO_DRIVE}...")

# --- FIX: Filter out NaN values that cause Plotly to crash ---
plot_df_scatter = df_geo_winner.dropna(subset=["miles_geo", "miles_drive"])

if plot_df_scatter.empty:
    print(f"Skipping {OUT_SCATTER_GEO_DRIVE}, no valid scatter data found.")
else:
    scatter = px.scatter(
        plot_df_scatter, # Use filtered data
        x="miles_geo", y="miles_drive",
        hover_data=["zip", "CITY", "hosp_name"],
        labels={"miles_geo": "Geodesic miles", "miles_drive": "Driving miles"},
        title="Straight-line vs Driving Distance (Data from Straight-Line Winner)"
    )
    # Add 45° reference line (y=x)
    m = float(pd.concat([plot_df_scatter["miles_geo"], plot_df_scatter["miles_drive"]]).max())
    scatter.add_shape(
        type="line", x0=0, y0=0, x1=m, y1=m,
        line=dict(color="lightblue")
    )
    scatter.update_layout(margin=dict(l=20, r=20, t=60, b=60))
    scatter.write_html(OUT_SCATTER_GEO_DRIVE, include_plotlyjs="cdn")
    print(f"Wrote {OUT_SCATTER_GEO_DRIVE.resolve()}")

# === 5) STRAIGHT-LINE CONNECTIONS (for GEO WINNER) ===
# (No .dropna() needed here as lat/lon are always present)
print(f"Generating {OUT_STRAIGHT_LINES}...")
fig_lines = go.Figure()

# Lightweight, readable line layer per ZIP
for row in df_geo_winner.itertuples(index=False):
    fig_lines.add_trace(go.Scattergeo(
        lon=[row.zip_lon, row.hosp_lon],
        lat=[row.zip_lat, row.hosp_lat],
        mode="lines",
        line=dict(width=1, color="rgba(0,90,200,0.25)"),
        hoverinfo="skip",
        showlegend=False
    ))

# Endpoints: ZIPs
fig_lines.add_trace(go.Scattergeo(
    lon=df_geo_winner["zip_lon"], lat=df_geo_winner["zip_lat"],
    mode="markers",
    marker=dict(size=3, color="midnightblue"),
    name="ZIP centroids",
    hovertext=df_geo_winner["zip"].astype(str) + ": " + df_geo_winner["CITY"].astype(str),
    hoverinfo="text"
))
# Endpoints: Trauma centers
fig_lines.add_trace(go.Scattergeo(
    lon=df_geo_winner["hosp_lon"], lat=df_geo_winner["hosp_lat"],
    mode="markers",
    marker=dict(size=7, color="crimson", symbol="diamond"),
    name="Trauma centers",
    hovertext=df_geo_winner["hosp_name"],
    hoverinfo="text"
))

fig_lines.update_geos(
    scope="usa",
    projection_type="albers usa",
    showcountries=False,
    showland=True
)
fig_lines.update_layout(
    title="Straight-Line Connections (ZIP → Straight-Line Winner)",
    margin=dict(l=0, r=0, t=60, b=0)
)
fig_lines.write_html(OUT_STRAIGHT_LINES, include_plotlyjs="cdn")
print(f"Wrote {OUT_STRAIGHT_LINES.resolve()}")

# === 6) DRIVE MILES MAP (WINNER by Driving Distance) ===
print(f"Generating {OUT_DRIVE_MILES_WINNER_MAP}...")

# --- FIX: Filter out NaN values that cause Plotly to crash ---
plot_df_drive = df_drive_winner.dropna(subset=["miles_drive"])

if plot_df_drive.empty:
    print(f"Skipping {OUT_DRIVE_MILES_WINNER_MAP}, no valid drive distance data found.")
else:
    miles_map = px.scatter_geo(
        plot_df_drive.rename(columns={"zip_lon": "x", "zip_lat": "y"}), # Use filtered data
        lon="x", lat="y",
        size="miles_drive", color="miles_drive",
        color_continuous_scale="Viridis",
        hover_name="CITY",
        hover_data={
            "zip": True,
            "hosp_name": True,
            "miles_drive": ":.1f",
            "miles_geo": ":.1f",
            "time_min": ":.1f"
        },
        scope="usa", projection="albers usa",
        title="WINNER by Driving Distance (miles)"
    )
    miles_map.update_layout(margin=dict(l=0, r=0, t=60, b=0))
    miles_map.write_html(OUT_DRIVE_MILES_WINNER_MAP, include_plotlyjs="cdn")
    print(f"Wrote {OUT_DRIVE_MILES_WINNER_MAP.resolve()}")


# === 7) NEW: Bar chart comparing the winners ===
print(f"Generating {OUT_WINNER_COMPARE_BAR}...")

# Pivot the data: one row per ZIP, columns show the winning hospital for each metric
try:
    df_pivot = df.pivot(
        index=["zip", "CITY"], # Use CITY for hover
        columns="metric_type",
        values="hosp_name"
    ).reset_index()
except Exception as e:
    print(f"Could not pivot data, skipping bar chart. Error: {e}")
    print("This can happen if some ZIPs are missing a metric_type.")
    df_pivot = pd.DataFrame() # make it empty

if not df_pivot.empty:
    # Define "mismatch"
    # We care most if the Straight-Line winner is NOT the Driving Time winner
    df_pivot["Mismatch"] = (
        df_pivot["Straight-Line"] != df_pivot["Driving Time"]
    )

    # Create a simple summary for the bar chart
    mismatch_counts = df_pivot["Mismatch"].value_counts().reset_index()
    mismatch_counts.columns = ["Match_Type", "Count"] # Rename columns
    mismatch_counts["Match_Type"] = mismatch_counts["Match_Type"].map({
        True: "Different Winner (Geo vs. Time)",
        False: "Same Winner (Geo == Time)"
    })

    # Create the bar chart
    bar_fig = px.bar(
        mismatch_counts,
        x="Match_Type",
        y="Count",
        color="Match_Type",
        color_discrete_map={
             "Different Winner (Geo vs. Time)": "crimson",
             "Same Winner (Geo == Time)": "cornflowerblue"
        },
        labels={"Match_Type": "Comparison", "Count": "Number of ZIP Codes"},
        title="How Many ZIPs Have a Different 'Nearest' vs. 'Fastest' Hospital?",
        text="Count" # Show count on bars
    )
    bar_fig.update_layout(showlegend=False)
    bar_fig.write_html(OUT_WINNER_COMPARE_BAR, include_plotlyjs="cdn")
    print(f"Wrote {OUT_WINNER_COMPARE_BAR.resolve()}")

print("\nAll plots generated successfully.")

Generating time_winner_map.html...
Skipping time_winner_map.html, no valid time data found.
Generating scatter_for_geo_winner.html...
Skipping scatter_for_geo_winner.html, no valid scatter data found.
Generating straight_lines_for_geo_winner.html...
Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/straight_lines_for_geo_winner.html
Generating drive_miles_winner_map.html...
Skipping drive_miles_winner_map.html, no valid drive distance data found.
Generating winner_comparison.html...
Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/winner_comparison.html

All plots generated successfully.


In [5]:
# trauma_plots_winners.py
# Build enhanced Plotly visuals from trauma_winners_by_zip.csv
# Preserves all ZIP-hospital-metric combinations and imputes missing time_min intelligently.

from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# === 1) INPUT / OUTPUT PATHS ===
INPUT_CSV = Path("trauma_winners_by_zip.csv")

OUT_TIME_MAP = Path("time_map.html")
OUT_SCATTER_GEO_DRIVE = Path("scatter_geo_drive.html")
OUT_STRAIGHT_LINES = Path("straight_lines.html")
OUT_COUNTY_SUMMARY = Path("county_avg_time.html")

# === 2) LOAD & CHECK DATA ===
if not INPUT_CSV.exists():
    raise FileNotFoundError(f"Could not find {INPUT_CSV.resolve()}")

df = pd.read_csv(INPUT_CSV)

expected_cols = {
    "zip", "CITY", "COUNTY", "hosp_name",
    "zip_lon", "zip_lat", "hosp_lon", "hosp_lat",
    "miles_geo", "miles_drive", "time_sec", "time_text", "metric_type"
}
missing = expected_cols - set(df.columns)
if missing:
    print("⚠️ Warning: CSV missing columns:", sorted(missing))

# Keep only rows with coordinates
df = df.dropna(subset=["zip_lon", "zip_lat", "hosp_lon", "hosp_lat"]).copy()

# === 3) HANDLE MISSING / INCONSISTENT TIME_MIN VALUES ===

# Step 1: Ensure numeric time_sec and derive time_min
df["time_sec"] = pd.to_numeric(df["time_sec"], errors="coerce")
df["time_min"] = df["time_sec"] / 60.0

# Step 2: Fill missing time_min within each (ZIP, hospital) pair
df["time_min"] = df.groupby(["zip", "hosp_name"])["time_min"].transform(
    lambda x: x.fillna(x.mean())
)

# Step 3: Fallback to ZIP-level average if still missing
df["time_min"] = df.groupby("zip")["time_min"].transform(
    lambda x: x.fillna(x.mean())
)

# Step 4: Final fallback to overall median (ensures no NaNs remain)
overall_median = df["time_min"].median()
df["time_min"] = df["time_min"].fillna(overall_median)

# Separate metric subsets
df_drive = df[df["metric_type"].str.contains("Driving", case=False, na=False)]
df_straight = df[df["metric_type"].str.contains("Straight", case=False, na=False)]

# === 4) TIME MAP — Driving times (bubble size + color) ===
if df_drive.empty:
    print("⚠️ No Driving records found — skipping time map.")
else:
    time_map = px.scatter_geo(
        df_drive.rename(columns={"zip_lon": "x", "zip_lat": "y"}),
        lon="x", lat="y",
        size="time_min", color="time_min",
        color_continuous_scale="Blues",
        hover_name="CITY",
        hover_data={
            "zip": True,
            "COUNTY": True,
            "hosp_name": True,
            "metric_type": True,
            "time_min": ":.1f",
            "miles_drive": ":.1f"
        },
        scope="usa", projection="albers usa",
        title="Estimated Driving Time to Nearest Trauma Center (minutes)"
    )
    time_map.update_layout(margin=dict(l=0, r=0, t=60, b=0))
    time_map.write_html(OUT_TIME_MAP, include_plotlyjs="cdn")
    print(f"✅ Wrote {OUT_TIME_MAP.resolve()}")

# === 5) SCATTER — Straight-line vs Driving miles ===
if df_drive.empty:
    print("⚠️ No Driving records found — skipping scatter plot.")
else:
    scatter = px.scatter(
        df_drive,
        x="miles_geo", y="miles_drive",
        hover_data=["zip", "CITY", "COUNTY", "hosp_name", "metric_type"],
        labels={"miles_geo": "Geodesic miles", "miles_drive": "Driving miles"},
        color="time_min",
        color_continuous_scale="Viridis",
        title="Straight-Line vs Driving Distance to Nearest Trauma Center (GA)"
    )
    m = float(pd.concat([df_drive["miles_geo"], df_drive["miles_drive"]]).max())
    scatter.add_shape(
        type="line", x0=0, y0=0, x1=m, y1=m,
        line=dict(color="lightblue", dash="dot")
    )
    scatter.update_layout(margin=dict(l=20, r=20, t=60, b=60))
    scatter.write_html(OUT_SCATTER_GEO_DRIVE, include_plotlyjs="cdn")
    print(f"✅ Wrote {OUT_SCATTER_GEO_DRIVE.resolve()}")

# === 6) STRAIGHT-LINE CONNECTION MAP (ZIP → hospital) ===
if df_drive.empty:
    print("⚠️ No Driving records found — skipping connection map.")
else:
    fig_lines = go.Figure()

    for row in df_drive.itertuples(index=False):
        fig_lines.add_trace(go.Scattergeo(
            lon=[row.zip_lon, row.hosp_lon],
            lat=[row.zip_lat, row.hosp_lat],
            mode="lines",
            line=dict(width=1, color="rgba(0,90,200,0.25)"),
            hoverinfo="skip",
            showlegend=False
        ))

    # ZIP centroids
    fig_lines.add_trace(go.Scattergeo(
        lon=df_drive["zip_lon"], lat=df_drive["zip_lat"],
        mode="markers",
        marker=dict(size=3, color="midnightblue"),
        name="ZIP centroids",
        hovertext=df_drive["zip"].astype(str) + " — " + df_drive["CITY"].astype(str),
        hoverinfo="text"
    ))
    # Trauma centers
    fig_lines.add_trace(go.Scattergeo(
        lon=df_drive["hosp_lon"], lat=df_drive["hosp_lat"],
        mode="markers",
        marker=dict(size=7, color="crimson", symbol="diamond"),
        name="Trauma centers",
        hovertext=df_drive["hosp_name"],
        hoverinfo="text"
    ))

    fig_lines.update_geos(scope="usa", projection_type="albers usa", showland=True)
    fig_lines.update_layout(
        title="Straight-Line Connections: ZIP → Nearest Trauma Center (GA)",
        margin=dict(l=0, r=0, t=60, b=0)
    )
    fig_lines.write_html(OUT_STRAIGHT_LINES, include_plotlyjs="cdn")
    print(f"✅ Wrote {OUT_STRAIGHT_LINES.resolve()}")

# === 7) COUNTY AVERAGE DRIVING TIME BAR CHART ===
if df_drive.empty:
    print("⚠️ No Driving records found — skipping county summary.")
else:
    county_summary = df_drive.groupby("COUNTY", as_index=False)["time_min"].mean()
    county_summary = county_summary.sort_values("time_min", ascending=False)

    fig_bar = px.bar(
        county_summary,
        x="COUNTY", y="time_min",
        color="time_min",
        color_continuous_scale="Bluered_r",
        title="Average Driving Time to Trauma Center by County (GA)",
        labels={"time_min": "Average minutes"}
    )
    fig_bar.update_layout(xaxis_tickangle=45)
    fig_bar.write_html(OUT_COUNTY_SUMMARY, include_plotlyjs="cdn")
    print(f"✅ Wrote {OUT_COUNTY_SUMMARY.resolve()}")


✅ Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/time_map.html
✅ Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/scatter_geo_drive.html
✅ Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/straight_lines.html
✅ Wrote /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/county_avg_time.html


In [27]:
# trauma_zip_map_combined_viridis.py
# Single Georgia ZIP-code map shaded by driving distance (Viridis color scale)
# Hover shows both driving time and driving distance.

from pathlib import Path
import pandas as pd
import plotly.express as px
import json

# === 1) INPUT FILES ===
DATA_FILE = Path("trauma_winners_by_zip.csv")
GEOJSON_FILE = Path("ga_zipcodes.geojson")

# === 2) CHECK INPUTS ===
if not DATA_FILE.exists():
    raise FileNotFoundError(f"Could not find {DATA_FILE.resolve()}")
if not GEOJSON_FILE.exists():
    raise FileNotFoundError(f"Could not find {GEOJSON_FILE.resolve()}")

# === 3) LOAD DATA ===
df = pd.read_csv(DATA_FILE)

# Keep only driving-related rows
df_drive = df[df["metric_type"].str.contains("Driving", case=False, na=False)].copy()

# Compute driving time in minutes
df_drive["time_sec"] = pd.to_numeric(df_drive["time_sec"], errors="coerce")
df_drive["time_min"] = df_drive["time_sec"] / 60.0

# Ensure ZIP codes are properly formatted
df_drive["zip"] = df_drive["zip"].astype(str).str.zfill(5)

# === 4) LOAD ZIP CODE GEOJSON ===
with open(GEOJSON_FILE) as f:
    ga_zipcodes = json.load(f)

# === 5) BUILD COMBINED HOVER TEXT ===
df_drive["hover_text"] = (
    "<b>ZIP:</b> " + df_drive["zip"].astype(str) +
    "<br><b>City:</b> " + df_drive["CITY"].astype(str) +
    "<br><b>County:</b> " + df_drive["COUNTY"].astype(str) +
    "<br><b>Hospital:</b> " + df_drive["hosp_name"].astype(str) +
    "<br><b>Metric:</b> " + df_drive["metric_type"].astype(str) +
    "<br><b>Driving Distance (mi):</b> " + df_drive["miles_drive"].round(1).astype(str) +
    "<br><b>Driving Time (min):</b> " + df_drive["time_min"].round(1).astype(str)
)

# === 6) CREATE MAP (COLOR = miles_drive, Viridis color scale) ===
fig = px.choropleth_mapbox(
    df_drive,
    geojson=ga_zipcodes,
    locations="zip",
    featureidkey="properties.ZCTA5CE10",
    color="miles_drive",
    color_continuous_scale="Viridis",  # ← same as earlier distance map
    mapbox_style="carto-positron",
    hover_name="CITY",
    hover_data=None,
    custom_data=["hover_text"],
    title="Georgia Trauma Center Accessibility — Driving Distance & Time (ZIP Polygons)",
    zoom=6,
    center={"lat": 32.9, "lon": -83.3},
    opacity=0.8
)

fig.update_traces(
    hovertemplate="%{customdata[0]}<extra></extra>",
    marker_line_width=0.6,
    marker_line_color="white"
)

# === 7) SAVE MAP ===
fig.write_html("trauma_access_map_viridis.html", include_plotlyjs="cdn")
print("✅ Wrote ZIP-level trauma accessibility map:", Path("trauma_access_map_viridis.html").resolve())


✅ Wrote ZIP-level trauma accessibility map: /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/trauma_access_map_viridis.html


In [28]:
df_drive

Unnamed: 0,zip,CITY,COUNTY,hosp_name,hosp_lat,hosp_lon,zip_lat,zip_lon,winning_value,miles_drive,time_sec,time_text,metric_type,miles_geo,time_min,hover_text
0,30002,Avondale Estates,89,Wellstar Atlanta Medical Center,33.7629,-84.3730,33.773,-84.265,7.439739,,929.8,15min,Driving Distance,6.269054,15.496667,<b>ZIP:</b> 30002<br><b>City:</b> Avondale Est...
1,30002,Avondale Estates,89,Wellstar Atlanta Medical Center,33.7629,-84.3730,33.773,-84.265,929.800000,7.439739,,15min,Driving Time,6.269054,,<b>ZIP:</b> 30002<br><b>City:</b> Avondale Est...
3,30003,Norcross,135,Piedmont Walton Hospital,33.7989,-83.7503,33.681,-84.007,20.238557,,2012.2,34min,Driving Distance,16.889499,33.536667,<b>ZIP:</b> 30003<br><b>City:</b> Norcross<br>...
4,30003,Norcross,135,Piedmont Walton Hospital,33.7989,-83.7503,33.681,-84.007,2012.200000,20.238557,,34min,Driving Time,16.889499,,<b>ZIP:</b> 30003<br><b>City:</b> Norcross<br>...
6,30004,Alpharetta,121,Wellstar North Fulton Hospital,34.0632,-84.3199,34.123,-84.297,6.093352,,847.3,14min,Driving Distance,4.313298,14.121667,<b>ZIP:</b> 30004<br><b>City:</b> Alpharetta<b...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2842,39886,Shellman,243,Crisp Regional Hospital,31.9776,-83.7813,31.761,-84.607,5564.500000,64.608810,,1hour33min,Driving Time,50.770906,,<b>ZIP:</b> 39886<br><b>City:</b> Shellman<br>...
2844,39897,Whigham,131,John D. Archbold Memorial Hospital,30.8252,-83.9721,30.936,-84.315,24.635255,,2428.3,40min,Driving Distance,21.741835,40.471667,<b>ZIP:</b> 39897<br><b>City:</b> Whigham<br><...
2845,39897,Whigham,131,John D. Archbold Memorial Hospital,30.8252,-83.9721,30.936,-84.315,2428.300000,24.635255,,40min,Driving Time,21.741835,,<b>ZIP:</b> 39897<br><b>City:</b> Whigham<br><...
2847,39901,Atlanta,89,Wellstar Atlanta Medical Center,33.7629,-84.3730,33.891,-84.288,13.099623,,1244.3,21min,Driving Distance,10.086823,20.738333,<b>ZIP:</b> 39901<br><b>City:</b> Atlanta<br><...


In [38]:
# trauma_zip_map_combined_viridis.py
# Single Georgia ZIP-code map shaded by driving distance (Viridis color scale)
# Hover shows both driving time and driving distance.

from pathlib import Path
import pandas as pd
import plotly.express as px
import json

# === 1) INPUT FILES ===
DATA_FILE = Path("trauma_winners_by_zip.csv")
GEOJSON_FILE = Path("ga_zipcodes.geojson")

# === 2) CHECK INPUTS ===
if not DATA_FILE.exists():
    raise FileNotFoundError(f"Could not find {DATA_FILE.resolve()}")
if not GEOJSON_FILE.exists():
    raise FileNotFoundError(f"Could not find {GEOJSON_FILE.resolve()}")

# === 3) LOAD DATA ===
df = pd.read_csv(DATA_FILE)

# Keep only driving-related rows
df_drive = df[df["metric_type"].str.contains("Driving", case=False, na=False)].copy()

# Compute driving time in minutes

df_drive["time_min"] = df_drive["time_text"]

# Ensure ZIP codes are properly formatted
df_drive["zip"] = df_drive["zip"].astype(str).str.zfill(5)

# === 4) LOAD ZIP CODE GEOJSON ===
with open(GEOJSON_FILE) as f:
    ga_zipcodes = json.load(f)

# === 5) BUILD COMBINED HOVER TEXT ===
df_drive["hover_text"] = (
    "<b>ZIP:</b> " + df_drive["zip"].astype(str) +
    "<br><b>City:</b> " + df_drive["CITY"].astype(str) +
    "<br><b>County:</b> " + df_drive["COUNTY"].astype(str) +
    "<br><b>Hospital:</b> " + df_drive["hosp_name"].astype(str) +
    "<br><b>Metric:</b> Driving distance & Time" +
    "<br><b>Driving Distance (mi):</b> " + df_drive["miles_drive"].round(1).astype(str) +
    "<br><b>Driving Time (min):</b> " + df_drive["time_min"].astype(str)
)

# === 6) CREATE MAP (COLOR = miles_drive, Viridis color scale) ===
fig = px.choropleth_mapbox(
    df_drive,
    geojson=ga_zipcodes,
    locations="zip",
    featureidkey="properties.ZCTA5CE10",
    color="miles_drive",
    color_continuous_scale="Viridis",  # ← same as earlier distance map
    mapbox_style="carto-positron",
    hover_name="CITY",
    hover_data=None,
    custom_data=["hover_text"],
    title="Georgia Trauma Center Accessibility — Driving Distance & Time (ZIP Polygons)",
    zoom=6,
    center={"lat": 32.9, "lon": -83.3},
    opacity=0.8
)

fig.update_traces(
    hovertemplate="%{customdata[0]}<extra></extra>",
    marker_line_width=0.6,
    marker_line_color="white"
)

# === 7) SAVE MAP ===
fig.write_html("trauma_access_map_viridis.html", include_plotlyjs="cdn")
print("✅ Wrote ZIP-level trauma accessibility map:", Path("trauma_access_map_viridis.html").resolve())


✅ Wrote ZIP-level trauma accessibility map: /Users/abiodun.idowu/Library/CloudStorage/GoogleDrive-idowurasheedabiodun@gmail.com/My Drive/Projects:papers/Stat Consulting/Map Project/trauma_access_map_viridis.html


In [36]:
df_drive[df_drive["zip"] == "30032"]


Unnamed: 0,zip,CITY,COUNTY,hosp_name,hosp_lat,hosp_lon,zip_lat,zip_lon,winning_value,miles_drive,time_sec,time_text,metric_type,miles_geo,time_min,hover_text
84,30032,Decatur,89,Grady Health System,33.7517,-84.3821,33.735,-84.267,7.551027,,876.7,15min,Driving Distance,6.721904,15min,<b>ZIP:</b> 30032<br><b>City:</b> Decatur<br><...
85,30032,Decatur,89,Grady Health System,33.7517,-84.3821,33.735,-84.267,876.7,7.551027,,15min,Driving Time,6.721904,15min,<b>ZIP:</b> 30032<br><b>City:</b> Decatur<br><...
