In [3]:
# Download a local static Chicago map (optional fallback)
import os, sys, subprocess
from pathlib import Path

MAP_DIR = Path('local_maps')
MAP_DIR.mkdir(exist_ok=True)
STATIC_MAP_PATH = MAP_DIR / 'chicago_static_map.png'

def ensure_static_map(path=STATIC_MAP_PATH, size=(1024, 768), center=(41.8781, -87.6298), zoom=11):
    try:
        try:
            from staticmap import StaticMap, CircleMarker
        except Exception:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'staticmap', 'Pillow'])
            from staticmap import StaticMap, CircleMarker
        width, height = size
        m = StaticMap(width, height, url_template='https://tile.openstreetmap.org/{z}/{x}/{y}.png')
        # Add a transparent marker just to fix center
        cm = CircleMarker((center[1], center[0]), '#00000000', 1)
        m.add_marker(cm)
        image = m.render(zoom=zoom, center=(center[1], center[0]))
        image.save(path)
        return True
    except Exception as e:
        print(f'Could not create static map: {e}')
        return False

if not STATIC_MAP_PATH.exists():
    print('Generating local static map of Chicago...')
    ok = ensure_static_map(STATIC_MAP_PATH)
    if ok:
        print(f'Local static map saved to {STATIC_MAP_PATH.resolve()}')
    else:
        print('Static map generation skipped.')
else:
    print(f'Found existing static map at {STATIC_MAP_PATH.resolve()}')

Found existing static map at /Users/simon/altair-hw/Watcher-S.github.io/python_notebooks/local_maps/chicago_static_map.png


In [4]:
# Auto-download dataset if missing (DISABLED) — using SODA API instead
import os, sys, subprocess
from pathlib import Path

CSV_NAME = 'traffic_crashes_full.csv'
CSV_PATH = Path(CSV_NAME)

# Ensure requests is available for SODA API calls in the next cell
try:
    import requests
except Exception:
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'requests'])
    import requests

# Do not download large CSV; rely on SODA API loader cell.
print('Skipping local CSV download. Using Chicago SODA API for data loading.')


Skipping local CSV download. Using Chicago SODA API for data loading.


In [5]:
# Prefer Chicago SODA API loader (fallback to local CSV)
import os
import pandas as pd
from urllib.parse import urlencode
import math

SODA_BASE = 'https://data.cityofchicago.org/resource/85ca-t3if.csv'
SODA_JSON = 'https://data.cityofchicago.org/resource/85ca-t3if.json'
SODA_APP_TOKEN = os.environ.get('CHICAGO_SODA_TOKEN', None)  # optional

# Query params: adjust defaults as needed
params = {
    '$select': '*',
    '$order': 'crash_date DESC',
}
# Example date filter (comment out to fetch all allowed by limit)
# params['$where'] = "crash_date >= '2024-01-01T00:00:00' AND crash_date <= '2024-12-31T23:59:59'"

DEFAULT_LIMIT = 50000  # per page
MAX_ROWS = 250000      # total cap to keep memory reasonable

use_soda = True  # set False to force local CSV usage
csv_path = 'traffic_crashes_full.csv'


def soda_fetch_json(params, limit=DEFAULT_LIMIT, max_rows=MAX_ROWS):
    headers = {}
    if SODA_APP_TOKEN:
        headers['X-App-Token'] = SODA_APP_TOKEN
    rows = []
    offset = 0
    pages = math.ceil(max_rows / limit)
    for _ in range(pages):
        q = params.copy()
        q['$limit'] = limit
        q['$offset'] = offset
        url = f"{SODA_JSON}?{urlencode(q)}"
        r = requests.get(url, headers=headers, timeout=60)
        r.raise_for_status()
        batch = r.json()
        if not batch:
            break
        rows.extend(batch)
        if len(batch) < limit:
            break
        offset += limit
    return rows


def soda_rows_to_df(rows):
    if not rows:
        return pd.DataFrame()
    df = pd.DataFrame(rows)
    # Normalize expected columns used below
    # Map Socrata field names to those expected by the dashboard
    # Socrata fields (lowercase) -> expected uppercase
    col_map = {
        'crash_date': 'CRASH_DATE',
        'prim_contributory_cause': 'PRIM_CONTRIBUTORY_CAUSE',
        'crash_month': 'CRASH_MONTH',
        'crash_hour': 'CRASH_HOUR',
        'latitude': 'LATITUDE',
        'longitude': 'LONGITUDE',
    }
    for src, dst in col_map.items():
        if src in df.columns and dst not in df.columns:
            df[dst] = df[src]
    return df


if use_soda:
    print('Loading crash data from Chicago SODA API...')
    rows = soda_fetch_json(params)
    df = soda_rows_to_df(rows)
    print(f"Loaded {len(df)} rows from SODA API")
else:
    print('Loading crash data from local CSV...')
    df = pd.read_csv(csv_path)


Loading crash data from Chicago SODA API...
Loaded 250000 rows from SODA API
Loaded 250000 rows from SODA API


In [6]:
# Environment setup without magics
import sys, subprocess
pkgs = ['pandas','numpy','ipywidgets','bqplot']
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q'] + pkgs)
import ipywidgets as widgets
widgets.Widget.widget_types

  widgets.Widget.widget_types


<ipywidgets.widgets.widget.WidgetRegistry at 0x116071550>

In [7]:
# Environment setup (safe to re-run)
%pip install -q pandas numpy ipywidgets bqplot
import ipywidgets as widgets
widgets.Widget.widget_types

Note: you may need to restart the kernel to use updated packages.


  widgets.Widget.widget_types


<ipywidgets.widgets.widget.WidgetRegistry at 0x116071550>

# Final Project Part 2 — Viz for Experts

- Group Members: [Add names here]
- Dataset: `traffic_crashes_full.csv` (loaded via relative path)

This notebook implements a simple, reliable bqplot dashboard with a driver plot (crashes by month) and a driven plot (top primary crash causes) linked via selection.

In [None]:
# Import libraries and load data
import pandas as pd
import numpy as np
import bqplot as bq
import ipywidgets as widgets
from IPython.display import display
import os
from pathlib import Path

# Try map library (will be forced off below)
try:
    from ipyleaflet import Map, Heatmap, basemaps, basemap_to_tiles, DrawControl
    MAP_OK = True
except Exception:
    MAP_OK = False

# FORCE LOCAL MAP (disable ipyleaflet usage)
MAP_OK = False

# Ensure a DataFrame `df` is available: prefer SODA loader, else fallback to small SODA pull
if 'df' not in globals() or not isinstance(df, pd.DataFrame) or df.empty:
    try:
        from urllib.parse import urlencode
        import math, requests
        SODA_JSON = 'https://data.cityofchicago.org/resource/85ca-t3if.json'
        params = {
            '$select': '*',
            '$order': 'crash_date DESC',
            '$limit': 10000,
        }
        url = f"{SODA_JSON}?{urlencode(params)}"
        rows = requests.get(url, timeout=60).json()
        df = pd.DataFrame(rows)
        print(f"Loaded fallback {len(df)} rows from SODA API")
    except Exception as e:
        raise RuntimeError(f"Failed to obtain data from SODA API: {e}")

# Use explicit columns from dataset
DATE_COL = 'CRASH_DATE'
CAUSE_COL = 'PRIM_CONTRIBUTORY_CAUSE'
MONTH_COL = 'CRASH_MONTH'
HOUR_COL = 'CRASH_HOUR'
LAT_COL = 'LATITUDE'
LON_COL = 'LONGITUDE'

# If SODA provided lowercase names, map them to expected names
if DATE_COL not in df.columns and 'crash_date' in df.columns:
    df[DATE_COL] = df['crash_date']
if CAUSE_COL not in df.columns and 'prim_contributory_cause' in df.columns:
    df[CAUSE_COL] = df['prim_contributory_cause']
if MONTH_COL not in df.columns and 'crash_month' in df.columns:
    df[MONTH_COL] = df['crash_month']
if HOUR_COL not in df.columns and 'crash_hour' in df.columns:
    df[HOUR_COL] = df['crash_hour']
if LAT_COL not in df.columns and 'latitude' in df.columns:
    df[LAT_COL] = df['latitude']
if LON_COL not in df.columns and 'longitude' in df.columns:
    df[LON_COL] = df['longitude']

# Parse dates, drop unknowns
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors='coerce', infer_datetime_format=True)
df = df.dropna(subset=[DATE_COL])
df['YEAR'] = df[DATE_COL].dt.year

# Month
if MONTH_COL in df.columns:
    df['MONTH'] = pd.to_numeric(df[MONTH_COL], errors='coerce')
else:
    df['MONTH'] = df[DATE_COL].dt.month

# Hour
if HOUR_COL in df.columns:
    df['HOUR'] = pd.to_numeric(df[HOUR_COL], errors='coerce').clip(0,23).fillna(0).astype(int)
else:
    df['HOUR'] = df[DATE_COL].dt.hour.fillna(0).astype(int)

# Normalize cause
if CAUSE_COL in df.columns:
    df[CAUSE_COL] = df[CAUSE_COL].fillna('Unknown').astype(str).str.strip()
else:
    cat_candidates = [c for c in df.columns if df[c].dtype == 'object']
    CAUSE_COL = cat_candidates[0] if cat_candidates else None


FileNotFoundError: [Errno 2] No such file or directory: 'traffic_crashes_full.csv'

# How to Use This Dashboard

This dashboard has two linked charts:
- The top chart shows total crash counts by month. Click one or more bars to select months.
- The bottom chart updates to show the top 10 primary crash causes within the selected months. If no months are selected, it shows the overall top causes.

Tips:
- Rotate your view or scroll horizontally if month labels wrap.
- Use multiple selections to compare seasonal patterns.
- Data loads from the Chicago SODA API (Traffic Crashes — Crashes). No local CSV is required.


# Contextual Datasets

- Chicago Traffic Volume (IDOT): https://idot.illinois.gov/transportation-system/Network-Overview/Highway-System/Traffic-Data.html — could contextualize crash counts by exposure (vehicle miles traveled) to assess risk.
- Weather Data (NOAA, Chicago area): https://www.ncei.noaa.gov/ — weather conditions (precipitation, snow, temperature) may explain seasonal crash patterns.
- Public Transit Ridership (CTA): https://www.transitchicago.com/performance/ — changes in ridership could relate to shifts in road usage and crash trends.


# Data Size and Hosting Plan

This notebook retrieves crash data directly from the Chicago SODA API, avoiding large local CSV files and GitHub Pages size limits.

Notes:
- SODA responses are paged and rate-limited; the loader caps rows for performance. Increase limits as needed when running locally.
- For reproducibility, you can export filtered subsets to CSV, but the default flow uses live API data.
