#### Target Crimes Panel Patch

In [None]:
# Original script (broken): it returned only hourly burglary counts, so there were no feature columns to model on:

def load_target_crimes_as_panel(path: Path) -> pd.DataFrame:
    """
    Convert target_crimes.parquet (row-level incidents) into NPU Ã— hour panel
    with burglary_count.
    """
    df = pd.read_parquet(path)
    df["report_date"] = pd.to_datetime(df["report_date"])
    df["hour_ts"] = df["report_date"].dt.floor("h")
    df["npu"] = df["npu"].astype(str).str.upper().str.strip()

    panel = (
        df.groupby(["npu", "hour_ts"])
        .size()
        .reset_index(name="burglary_count")
    )
    return panel

In [None]:
# Patched

from pathlib import Path
import pandas as pd

def load_target_crimes_as_panel(path: Path) -> pd.DataFrame:
    df = pd.read_parquet(path)

    # Ensure timestamps
    df["report_date"] = pd.to_datetime(df["report_date"])
    df["hour_ts"] = df["report_date"].dt.floor("h")
    df["npu"] = df["npu"].astype(str).str.upper().str.strip()

    # 1. Crime counts
    hourly_counts = (
        df.groupby(["npu", "hour_ts"])
          .size()
          .reset_index(name="burglary_count")
    )

    # 2. NUMERIC features (mean)
    numeric_cols = [
        'location_type_count', 'incident_hour', 'year', 'month',
        'hour_sin', 'hour_cos', 'temp_f', 'precip_in', 'rain_in',
        'apparent_temp_f', 'daylight_duration_sec', 'sunshine_duration_sec',
        'precip_hours', 'rain_sum_in', 'temp_mean_f', 
        'grid_density_7d', 'npu_crime_avg_30d', 'campus_distance_m'
    ]

    hourly_numeric = (
        df.groupby(["npu", "hour_ts"])[numeric_cols]
          .mean()
          .reset_index()
    )

    # 3. CATEGORICAL/BINARY (mode)
    cat_cols = [
        'day_number', 'day_of_week', 'hour_block', 
        'is_holiday', 'is_weekend', 'is_daylight',
        'weather_code_hourly', 'weather_code_daily',
        'offense_category', 'campus_label', 'campus_code',
        'event_watch_day_watch', 'event_watch_evening_watch', 'event_watch_morning_watch',
        'near_gsu','near_ga_tech','near_emory','near_clark','near_spelman',
        'near_morehouse','near_morehouse_med','near_atlanta_metro',
        'near_atlanta_tech','near_scad','near_john_marshall'
    ]

    hourly_cat = (
        df.groupby(["npu", "hour_ts"])[cat_cols]
          .agg(lambda x: x.mode().iloc[0] if not x.mode().empty else x.iloc[0])
          .reset_index()
    )

    # 4. Merge all
    panel = (
        hourly_counts
          .merge(hourly_numeric, on=["npu", "hour_ts"], how="left")
          .merge(hourly_cat, on=["npu", "hour_ts"], how="left")
          .sort_values(["npu", "hour_ts"])
          .reset_index(drop=True)
    )

    # 5. Lag features
    lags = [1, 3, 6, 12, 24, 168]
    for lag in lags:
        panel[f"lag_{lag}"] = panel.groupby("npu")["burglary_count"].shift(lag)

    panel = panel.dropna(subset=[f"lag_{l}" for l in lags])
    return panel


In [None]:
# Build the panel from the parquet file at DATA_TARGET (not defined in this
# cell; expected to come from an earlier one).
df_target = load_target_crimes_as_panel(DATA_TARGET)
# Bare expression as the cell's last line so the notebook displays the columns.
df_target.columns

