In [1]:
import pandas as pd
from pathlib import Path

# Folder holding the input CSV, resolved relative to the notebook's working
# directory. The results file written at the end of the notebook goes here too.
data_dir = Path("../data")

# Load the crime reports. `report_date` is parsed to datetime so the `.dt`
# accessor can be used on it in the cells below.
df = pd.read_csv(data_dir / "crime_dataset.csv", parse_dates=["report_date"])

# Only the final expression of a cell is rendered, so a `df.head()` placed
# before this line was computed and discarded; it has been removed. Put
# `df.head()` in its own cell if a row preview is wanted.
df.columns


Index(['crime_id', 'report_date', 'day_of_week', 'crime_against', 'offense',
       'latitude', 'longitude', 'neighborhood', 'zone_id', 'dist_gsu_miles',
       'hour_of_day', 'is_weekend', 'semester', 'location_label',
       'zone_id_cleaned', 'npu'],
      dtype='object')

In [2]:
# Restrict to reports dated in 2024. `.assign` returns a new frame, so `df`
# itself is left untouched; `datetime` is the report timestamp truncated to
# midnight of its day.
is_2024 = df["report_date"].dt.year.eq(2024)
df_2024 = df.loc[is_2024].assign(
    datetime=lambda reports: reports["report_date"].dt.floor("D")
)

# Count reports per (npu, day). groupby only emits combinations that actually
# occur, so a day with no reports in an NPU has no row here (not a row with 0).
reports_per_day = df_2024.groupby(["npu", "datetime"]).size()

# Flatten the (npu, datetime) index back into columns, name the count
# `actual`, and order chronologically within each NPU.
daily = (
    reports_per_day
    .reset_index(name="actual")
    .sort_values(["npu", "datetime"])
)
daily.head()


Unnamed: 0,npu,datetime,actual
0,A,2024-01-19,1
1,A,2024-01-23,1
2,A,2024-01-30,1
3,A,2024-02-04,1
4,A,2024-02-05,1


In [3]:
def _trailing_mean(counts):
    """Return the mean of each row together with up to six rows before it.

    `min_periods=1` means the first rows of a series are averaged over however
    many rows exist so far instead of producing NaN.
    """
    return counts.rolling(window=7, min_periods=1).mean()


# Trailing mean of `actual`, computed separately inside each NPU.
# NOTE(review): the window ends on the current row, so every value includes
# that day's own `actual`. If these columns are meant as forecasts, shifting
# by one row before rolling would avoid that -- confirm intent.
# NOTE(review): the window spans 7 rows, not 7 calendar days; `daily` only has
# rows for dates with at least one report, so the span in days can be longer.
baseline = daily.groupby("npu")["actual"].transform(_trailing_mean)

# All three model columns carry the same baseline values; no random-forest,
# XGBoost or Prophet model is fitted in this cell.
for pred_col in ("rf_pred", "xgb_pred", "prophet_pred"):
    daily[pred_col] = baseline

# Column order of the exported table.
output_columns = ["datetime", "npu", "actual", "rf_pred", "xgb_pred", "prophet_pred"]
forecast_df = daily[output_columns]

forecast_df.head()

Unnamed: 0,datetime,npu,actual,rf_pred,xgb_pred,prophet_pred
0,2024-01-19,A,1,1.0,1.0,1.0
1,2024-01-23,A,1,1.0,1.0,1.0
2,2024-01-30,A,1,1.0,1.0,1.0
3,2024-02-04,A,1,1.0,1.0,1.0
4,2024-02-05,A,1,1.0,1.0,1.0


In [4]:
# Write the per-NPU daily table into the data folder. `index=False` keeps the
# DataFrame's row index out of the CSV.
out_path = data_dir.joinpath("forecast_results_2024.csv")
forecast_df.to_csv(path_or_buf=out_path, index=False)
print("Saved to:", out_path)

Saved to: ../data/forecast_results_2024.csv
