# Crime Incidents Time Series Forecasting
*Requirement: FORECAST-01*

**Objective:** Forecast daily crime incidents for the next 60 days using Prophet, with 95% prediction intervals and anomaly detection.

**Key Outputs:**
- 60-day forecast with prediction intervals
- Anomaly detection thresholds
- Model performance metrics
- Operational recommendations

In [None]:
# Run configuration, kept together so a run can be reproduced or re-parameterized.
VERSION: str = "v1.0"  # tag recorded in the execution metadata

# Window lengths, in days.
FORECAST_HORIZON_DAYS: int = 60  # how far ahead the forecast extends
VALIDATION_DAYS: int = 30  # passed as test_days to create_train_test_split

# Interval level and seeding.
CONFIDENCE_INTERVAL: float = 0.95
RANDOM_SEED: int = 42  # fed to np.random.seed

In [None]:
import time
from pathlib import Path
import sys
import warnings
# Silence all warnings for the rest of the session to keep cell output readable.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Robust repo_root detection: walk upward from the working directory until a
# directory containing config/phase1_config.yaml is found. The working
# directory and its immediate parent are still checked first, in that order;
# deeper nesting (e.g. notebooks/<subdir>/) now resolves as well.
cwd = Path.cwd()
for candidate in (cwd, *cwd.parents):
    if (candidate / 'config' / 'phase1_config.yaml').exists():
        repo_root = candidate
        break
else:
    # No ancestor holds the config file: fail loudly rather than guess.
    raise RuntimeError(f"Cannot find config from cwd={cwd}")

# Put the repo root first on sys.path so the project's `analysis` package
# can be imported by later cells.
sys.path.insert(0, str(repo_root))
print(f"Repository root: {repo_root}")

# Define paths
DATA_DIR = repo_root / 'data'
REPORTS_DIR = repo_root / 'reports'
REPORTS_DIR.mkdir(exist_ok=True)  # figures are saved here; ok if it already exists

# Record execution metadata
start_time = time.time()  # epoch seconds at the start of the run
run_timestamp = time.strftime('%Y%m%d_%H%M%S')  # local time, used as the run label
print(f"\nExecution started: {run_timestamp}")
print(f"Version: {VERSION}")
print(f"Forecast horizon: {FORECAST_HORIZON_DAYS} days")
print(f"Confidence interval: {CONFIDENCE_INTERVAL*100}%")
print(f"Random seed: {RANDOM_SEED}")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from datetime import datetime, timedelta

# Import custom utilities
from analysis.models.time_series import (
    prepare_prophet_data,
    create_train_test_split,
    get_prophet_config,
    evaluate_forecast,
    detect_anomalies
)

# Set random seed for reproducibility
# NOTE(review): this seeds only NumPy's global RNG — confirm that every
# stochastic step downstream actually draws from it.
np.random.seed(RANDOM_SEED)

# Configure plotting
# Seaborn style and palette are set once here, before any figure is created.
sns.set_style('darkgrid')
sns.set_palette('husl')
# IPython magic (not plain Python): render matplotlib figures inline in the notebook.
%matplotlib inline

## 1. Data Loading and Preparation

Load crime incidents and aggregate them by date to create a daily time series.

In [None]:
# Read the combined incident table from the data directory.
crime_file = DATA_DIR.joinpath('crime_incidents_combined.parquet')
print(f"Loading data from: {crime_file}")
df_raw = pd.read_parquet(crime_file)

# dispatch_date arrives as a categorical column; go through its string form to
# obtain real datetimes that can be grouped and compared.
dispatch_as_text = df_raw['dispatch_date'].astype(str)
df_raw['dispatch_date'] = pd.to_datetime(dispatch_as_text)

# Summarize what was loaded: table shape, covered dates, and row count.
print(f"\nRaw data shape: {df_raw.shape}")
print(f"Date range: {df_raw['dispatch_date'].min().date()} to {df_raw['dispatch_date'].max().date()}")
print(f"Total incidents: {len(df_raw):,}")

In [None]:
# Count rows per dispatch date and expose the result as a chronologically
# ordered (date, count) table with a fresh 0..n-1 index.
# NOTE(review): a date with no rows in df_raw produces no row here (it is
# absent, not zero), so the length printed below counts observed dates.
df_daily = (
    df_raw.groupby('dispatch_date')
    .size()
    .reset_index(name='incident_count')
    .rename(columns={'dispatch_date': 'date', 'incident_count': 'count'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

# Report the extent of the series and the distribution of daily counts.
print(f"\nDaily aggregation complete")
print(f"Time series length: {len(df_daily)} days")
print(f"Date range: {df_daily['date'].min().date()} to {df_daily['date'].max().date()}")
print(f"\nDaily incident statistics:")
print(df_daily['count'].describe())

In [None]:
# Prepare data in Prophet format (ds, y)
# prepare_prophet_data is a project helper from analysis.models.time_series; it
# receives the daily frame plus the names of its date and count columns. Later
# cells read the result's 'ds' and 'y' columns.
# NOTE(review): any cleaning or filtering done inside the helper is not visible
# here — confirm against its implementation.
df_prophet = prepare_prophet_data(df_daily, 'date', 'count')

# Quick sanity check of the result: first rows, shape, and total missing cells.
print("\nProphet format data:")
print(df_prophet.head())
print(f"\nShape: {df_prophet.shape}")
print(f"Missing values: {df_prophet.isnull().sum().sum()}")

## 2. Exploratory Time Series Visualization

In [None]:
# Draw the unsmoothed daily series as a single line chart and save it.
raw_plot_path = REPORTS_DIR / 'forecast_timeseries_raw.png'
axis_label_style = {'fontsize': 12}

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df_prophet['ds'], df_prophet['y'], linewidth=0.8, alpha=0.7)
ax.set_title('Daily Crime Incidents Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date', **axis_label_style)
ax.set_ylabel('Daily Incident Count', **axis_label_style)
ax.grid(True, alpha=0.3)

# Tighten the layout, write the figure at 300 dpi, then display it.
fig.tight_layout()
fig.savefig(raw_plot_path, dpi=300, bbox_inches='tight')
plt.show()

print("Time series visualization saved to reports/")

In [None]:
# Centered moving averages to overlay on the raw series, described as
# (window length in rows, temporary column name, line width, legend label).
moving_averages = [
    (7, 'rolling_mean_7', 1.5, '7-day MA'),
    (30, 'rolling_mean_30', 2, '30-day MA'),
]
for ma_window, ma_column, ma_width, ma_label in moving_averages:
    df_prophet[ma_column] = df_prophet['y'].rolling(window=ma_window, center=True).mean()

smoothed_plot_path = REPORTS_DIR / 'forecast_timeseries_smoothed.png'

# Faint raw series underneath, smoothed curves on top.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df_prophet['ds'], df_prophet['y'], linewidth=0.5, alpha=0.4, label='Daily')
for ma_window, ma_column, ma_width, ma_label in moving_averages:
    ax.plot(df_prophet['ds'], df_prophet[ma_column], linewidth=ma_width, label=ma_label)
ax.set_title('Daily Incidents with Moving Averages', fontsize=14, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Incident Count', fontsize=12)
ax.legend(loc='best')
ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(smoothed_plot_path, dpi=300, bbox_inches='tight')
plt.show()

# Drop the temporary smoothing columns so only ds / y go on to the model.
df_prophet = df_prophet[['ds', 'y']].copy()

## 3. Train/Validation Split

Create a temporal split for model validation before generating the future forecast.

In [None]:
# Create train/validation split
# create_train_test_split is a project helper from analysis.models.time_series;
# it is called with VALIDATION_DAYS as its test_days argument.
# NOTE(review): presumably the held-out rows are the most recent ones — confirm
# against the helper.
train_df, validation_df = create_train_test_split(df_prophet, test_days=VALIDATION_DAYS)

# Report the size and date span of each partition, then summary statistics of
# the training target.
print(f"Train set: {len(train_df)} days ({train_df['ds'].min().date()} to {train_df['ds'].max().date()})")
print(f"Validation set: {len(validation_df)} days ({validation_df['ds'].min().date()} to {validation_df['ds'].max().date()})")
print(f"\nTrain set statistics:")
print(train_df['y'].describe())

In [None]:
# Assemble the run record from two parts: the parameters the run was launched
# with, and summary figures of the dataset it operated on.
observed_dates = df_prophet['ds']
incident_counts = df_prophet['y']

run_parameters = {
    'version': VERSION,
    'execution_timestamp': run_timestamp,
    'forecast_horizon_days': FORECAST_HORIZON_DAYS,
    'validation_days': VALIDATION_DAYS,
    'confidence_interval': CONFIDENCE_INTERVAL,
    'random_seed': RANDOM_SEED,
}

dataset_summary = {
    'data_source': str(crime_file),
    'total_observations': len(df_prophet),
    'train_observations': len(train_df),
    'validation_observations': len(validation_df),
    'date_range_start': str(observed_dates.min().date()),
    'date_range_end': str(observed_dates.max().date()),
    # Cast to built-in floats so the values are plain Python numbers.
    'mean_daily_incidents': float(incident_counts.mean()),
    'std_daily_incidents': float(incident_counts.std()),
}

# Merge in order: parameters first, dataset summary second.
metadata = {**run_parameters, **dataset_summary}

print("\nExecution metadata captured")