# Robbery Temporal Patterns (PATROL-02)

**Objective:** Identify peak robbery times by time of day (4-hour bins) and weekday to support patrol shift optimization.

**Output:** Temporal heatmap visualization and actionable patrol timing recommendations.

In [None]:
# Parameters (injected by papermill)
# The values below are the defaults used when nothing is injected.
# NOTE(review): neither name is referenced by the other cells visible in this
# notebook — confirm whether downstream tooling relies on them.
VERSION = "v1.0"  # version label for this analysis run
FAST_MODE = False  # presumably switches to a reduced workload — TODO confirm

In [None]:
import time
from pathlib import Path
import sys

# Locate the repository root by probing for the Phase 2 config file: first in
# the working directory (project-root runs, e.g. papermill), then one level up
# (runs started from inside notebooks/).
cwd = Path.cwd()
_config_relpath = Path('config') / 'phase2_config.yaml'
for _candidate in (cwd, cwd.parent):
    if (_candidate / _config_relpath).exists():
        repo_root = _candidate
        break
else:
    # Neither location holds the config file.
    raise RuntimeError(f"Cannot find config from cwd={cwd}")
print(f"DEBUG repo_root: {repo_root}")

# Put the repo root first on sys.path so the project package below resolves.
sys.path.insert(0, str(repo_root))

from analysis.phase2_config_loader import load_phase2_config

config_path = repo_root / "config" / "phase2_config.yaml"
config = load_phase2_config(config_path)

# Directory that receives every artifact written by this notebook.
REPORTS_DIR = (repo_root / "reports").resolve()
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

print(
    "Configuration loaded",
    f"Version: {config.version}",
    f"Robbery UCR range: {config.heatmap.robbery_ucr_range}",
    f"Reports dir: {REPORTS_DIR}",
    sep="\n",
)

# (filename, description) pairs collected as outputs are saved, plus the
# wall-clock start used for the runtime report in the final cell.
artifacts = []
RUNTIME_START = time.time()

In [None]:
from datetime import datetime
import platform

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Record the run time and the interpreter/library versions so results can be
# traced back to the environment that produced them.
_environment_lines = [
    "Reproducibility Info",
    f"Timestamp (local): {datetime.now().isoformat()}",
    f"Python: {sys.version.split()[0]}",
    f"Platform: {platform.platform()}",
    f"Pandas: {pd.__version__}",
    f"NumPy: {np.__version__}",
    f"Matplotlib: {plt.matplotlib.__version__}",
    f"Seaborn: {sns.__version__}",
]
print("\n".join(_environment_lines))

## Overview

This notebook analyzes robbery incidents in Philadelphia to identify temporal patterns that can inform patrol shift allocation. We create a heatmap showing robbery frequency by:

- **Time of Day:** 4-hour bins (00-04, 04-08, 08-12, 12-16, 16-20, 20-24)
- **Day of Week:** Monday through Sunday

The resulting 42-cell matrix (6 time bins × 7 days) shows when robberies have most often occurred, enabling data-driven patrol scheduling.

## 1. Data Loading and Filtering

In [None]:
from analysis.utils import load_data

# Load full crime dataset
# `load_data` is the project helper imported above; it is called with
# clean=True and its result is used as a DataFrame by the following cells.
# NOTE(review): what "clean" removes or normalises is not visible here —
# confirm against analysis.utils.
df = load_data(clean=True)
# Row count of the loaded table, printed with thousands separators.
print(f"Total crime incidents loaded: {len(df):,}")

In [None]:
# Keep only incidents whose UCR general code lies in the configured robbery
# range, treated as the half-open interval [lower, upper).
robbery_range = config.heatmap.robbery_ucr_range
ucr_lower, ucr_upper = robbery_range[0], robbery_range[1]

# Coerce codes to numbers; unparseable values become NaN and fail both tests.
df['ucr_general'] = pd.to_numeric(df['ucr_general'], errors='coerce')
is_robbery = df['ucr_general'].ge(ucr_lower) & df['ucr_general'].lt(ucr_upper)

# Independent copy so later column additions do not touch the full dataset.
df_robbery = df.loc[is_robbery].copy()

print(f"Total robbery incidents (UCR {robbery_range[0]}-{robbery_range[1]-1}): {len(df_robbery):,}")
print(f"Robbery percentage of all crime: {len(df_robbery)/len(df)*100:.1f}%")

## 2. Temporal Feature Extraction

In [None]:
# Parse dispatch timestamps and derive the temporal features used by the
# later cells: hour, weekday index, weekday name and 4-hour time bin.

# Remember how many robbery rows we started with so the coverage figure below
# is measured against the unfiltered set (measuring after the drop would
# always yield 100%).
n_robbery_total = len(df_robbery)

# Unparseable timestamps become NaT and are removed.
df_robbery['dispatch_datetime'] = pd.to_datetime(df_robbery['dispatch_date_time'], errors='coerce')
df_robbery = df_robbery.dropna(subset=['dispatch_datetime'])
n_robbery_valid = len(df_robbery)

if n_robbery_valid == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError("No robbery records with a parseable 'dispatch_date_time'; cannot build temporal features")

df_robbery['hour'] = df_robbery['dispatch_datetime'].dt.hour
df_robbery['day_of_week'] = df_robbery['dispatch_datetime'].dt.dayofweek
df_robbery['day_name'] = df_robbery['dispatch_datetime'].dt.day_name()

# Create 4-hour time bins; right=False makes each bin [start, end), so hour 0
# falls in '00-04' and hour 23 in '20-24'.
time_bins = [0, 4, 8, 12, 16, 20, 24]
time_labels = ['00-04', '04-08', '08-12', '12-16', '16-20', '20-24']
df_robbery['time_bin'] = pd.cut(df_robbery['hour'], bins=time_bins, labels=time_labels, right=False)

# Share of robbery rows that had a usable timestamp (and therefore an hour).
hour_coverage = n_robbery_valid / n_robbery_total * 100

print(f"Records with valid temporal data: {n_robbery_valid:,}")
print(f"Hour coverage: {hour_coverage:.1f}%")
print(f"\nYear range: {df_robbery['dispatch_datetime'].dt.year.min()} - {df_robbery['dispatch_datetime'].dt.year.max()}")

## 3. City-Wide Temporal Heatmap

In [None]:
# Create pivot table: time bin x day of week
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Row order comes from the categories of the binned column, so every bin is
# represented even if no incident fell into it.
bin_order = list(df_robbery['time_bin'].cat.categories)

# Count incidents per (time bin, weekday). Reindexing both axes guarantees the
# full grid: a weekday or bin with no incidents shows up as 0 instead of
# raising KeyError or silently dropping out of the later peak/low ranking.
heatmap_data = (
    df_robbery.pivot_table(
        values='objectid',
        index='time_bin',
        columns='day_name',
        aggfunc='count',
        fill_value=0,
        observed=False,  # explicit: the implicit default for categoricals is deprecated
    )
    .reindex(index=bin_order, columns=day_order, fill_value=0)
)

print("City-wide robbery counts by time bin x day:")
print(heatmap_data)
print(f"\nTotal cells: {heatmap_data.size} (6 time bins x 7 days)")

In [None]:
# Render the city-wide count matrix as an annotated heatmap and save it.
heatmap_path = REPORTS_DIR / 'robbery_temporal_heatmap.png'

# Yellow-Orange-Red colormap per context decisions
cmap = sns.color_palette("YlOrRd", as_cmap=True)

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    heatmap_data,
    ax=ax,
    cmap=cmap,
    annot=True,
    fmt='.0f',
    linewidths=0.5,
    cbar_kws={'label': 'Incident Count'},
)

ax.set_title('Philadelphia Robbery Incidents: Hour x Weekday\n(All Years Combined)', fontsize=14)
ax.set_xlabel('Day of Week', fontsize=12)
ax.set_ylabel('Time of Day (4-hour bins)', fontsize=12)

fig.tight_layout()
fig.savefig(heatmap_path, dpi=300, bbox_inches='tight')

# Register the output for the summary printed at the end of the notebook.
artifacts.append(('robbery_temporal_heatmap.png', 'City-wide robbery temporal heatmap'))
print(f"Saved: {heatmap_path}")
plt.show()

## 4. Per-District Breakdown

In [None]:
# Check if meaningful district differences exist

# Mean coefficient of variation above which districts are considered to
# differ enough to justify separate heatmaps.
DISTRICT_CV_THRESHOLD = 0.5

# Incident count per (district, time bin, weekday). 'time_bin' is categorical,
# so `observed` is passed explicitly: observed=False keeps zero-count
# combinations in the table, and the result no longer depends on the pandas
# version's implicit default (which is deprecated).
district_time = (
    df_robbery
    .groupby(['dc_dist', 'time_bin', 'day_name'], observed=False)
    .size()
    .reset_index(name='count')
)

# Calculate coefficient of variation across districts for each time-day cell
district_stats = (
    district_time
    .groupby(['time_bin', 'day_name'], observed=False)['count']
    .agg(['mean', 'std'])
)
# Cells whose mean is 0 give NaN here and are skipped by .mean().
district_cv = (district_stats['std'] / district_stats['mean']).mean()

print(f"Mean coefficient of variation across districts: {district_cv:.2f}")

# A NaN result (e.g. only one district present) compares False and skips
# the per-district breakdown.
create_district_heatmaps = bool(district_cv > DISTRICT_CV_THRESHOLD)
if create_district_heatmaps:  # High variance = meaningful district differences
    print("Significant district-level variation detected. Creating per-district breakdown.")
else:
    print("District patterns largely consistent with city-wide. Skipping per-district breakdown.")

In [None]:
# Draw one heatmap per high-volume district on a 2x3 grid, but only when the
# variation check in the previous cell asked for it.
if create_district_heatmaps:
    # Get top 6 districts by robbery count
    top_districts = df_robbery['dc_dist'].value_counts().head(6).index.tolist()
    print(f"Top 6 districts by robbery count: {top_districts}")

    # Fixed row order so every panel shows the same bins in the same position.
    bin_order = list(df_robbery['time_bin'].cat.categories)

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.flatten()

    for panel, district in zip(axes, top_districts):
        df_dist = df_robbery[df_robbery['dc_dist'] == district]
        pivot = df_dist.pivot_table(
            values='objectid',
            index='time_bin',
            columns='day_name',
            aggfunc='count',
            fill_value=0,
            observed=False,
        )
        # Reindex both axes so missing bins/days appear as 0 rather than
        # vanishing or being rendered as blank cells.
        pivot = pivot.reindex(index=bin_order, columns=day_order, fill_value=0)

        sns.heatmap(pivot, annot=True, fmt='.0f', cmap=cmap,
                    linewidths=0.5, ax=panel, cbar=False)
        panel.set_title(f'District {district}', fontsize=11)
        panel.set_xlabel('')
        panel.set_ylabel('')

    # Hide any panels left over when fewer than six districts exist.
    for spare_panel in axes[len(top_districts):]:
        spare_panel.set_visible(False)

    plt.suptitle('Robbery Temporal Patterns by District', fontsize=14, y=1.02)
    plt.tight_layout()
    plt.savefig(REPORTS_DIR / 'robbery_temporal_by_district.png', dpi=300, bbox_inches='tight')
    artifacts.append(('robbery_temporal_by_district.png', 'Per-district temporal heatmaps'))
    print(f"Saved: {REPORTS_DIR / 'robbery_temporal_by_district.png'}")
    plt.show()
else:
    print("Skipping per-district heatmaps due to low variance.")

## 5. Peak Time Analysis

In [None]:
# Flatten the time-bin x weekday matrix into one row per cell, then rank the
# cells to get the five busiest and the five quietest periods.
flat_data = heatmap_data.stack().reset_index()
flat_data.columns = ['time_bin', 'day_name', 'count']

peaks = flat_data.nlargest(5, 'count')
lows = flat_data.nsmallest(5, 'count')


def _print_periods(heading, periods):
    """Print a heading, a rule, and one line per ranked period."""
    print(heading)
    print("="*40)
    for _, row in periods.iterrows():
        print(f"  {row['day_name']:10} {row['time_bin']}: {int(row['count']):,} incidents")


_print_periods("Top 5 Peak Robbery Periods:", peaks)
# Lowest periods are listed for comparison with the peaks.
_print_periods("\nLowest 5 Robbery Periods:", lows)

In [None]:
# Share of robberies (in percent) per time bin, in bin order, and per weekday,
# in Monday-to-Sunday order.
time_share = df_robbery['time_bin'].value_counts(normalize=True)
time_pct = time_share.sort_index().mul(100)

day_share = df_robbery['day_name'].value_counts(normalize=True)
day_pct = day_share.reindex(day_order).mul(100)

# Print both distributions with the same layout.
_sections = (
    ("Robbery Distribution by Time of Day:", time_pct),
    ("\nRobbery Distribution by Day of Week:", day_pct),
)
for _heading, _distribution in _sections:
    print(_heading)
    print("="*40)
    for _label, _pct in _distribution.items():
        print(f"  {_label}: {_pct:.1f}%")

## 6. Patrol Recommendations

In [None]:
# Turn the ranked periods and percentage distributions into recommendation
# sentences for the patrol report.
top_period = peaks.iloc[0]
peak_time = str(top_period['time_bin'])
peak_day = str(top_period['day_name'])
peak_count = int(top_period['count'])
low_count = int(lows.iloc[0]['count'])

# Combined share of the two evening/night bins and of Friday through Sunday.
evening_pct = sum(float(time_pct.get(label, 0)) for label in ('16-20', '20-24'))
weekend_pct = sum(float(day_pct.get(label, 0)) for label in ('Friday', 'Saturday', 'Sunday'))

# Each rule pairs a trigger condition with the sentence emitted when it holds.
_conditional_rules = [
    (
        evening_pct > 40,
        f"Increase evening/night patrol staffing (16:00-24:00) - {evening_pct:.1f}% of robberies occur in this window",
    ),
    (
        weekend_pct > 45,
        f"Increase weekend patrol allocation - {weekend_pct:.1f}% of robberies occur Fri-Sun",
    ),
    (
        peak_count > low_count * 2,
        f"Prioritize {peak_day} {peak_time} for robbery prevention patrols - {peak_count:,} incidents vs {low_count:,} in lowest period",
    ),
]
recommendations = [text for applies, text in _conditional_rules if applies]

# Always report the single busiest time bin and weekday.
highest_time = time_pct.idxmax()
highest_day = day_pct.idxmax()
recommendations.extend([
    f"Peak time bin: {highest_time} ({float(time_pct[highest_time]):.1f}% of all robberies)",
    f"Peak day: {highest_day} ({float(day_pct[highest_day]):.1f}% of all robberies)",
])

_rule_line = "="*60
print("\n" + _rule_line)
print("PATROL TIMING RECOMMENDATIONS")
print(_rule_line)
for rec in recommendations:
    print(f"- {rec}")

In [None]:
# Create recommendations summary file (Markdown) in the reports directory.
# The document is assembled from a header plus four tables and written once
# with an explicit encoding so the output does not depend on the platform
# default.
rec_text = "\n".join(f"- {r}" for r in recommendations)


def _period_rows(periods):
    """Return Markdown table rows (day, time bin, count) for ranked periods."""
    return "".join(
        f"| {row['day_name']} | {row['time_bin']} | {int(row['count']):,} |\n"
        for _, row in periods.iterrows()
    )


def _percentage_rows(distribution):
    """Return Markdown table rows (label, percentage) for a percentage Series."""
    return "".join(f"| {label} | {pct:.1f}% |\n" for label, pct in distribution.items())


_summary_header = f"""# Robbery Temporal Analysis Recommendations

**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}
**Requirement:** PATROL-02

## Key Findings

- Total robbery incidents analyzed: {len(df_robbery):,}
- Peak period: {peak_day} {peak_time}
- Evening/night robberies (16:00-24:00): {evening_pct:.1f}% of total
- Weekend robberies (Fri-Sun): {weekend_pct:.1f}% of total

## Actionable Recommendations

{rec_text}

## Peak Periods (Top 5)

| Day | Time | Incidents |
|-----|------|----------:|
"""

_lowest_heading = """
## Lowest Periods (Top 5)

| Day | Time | Incidents |
|-----|------|----------:|
"""

_time_heading = """
## Time Distribution

| Time Bin | Percentage |
|----------|----------:|
"""

_day_heading = """
## Day Distribution

| Day | Percentage |
|-----|----------:|
"""

summary_content = "".join([
    _summary_header,
    _period_rows(peaks),
    _lowest_heading,
    _period_rows(lows),
    _time_heading,
    _percentage_rows(time_pct),
    _day_heading,
    _percentage_rows(day_pct),
])

_summary_path = REPORTS_DIR / 'robbery_patrol_recommendations.md'
_summary_path.write_text(summary_content, encoding='utf-8')

# Register the output for the summary printed at the end of the notebook.
artifacts.append(('robbery_patrol_recommendations.md', 'Patrol timing recommendations'))
print(f"Saved: {_summary_path}")

## Conclusion

This analysis reveals clear temporal patterns in robbery incidents across Philadelphia. The heatmap visualization identifies the specific time-day combinations with the highest robbery counts, enabling data-driven patrol scheduling decisions.

**Key Insights:**
- Robbery incidents show distinct temporal clustering
- Evening and night hours (16:00-24:00) typically see elevated robbery activity
- The difference between peak and low periods provides clear guidance for resource allocation

**Recommendations:**
Patrol commanders should consider shifting resources toward the identified peak periods while maintaining baseline coverage during lower-activity times.

In [None]:
# Closing banner: list every artifact registered during the run and report the
# elapsed wall-clock time since the setup cell.
_banner_rule = "="*60
print("\n" + _banner_rule)
print("NOTEBOOK COMPLETE: Robbery Temporal Heatmap (PATROL-02)")
print(_banner_rule)

print(f"\nArtifacts generated:")
for _artifact_name, _artifact_desc in artifacts:
    print(f"  - {_artifact_name}: {_artifact_desc}")

_elapsed_seconds = time.time() - RUNTIME_START
print(f"\nRuntime: {_elapsed_seconds:.1f} seconds")