# Temporal Analysis of Philadelphia Crime Incidents (2006-2026)

**Purpose:** Comprehensive temporal analysis including 20-year trends, seasonal decomposition, day/hour patterns, and crime-type-specific trends.

**Requirements Addressed:** TEMP-01 through TEMP-07

**Notebook Structure:**
1. Data Preparation and Time Series Construction
2. STL Decomposition and Seasonal Analysis
3. Day/Hour Patterns and Crime-Type Trends
4. Summary and Key Findings

## 1. Data Preparation and Time Series Construction

In [None]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Statistical libraries
from statsmodels.tsa.seasonal import STL
from scipy import stats
from scipy.stats import linregress

# Configuration
sys.path.append('../scripts')
from config import (
    PROCESSED_DATA_DIR, FIGURES_DIR, TABLES_DIR,
    COL_DATE, COL_UCR_GENERAL, COL_TEXT_GENERAL,
    PALETTE_SEQUENTIAL, FIG_SIZE_FULL, FIG_SIZE_HALF
)

# Output locations for this notebook's figures and tables, relative to the
# notebook's working directory.
# NOTE(review): FIGURES_DIR / TABLES_DIR are imported from config above but
# these paths are hard-coded — confirm which one is intended.
output_figures = Path('../output/figures/temporal')
output_tables = Path('../output/tables/temporal')

# Create both folders up front; parents=True builds intermediate folders and
# exist_ok=True keeps re-running this cell harmless.
for output_dir in (output_figures, output_tables):
    output_dir.mkdir(parents=True, exist_ok=True)

print("✓ Imports and configuration loaded")

In [None]:
# Plotting defaults for publication-quality output, grouped by concern.

# Resolution for on-screen rendering and for saved files
resolution_rc = {
    'figure.dpi': 300,
    'savefig.dpi': 300,
}

# Type scale shared by every figure
typography_rc = {
    'font.size': 10,
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
}

# Default canvas size (width, height) in inches
layout_rc = {
    'figure.figsize': (12, 8),
}

plt.rcParams.update({**resolution_rc, **typography_rc, **layout_rc})

# Seaborn theme and default colour cycle, applied after the rcParams update
sns.set_style("whitegrid")
sns.set_palette("viridis")

print("✓ Matplotlib configured for publication quality (300 DPI)")

In [None]:
# Read the cleaned incident table from the processed-data directory
cleaned_path = PROCESSED_DATA_DIR / 'crime_incidents_cleaned.parquet'
df = pd.read_parquet(cleaned_path)

# Sanity report: row count, available columns, and the span of the date column
print(f"✓ Loaded {len(df):,} records")
print(f"\nData columns: {list(df.columns)}")
print(f"\nDate range: {df[COL_DATE].min()} to {df[COL_DATE].max()}")

In [None]:
# Make sure the date column is a real datetime dtype before comparing dates
df[COL_DATE] = pd.to_datetime(df[COL_DATE])

# Reporting lag: the most recent 30 days are trimmed so that incomplete,
# under-reported days do not bias the series.
latest_date = df[COL_DATE].max()
cutoff_date = latest_date - pd.Timedelta(days=30)

# NOTE: rows whose date is missing (NaT) fail this comparison, so they are
# dropped here and end up in the "recent records" count printed below.
within_window = df[COL_DATE] <= cutoff_date
df_analysis = df.loc[within_window].copy()

print(f"Original records: {len(df):,}")
print(f"After excluding last 30 days: {len(df_analysis):,}")
print(f"Analysis period: {df_analysis[COL_DATE].min().strftime('%Y-%m-%d')} to {df_analysis[COL_DATE].max().strftime('%Y-%m-%d')}")
print(f"Excluded {len(df) - len(df_analysis):,} recent records")

In [None]:
# Build incident-count series at three calendar resolutions.

# Resampling needs the incident date as the index
df_ts = df_analysis.set_index(COL_DATE)

# size() counts the rows that fall in each bin; every series is named
# 'crime_count' so the exported tables share one column name.
# NOTE(review): the first and last bins may cover only part of a period,
# since the data boundaries are not aligned to month/year/week ends — confirm
# downstream steps account for this.
monthly_counts = df_ts.resample('ME').size().rename('crime_count')  # month-end bins (for STL decomposition)
annual_counts = df_ts.resample('YE').size().rename('crime_count')   # year-end bins (long-term trends)
weekly_counts = df_ts.resample('W').size().rename('crime_count')    # weekly bins (medium-term patterns)

print("Time series created:")
print(f"  Monthly: {len(monthly_counts)} observations ({monthly_counts.index.min().strftime('%Y-%m')} to {monthly_counts.index.max().strftime('%Y-%m')})")
print(f"  Annual: {len(annual_counts)} observations ({annual_counts.index.min().year} to {annual_counts.index.max().year})")
print(f"  Weekly: {len(weekly_counts)} observations")

In [None]:
# Guarantee gap-free monthly and weekly series by reindexing each one onto a
# complete date range; any period absent from the index is inserted as 0.

# Monthly: every month-end between the first and last observed bin
month_start, month_end = monthly_counts.index.min(), monthly_counts.index.max()
full_month_range = pd.date_range(start=month_start, end=month_end, freq='ME')
monthly_counts = monthly_counts.reindex(full_month_range, fill_value=0)

# Weekly: every week-end between the first and last observed bin
week_start, week_end = weekly_counts.index.min(), weekly_counts.index.max()
full_week_range = pd.date_range(start=week_start, end=week_end, freq='W')
weekly_counts = weekly_counts.reindex(full_week_range, fill_value=0)

# "Gaps" here means bins whose count is zero, whether the zero came from the
# reindex above or was already present in the resampled series.
monthly_gaps = monthly_counts.eq(0).sum()
weekly_gaps = weekly_counts.eq(0).sum()

print(f"After reindexing:")
print(f"  Monthly series: {len(monthly_counts)} observations ({monthly_gaps} gaps filled with 0)")
print(f"  Weekly series: {len(weekly_counts)} observations ({weekly_gaps} gaps filled with 0)")

# Name the zero-count months explicitly so they can be investigated
if monthly_gaps > 0:
    print(f"\nWarning: {monthly_gaps} months with zero incidents detected")
    gap_months = monthly_counts.loc[monthly_counts.eq(0)].index
    print(f"Gap months: {list(gap_months.strftime('%Y-%m'))}")

In [None]:
# Inspect the offence codes before grouping them into broader categories.

# Frequency of each UCR general code, ordered by code value
ucr_codes = df_analysis[COL_UCR_GENERAL]
ucr_dist = ucr_codes.value_counts().sort_index()
print("UCR General Code Distribution:")
print(ucr_dist)

# Ten most frequent text descriptions (value_counts sorts by frequency)
text_codes = df_analysis[COL_TEXT_GENERAL]
text_dist = text_codes.value_counts().head(10)
print("\nText General Code Distribution (top 10):")
print(text_dist)

In [None]:
# Crime categories derived from the UCR general code:
#   100-499   -> Violent  (Homicide, Rape, Robbery, Aggravated Assault)
#   500-799   -> Property (Burglary, Theft, Motor Vehicle Theft)
#   otherwise -> Other    (quality-of-life and all remaining codes)
#   missing   -> Unknown

def categorize_crime(ucr_code):
    """Map a UCR general code to a broad crime category.

    Args:
        ucr_code: UCR general code; any value ``int()`` accepts, or a
            missing value (``None`` / NaN).

    Returns:
        ``'Unknown'`` when the code is missing, ``'Violent'`` for codes in
        [100, 500), ``'Property'`` for codes in [500, 800), and ``'Other'``
        for everything else (including codes below 100).
    """
    # Missing codes cannot be classified
    if pd.isna(ucr_code):
        return 'Unknown'

    code = int(ucr_code)

    # Everything outside the 100-799 band is lumped together
    if code < 100 or code >= 800:
        return 'Other'

    # Inside the band, 500 separates violent from property offences
    return 'Violent' if code < 500 else 'Property'

# Label every incident with its broad category (element-wise over the UCR column)
ucr_general = df_analysis[COL_UCR_GENERAL]
df_analysis['crime_category'] = ucr_general.apply(categorize_crime)

# Month-end x category table of incident counts; combinations that never
# occur are filled with 0 by unstack.
month_bins = pd.Grouper(key=COL_DATE, freq='ME')
category_sizes = df_analysis.groupby([month_bins, 'crime_category']).size()
monthly_by_category = category_sizes.unstack(fill_value=0)

# Add an all-zero column for any category that has no incidents at all, so
# the table always exposes the same four columns.
for cat in ['Violent', 'Property', 'Other', 'Unknown']:
    if cat in monthly_by_category.columns:
        continue
    monthly_by_category[cat] = 0

print("Crime category distribution:")
print(df_analysis['crime_category'].value_counts())
print(f"\nMonthly by category shape: {monthly_by_category.shape}")
print(f"Categories: {list(monthly_by_category.columns)}")

In [None]:
# Persist the intermediate time series as CSV so they can be reused without
# re-running the aggregation cells.

# (table, file name) pairs, written in this order; the count Series are
# converted to single-column frames first.
exports = [
    (monthly_counts.to_frame(), 'monthly_crime_counts.csv'),
    (annual_counts.to_frame(), 'annual_crime_counts.csv'),
    (weekly_counts.to_frame(), 'weekly_crime_counts.csv'),
    (monthly_by_category, 'monthly_by_category.csv'),
]
for table, filename in exports:
    table.to_csv(output_tables / filename)

print("✓ Time series saved to output/tables/temporal/:")
print("  - monthly_crime_counts.csv")
print("  - annual_crime_counts.csv")
print("  - weekly_crime_counts.csv")
print("  - monthly_by_category.csv")

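**Task 1 Complete:** Monthly, annual, and weekly time series created; the monthly and weekly series are reindexed to a continuous date range (any zero-count months are reported in the gap check above); crime-category splits (Violent / Property / Other / Unknown) built and saved.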

---