# Philadelphia Crime Incidents: Temporal Analysis

This notebook conducts comprehensive temporal analysis of Philadelphia crime data (2006-2026) including 20-year trends, seasonal decomposition, day/hour patterns, and crime-type-specific trends.

Purpose: Answer TEMP-01 through TEMP-07 requirements; establish temporal baseline for dashboard and report; validate against known Philadelphia patterns (summer peaks, weekday variation).

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import STL
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Import configuration
import sys
sys.path.append('scripts/')
from config import *

# Publication-quality matplotlib defaults: 300 dpi both on screen and on disk,
# compact font sizes, and a 12x8 inch default canvas. Individual figures below
# still pass their own figsize where they need a different aspect ratio.
publication_rc = dict([
    ('figure.dpi', 300),
    ('savefig.dpi', 300),
    ('font.size', 10),
    ('axes.labelsize', 11),
    ('axes.titlesize', 12),
    ('xtick.labelsize', 9),
    ('ytick.labelsize', 9),
    ('legend.fontsize', 9),
    ('figure.figsize', (12, 8)),
])
plt.rcParams.update(publication_rc)

In [2]:
# Read the cleaned incident table produced by the processing step.
df = pd.read_parquet('data/processed/crime_incidents_cleaned.parquet')

# Coerce the dispatch timestamp to datetime64 so the .dt accessors and
# resampling used throughout the notebook work.
df['dispatch_date_time'] = pd.to_datetime(df['dispatch_date_time'])

# The most recent 30 days are dropped because of reporting lag: the cutoff is
# measured back from the newest timestamp present in the file.
latest_dispatch = df['dispatch_date_time'].max()
cutoff_date = latest_dispatch - pd.Timedelta(days=30)
within_cutoff = df['dispatch_date_time'].le(cutoff_date)
df_analysis = df.loc[within_cutoff].copy()

# Report how much data survives the cutoff and the span it covers.
analysis_start = df_analysis['dispatch_date_time'].min()
analysis_end = df_analysis['dispatch_date_time'].max()
print(f"Original data: {len(df)} records")
print(f"Analysis data: {len(df_analysis)} records after excluding last 30 days")
print(f"Analysis date range: {analysis_start} to {analysis_end}")

In [3]:
# Collapse `ucr_general` codes into three broad categories:
#   codes listed in UCR_VIOLENT  -> 'Violent'
#   codes listed in UCR_PROPERTY -> 'Property'
#   anything else (including missing codes) -> 'Other'
#
# The notebook's working labels for the codes are:
#   100 Homicide, 300 Robbery, 400 Aggravated Assault,
#   500 Burglary, 600 Larceny, 700 Motor Vehicle Theft, 800 Arson.
# NOTE(review): the earlier comment labelled both 100 and 200 as homicide,
# which looks like a transcription slip. Confirm what 200 (and 800) denote in
# the data provider's UCR code table before relying on these groupings.
UCR_VIOLENT = [100, 200, 300, 400]
UCR_PROPERTY = [500, 600, 700, 800]

# Vectorised membership tests; the first matching condition wins and rows
# matching neither list fall through to the default.
ucr_codes = df_analysis['ucr_general']
df_analysis['crime_type'] = np.select(
    [ucr_codes.isin(UCR_VIOLENT), ucr_codes.isin(UCR_PROPERTY)],
    ['Violent', 'Property'],
    default='Other',
)

print("Crime type distribution:")
print(df_analysis['crime_type'].value_counts())

In [4]:
# Build incident-count time series at four resolutions.
# Only the monthly series feeds the STL decomposition and trend regressions
# further down; the daily/weekly/annual series are reported for reference.
events_by_time = df_analysis.set_index('dispatch_date_time')

# 1. Daily counts
daily_counts = events_by_time.resample('D').size().rename('crime_count')

# 2. Weekly counts (weeks labelled by their closing Monday)
weekly_counts = events_by_time.resample('W-MON').size().rename('crime_count')

# 3. Monthly counts (month-end labels; input to STL)
monthly_counts = events_by_time.resample('ME').size().rename('crime_count')

# 4. Annual counts (year-end labels)
annual_counts = events_by_time.resample('YE').size().rename('crime_count')

# The indexed copy is only needed for the four resamples above.
del events_by_time

# Drop partially observed months at either edge of the monthly series.
# The 30-day reporting-lag cutoff almost never lands on a month boundary, so
# the final month would otherwise hold only a fraction of its incidents and
# pull the end of the STL trend (and the fitted slope) downwards.
first_observed = df_analysis['dispatch_date_time'].min()
last_observed = df_analysis['dispatch_date_time'].max()
if len(monthly_counts) > 1 and last_observed.normalize() < monthly_counts.index[-1]:
    print(f"Dropping incomplete final month: {monthly_counts.index[-1]:%Y-%m}")
    monthly_counts = monthly_counts.iloc[:-1]
if len(monthly_counts) > 1 and first_observed.normalize() > monthly_counts.index[0].replace(day=1):
    print(f"Dropping incomplete first month: {monthly_counts.index[0]:%Y-%m}")
    monthly_counts = monthly_counts.iloc[1:]
# NOTE: the first/last weekly and annual bins can still be partial; they are
# not used for modelling below.

# Reindex every series onto a gap-free range so periods with no incidents are
# explicit zeros and each index carries a regular frequency.
full_daily_range = pd.date_range(start=daily_counts.index.min(),
                                 end=daily_counts.index.max(),
                                 freq='D')
daily_counts = daily_counts.reindex(full_daily_range, fill_value=0)

full_weekly_range = pd.date_range(start=weekly_counts.index.min(),
                                  end=weekly_counts.index.max(),
                                  freq='W-MON')
weekly_counts = weekly_counts.reindex(full_weekly_range, fill_value=0)

# full_monthly_range is reused later to align the crime-type-specific series.
full_monthly_range = pd.date_range(start=monthly_counts.index.min(),
                                   end=monthly_counts.index.max(),
                                   freq='ME')
monthly_counts = monthly_counts.reindex(full_monthly_range, fill_value=0)

full_annual_range = pd.date_range(start=annual_counts.index.min(),
                                  end=annual_counts.index.max(),
                                  freq='YE')
annual_counts = annual_counts.reindex(full_annual_range, fill_value=0)

print(f"Daily time series: {len(daily_counts)} days")
print(f"Weekly time series: {len(weekly_counts)} weeks")
print(f"Monthly time series: {len(monthly_counts)} months")
print(f"Annual time series: {len(annual_counts)} years")

In [5]:
# Crime-type-specific monthly series, aligned to the overall monthly index.

# One sub-frame per broad category.
crime_type_col = df_analysis['crime_type']
df_violent = df_analysis[crime_type_col == 'Violent']
df_property = df_analysis[crime_type_col == 'Property']
df_other = df_analysis[crime_type_col == 'Other']

# Month-end incident counts per category, reindexed onto full_monthly_range so
# all three share the overall series' index (months with no rows become 0).
monthly_violent, monthly_property, monthly_other = (
    type_frame.set_index('dispatch_date_time')
    .resample('ME')
    .size()
    .reindex(full_monthly_range, fill_value=0)
    for type_frame in (df_violent, df_property, df_other)
)

for type_label, type_series in (('violent', monthly_violent),
                                ('property', monthly_property),
                                ('other', monthly_other)):
    print(f"Monthly {type_label} crimes: {len(type_series)} months")

In [6]:
# Persist the monthly series for reuse and make sure every output directory
# used by this notebook exists.
import os

# The tables directory is written to right here; the figures directory is the
# target of every plt.savefig() call in the cells below. savefig does not
# create missing parent directories, so without this a fresh checkout fails at
# the first figure.
for output_dir in ('output/tables/temporal/', 'output/figures/temporal/'):
    os.makedirs(output_dir, exist_ok=True)

# One column per series, indexed by month-end date.
timeseries_df = pd.DataFrame({
    'overall': monthly_counts,
    'violent': monthly_violent,
    'property': monthly_property,
    'other': monthly_other
})

# Save to CSV
timeseries_df.to_csv('output/tables/temporal/monthly_timeseries.csv')
print("Saved monthly time series to output/tables/temporal/monthly_timeseries.csv")

# Task 2: STL Decomposition and Seasonal Analysis

Implement STL decomposition and seasonal analysis per 02-RESEARCH.md Pattern 1:

In [7]:
# 1. STL decomposition of the overall monthly series.
# period=12 gives an annual seasonal cycle on monthly data; robust=True
# down-weights outlying months during fitting.
stl = STL(monthly_counts, period=12, robust=True)
result = stl.fit()

# Component series, kept under short names for the plots below.
trend, seasonal, residual = result.trend, result.seasonal, result.resid

# Four stacked panels sharing the time axis: observed, trend, seasonal, residual.
fig, axes = plt.subplots(4, 1, figsize=(14, 12), sharex=True)

panel_specs = [
    (monthly_counts, 'Original Time Series', {'color': 'gray', 'alpha': 0.7}),
    (trend, 'Trend Component', {'color': 'steelblue', 'linewidth': 2}),
    (seasonal, 'Seasonal Component', {'color': 'green'}),
    (residual, 'Residual Component', {'color': 'red', 'alpha': 0.7}),
]
for panel_ax, (component, panel_title, line_kwargs) in zip(axes, panel_specs):
    panel_ax.plot(component.index, component.values, **line_kwargs)
    panel_ax.set_ylabel('Count')
    panel_ax.set_title(panel_title)
    panel_ax.grid(True, alpha=0.3)

# Zero reference line on the residual panel; only the bottom panel is labelled
# on x because the axis is shared.
axes[3].axhline(y=0, color='black', linestyle='--', linewidth=0.5)
axes[3].set_xlabel('Year')

plt.tight_layout()
plt.savefig('output/figures/temporal/stl_decomposition_overall.png', dpi=300, bbox_inches='tight')
plt.show()

print("STL decomposition completed and 4-panel plot saved.")

In [8]:
# 2. Seasonal factors: average the STL seasonal component per calendar month.
calendar_month = result.seasonal.index.month
seasonal_by_month = result.seasonal.groupby(calendar_month).mean()

# Same values, ordered January..December.
monthly_seasonal_factors = seasonal_by_month.sort_index()

# Seasonality magnitude = (mean Jun-Aug factor - mean Dec-Feb factor),
# expressed as a percentage of the average monthly incident count.
summer_months = [6, 7, 8]
winter_months = [12, 1, 2]
summer_peak = monthly_seasonal_factors.loc[summer_months].mean()
winter_low = monthly_seasonal_factors.loc[winter_months].mean()
seasonal_swing = summer_peak - winter_low
seasonality_magnitude = seasonal_swing / monthly_counts.mean() * 100

# Tabulate month number against factor and write it out.
seasonal_df = pd.DataFrame({
    'month': list(range(1, 13)),
    'factor': monthly_seasonal_factors.to_numpy(),
})
seasonal_df.to_csv('output/tables/temporal/seasonal_factors.csv', index=False)

print(f"Seasonal factors calculated and saved. Seasonality magnitude: {seasonality_magnitude:+.2f}%")
print(f"Summer peak avg: {summer_peak:+.2f}, Winter low avg: {winter_low:+.2f}")

In [9]:
# 3. Crime-type-specific decomposition.
# Fit the same robust, 12-month STL to each category's monthly series.
type_models = {
    type_key: STL(type_series, period=12, robust=True)
    for type_key, type_series in (('violent', monthly_violent),
                                  ('property', monthly_property),
                                  ('other', monthly_other))
}
type_fits = {type_key: model.fit() for type_key, model in type_models.items()}

# Expose models and fits under the names later cells rely on.
stl_violent, stl_property, stl_other = (
    type_models[k] for k in ('violent', 'property', 'other'))
result_violent, result_property, result_other = (
    type_fits[k] for k in ('violent', 'property', 'other'))

# Mean seasonal component per calendar month for each category.
seasonal_violent_by_month, seasonal_property_by_month, seasonal_other_by_month = (
    fit.seasonal.groupby(fit.seasonal.index.month).mean().sort_index()
    for fit in (result_violent, result_property, result_other)
)

# Overlay the monthly seasonal profiles, with the overall profile first.
fig, ax = plt.subplots(figsize=(12, 6))
seasonal_profiles = [
    (seasonal_by_month, 'Overall', 'o'),
    (seasonal_violent_by_month, 'Violent', 's'),
    (seasonal_property_by_month, 'Property', '^'),
    (seasonal_other_by_month, 'Other', 'd'),
]
for profile, profile_label, profile_marker in seasonal_profiles:
    ax.plot(profile.index, profile.values, label=profile_label, marker=profile_marker)

ax.set_xlabel('Month')
ax.set_ylabel('Seasonal Factor')
ax.set_title('Seasonal Patterns by Crime Type')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('output/figures/temporal/seasonal_factors_by_type.png', dpi=300, bbox_inches='tight')
plt.show()

print("Crime-type-specific decomposition completed and comparative plot saved.")

In [10]:
# 4. Trend quantification for the overall series.
# Ordinary least squares of the STL trend against a 0..n-1 month counter.
x = np.arange(len(result.trend))
overall_fit = stats.linregress(x, result.trend.values)
slope, intercept, r_value, p_value, std_err = overall_fit

# The slope is in incidents per month; scale by 12 for a per-year figure.
annual_change = slope * 12

# Two-sided 95% interval for the slope from the t distribution with n-2
# degrees of freedom.
from scipy.stats import t
alpha = 0.05
t_critical = t.ppf(1-alpha/2, len(x)-2)
slope_se = std_err
slope_ci_lower, slope_ci_upper = (
    slope - t_critical * slope_se,
    slope + t_critical * slope_se,
)

# Same interval expressed per year.
annual_change_ci_lower, annual_change_ci_upper = (
    slope_ci_lower * 12,
    slope_ci_upper * 12,
)

significance_flag = 'Yes' if p_value < 0.05 else 'No'
print(f"Overall trend: Annual change = {annual_change:+.2f} incidents/year")
print(f"95% CI for annual change: {annual_change_ci_lower:+.2f} to {annual_change_ci_upper:+.2f}")
print(f"P-value: {p_value:.2e}")
print(f"Significant at α=0.05: {significance_flag}")

# Now do the same for each crime type
def calculate_trend_stats(trend_series, name, alpha=0.05, periods_per_year=12):
    """Fit a straight line to a trend series and summarise its annual slope.

    The series is regressed on a 0..n-1 step counter with
    ``scipy.stats.linregress``; the per-step slope and its confidence limits
    are then multiplied by ``periods_per_year``.

    Parameters
    ----------
    trend_series : pandas.Series or array-like of float
        Equally spaced trend values (e.g. the STL trend component).
    name : str
        Label stored under the ``'type'`` key of the result.
    alpha : float, default 0.05
        Significance level. The interval is the two-sided ``1 - alpha``
        interval and ``'significant'`` is ``p_value < alpha``.
    periods_per_year : int or float, default 12
        Number of observations per year (12 for monthly data).

    Returns
    -------
    dict
        Keys ``'type'``, ``'annual_change'``, ``'ci_lower'``, ``'ci_upper'``,
        ``'p_value'`` and ``'significant'``.

    Notes
    -----
    The interval uses the t distribution with ``n - 2`` degrees of freedom, so
    with fewer than three points the limits are NaN.
    NOTE(review): the standard error assumes independent residuals; a smoothed
    trend is serially correlated, so the interval is likely too narrow.
    """
    values = np.asarray(trend_series, dtype=float)
    x = np.arange(len(values))
    fit = stats.linregress(x, values)

    # stats.t is used (rather than a bare `t`) so the function does not depend
    # on an import executed elsewhere in the notebook.
    t_critical = stats.t.ppf(1 - alpha / 2, len(x) - 2)
    slope_ci_lower = fit.slope - t_critical * fit.stderr
    slope_ci_upper = fit.slope + t_critical * fit.stderr

    return {
        'type': name,
        'annual_change': fit.slope * periods_per_year,
        'ci_lower': slope_ci_lower * periods_per_year,
        'ci_upper': slope_ci_upper * periods_per_year,
        'p_value': fit.pvalue,
        'significant': bool(fit.pvalue < alpha)
    }

# Run the trend summary for the overall series and each crime category.
overall_stats, violent_stats, property_stats, other_stats = (
    calculate_trend_stats(fit_result.trend, series_label)
    for fit_result, series_label in (
        (result, 'Overall'),
        (result_violent, 'Violent'),
        (result_property, 'Property'),
        (result_other, 'Other'),
    )
)

# One row per series, in the order above.
trend_stats_df = pd.DataFrame(
    [overall_stats, violent_stats, property_stats, other_stats]
)

# Write the table next to the other temporal outputs and echo it.
trend_stats_path = 'output/tables/temporal/trend_statistics.csv'
trend_stats_df.to_csv(trend_stats_path, index=False)
print("\nTrend statistics calculated and saved to output/tables/temporal/trend_statistics.csv")
print("\nTrend Statistics:")
print(trend_stats_df)

In [11]:
# 5. Trend comparison figure: STL trend component of the overall series and of
# each crime category drawn on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))

trend_lines = (
    ('Overall', result.trend),
    ('Violent', result_violent.trend),
    ('Property', result_property.trend),
    ('Other', result_other.trend),
)
for line_label, trend_component in trend_lines:
    ax.plot(trend_component.index, trend_component.values, label=line_label, linewidth=2)

ax.set_xlabel('Year')
ax.set_ylabel('Trend Component')
ax.set_title('Trend Comparison by Crime Type')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('output/figures/temporal/trend_comparison_by_type.png', dpi=300, bbox_inches='tight')
plt.show()

print("Trend comparison figure saved.")

# Task 3: Day/Hour Patterns and Crime-Type Trends

Analyze day-of-week and hour-of-day patterns plus detailed crime-type trends:

In [12]:
# 1. Day-of-week analysis
# Tag every incident with its weekday (0=Monday ... 6=Sunday) and its name.
df_analysis['day_of_week'] = df_analysis['dispatch_date_time'].dt.dayofweek
df_analysis['day_name'] = df_analysis['dispatch_date_time'].dt.day_name()

# Total incidents per weekday over the whole study period; weekdays with no
# incidents at all are reported as 0 rather than missing.
day_of_week_counts = (
    df_analysis.groupby('day_of_week')['cartodb_id'].count()
    .reindex(range(7), fill_value=0)
)
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Weekend vs weekday difference in average per-weekday totals.
weekend_days = [5, 6]  # Saturday, Sunday
weekday_days = [0, 1, 2, 3, 4]  # Monday to Friday

weekend_avg = day_of_week_counts[weekend_days].mean()
weekday_avg = day_of_week_counts[weekday_days].mean()
weekend_vs_weekday = (weekend_avg - weekday_avg) / weekday_avg * 100

# Incidents per calendar date. The calendar date is the unit of observation
# for both the confidence intervals and the significance test: resampling
# individual incident rows (with the same sample size) always reproduces the
# same count and therefore yields a zero-width interval.
incidents_per_date = (
    df_analysis.set_index('dispatch_date_time')['cartodb_id'].resample('D').count()
)
date_weekday = incidents_per_date.index.dayofweek

# 95% percentile-bootstrap interval for each weekday's total: resample that
# weekday's calendar dates with replacement and sum their counts.
bootstrap_samples = 1000
rng = np.random.default_rng(42)  # fixed seed so the intervals are reproducible
bootstrapped_means = []

for day in range(7):
    per_date_counts = incidents_per_date[date_weekday == day].to_numpy()
    if per_date_counts.size == 0:
        bootstrapped_means.append((day, 0.0, 0.0))
        continue
    resampled = rng.choice(per_date_counts,
                           size=(bootstrap_samples, per_date_counts.size),
                           replace=True)
    resampled_totals = resampled.sum(axis=1)
    bootstrapped_means.append((day,
                               np.percentile(resampled_totals, 2.5),
                               np.percentile(resampled_totals, 97.5)))

# Bar chart of weekday totals with the bootstrap intervals as error bars.
fig, ax = plt.subplots(figsize=(10, 6))

days = range(7)
counts = [day_of_week_counts[day] for day in days]
ci_lower = [bounds[1] for bounds in bootstrapped_means]
ci_upper = [bounds[2] for bounds in bootstrapped_means]

# matplotlib expects asymmetric errors as a (2, N) array of non-negative
# distances: row 0 below the bar top, row 1 above it.
counts_arr = np.asarray(counts, dtype=float)
error_bars = np.vstack([
    np.clip(counts_arr - np.asarray(ci_lower, dtype=float), 0, None),
    np.clip(np.asarray(ci_upper, dtype=float) - counts_arr, 0, None),
])

bars = ax.bar(range(7), counts, yerr=error_bars, capsize=5,
              tick_label=day_names, color=sns.color_palette("viridis", 7))

ax.set_xlabel('Day of Week')
ax.set_ylabel('Number of Incidents')
ax.set_title(f'Day-of-Week Crime Patterns (Weekend vs Weekday: {weekend_vs_weekday:+.1f}%)')
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('output/figures/temporal/day_of_week_patterns.png', dpi=300, bbox_inches='tight')
plt.show()

# Mann-Whitney U test of the weekend effect, comparing incidents-per-date on
# weekend dates against weekday dates (not the incident ID values).
is_weekend_date = date_weekday.isin(weekend_days)
weekend_data = incidents_per_date[is_weekend_date]
weekday_data = incidents_per_date[~is_weekend_date]

from scipy.stats import mannwhitneyu
stat, p_val = mannwhitneyu(weekend_data, weekday_data, alternative='two-sided')

print(f"Weekend vs weekday difference: {weekend_vs_weekday:+.1f}%")
print(f"Weekend avg: {weekend_avg:.0f}, Weekday avg: {weekday_avg:.0f}")
print(f"Mann-Whitney U test p-value: {p_val:.2e}")
print(f"Weekend effect significant: {'Yes' if p_val < 0.05 else 'No'}")

In [13]:
# 2. Hour-of-day analysis
# Hour (0-23) taken from the dispatch timestamp.
df_analysis['hour'] = df_analysis['dispatch_date_time'].dt.hour

# Incident counts per hour, overall and per crime category.
count_by_hour = lambda frame: frame.groupby('hour')['cartodb_id'].count()
hourly_counts = count_by_hour(df_analysis)
hourly_violent = count_by_hour(df_analysis[df_analysis['crime_type'] == 'Violent'])
hourly_property = count_by_hour(df_analysis[df_analysis['crime_type'] == 'Property'])
# Computed for completeness; the 'Other' series is not drawn below.
hourly_other = count_by_hour(df_analysis[df_analysis['crime_type'] == 'Other'])

# Align each plotted series to all 24 hours; hours with no incidents become 0.
hours = range(24)
overall_hourly, violent_hourly, property_hourly = (
    per_hour.reindex(list(hours), fill_value=0).tolist()
    for per_hour in (hourly_counts, hourly_violent, hourly_property)
)

# Line plot: overall, violent and property counts by hour.
fig, ax = plt.subplots(figsize=(12, 6))
for hourly_values, series_label, series_marker in (
        (overall_hourly, 'Overall', 'o'),
        (violent_hourly, 'Violent', 's'),
        (property_hourly, 'Property', '^')):
    ax.plot(hours, hourly_values, label=series_label, linewidth=2, marker=series_marker)

ax.set_xlabel('Hour of Day')
ax.set_ylabel('Number of Incidents')
ax.set_title('Hour-of-Day Crime Patterns by Crime Type')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('output/figures/temporal/hour_of_day_patterns.png', dpi=300, bbox_inches='tight')
plt.show()

# Position of the maximum equals the hour because the lists run 0..23.
overall_peak_hour, violent_peak_hour, property_peak_hour = (
    np.argmax(hourly_values)
    for hourly_values in (overall_hourly, violent_hourly, property_hourly)
)

print(f"Peak hours: Overall={overall_peak_hour}, Violent={violent_peak_hour}, Property={property_peak_hour}")

In [14]:
# 3. Day x Hour heatmap
# Incident counts in a 7 (weekday) x 24 (hour) grid. Reindexing on both axes
# guarantees every weekday row and hour column is present, with 0 where no
# incidents were recorded.
hourly_daily = (
    df_analysis.groupby(['day_of_week', 'hour'])
    .size()
    .unstack(fill_value=0)
    .reindex(index=range(7), columns=range(24), fill_value=0)
)

# Rows follow day_of_week order, i.e. Monday first.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
short_day_labels = [full_name[:3] for full_name in day_order]

# Draw the heatmap.
fig, ax = plt.subplots(figsize=(16, 6))
sns.heatmap(
    hourly_daily,
    cmap='YlOrRd',
    annot=False,
    fmt='g',
    cbar_kws={'label': 'Number of Incidents'},
    ax=ax,
)
ax.set_xlabel('Hour of Day', fontweight='bold')
ax.set_ylabel('Day of Week', fontweight='bold')
ax.set_yticklabels(short_day_labels)
ax.set_title('Crime Incidents by Hour and Day of Week', fontweight='bold', pad=15)

plt.tight_layout()
plt.savefig('output/figures/temporal/hour_day_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()

print("Day × Hour heatmap created and saved.")

In [15]:
# 4. Crime-type-specific trends
# Calendar year of each incident (also used by the recent-trend cell).
df_analysis['year'] = df_analysis['dispatch_date_time'].dt.year

# Yearly counts by crime type (all years present in the data).
yearly_by_type = df_analysis.groupby(['year', 'crime_type']).size().unstack(fill_value=0)

# Year-over-year comparisons are only meaningful between fully observed years.
# A first or last calendar year that the data does not cover from 1 Jan to
# 31 Dec is excluded from the plot and the regression; otherwise its short
# count shows up as an artificial jump and biases the fitted slope.
first_observed = df_analysis['dispatch_date_time'].min()
last_observed = df_analysis['dispatch_date_time'].max()
partial_years = []
if (first_observed.month, first_observed.day) != (1, 1):
    partial_years.append(first_observed.year)
if (last_observed.month, last_observed.day) != (12, 31):
    partial_years.append(last_observed.year)
yearly_complete = yearly_by_type.drop(index=partial_years, errors='ignore')
if partial_years:
    print(f"Excluding partially observed year(s) from trend analysis: {sorted(set(partial_years))}")

# Multi-line trend plot, one line per crime type.
fig, ax = plt.subplots(figsize=(12, 6))

for col in yearly_complete.columns:
    ax.plot(yearly_complete.index, yearly_complete[col], label=col, linewidth=2, marker='o')

ax.set_xlabel('Year')
ax.set_ylabel('Number of Incidents')
ax.set_title('20-Year Crime Trends by Type')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('output/figures/temporal/crime_type_trends_20yr.png', dpi=300, bbox_inches='tight')
plt.show()

# Linear rate of change per type (incidents per year).
# Loop-local names are used on purpose: reusing the notebook-level `p_val`
# here would overwrite the weekend-effect p-value that the summary table
# reads later.
year_steps = np.arange(len(yearly_complete))
for col in yearly_complete.columns:
    type_fit = stats.linregress(year_steps, yearly_complete[col].values)
    type_slope, type_p = type_fit.slope, type_fit.pvalue
    print(f"{col} trend: {type_slope:+.1f} incidents/year (p={type_p:.2e}, {'sig' if type_p < 0.05 else 'ns'})")

In [16]:
# 5. Recent trend analysis
# The window is bounded on both sides so the data matches the 2020-2025 labels
# used in the figure title and printed output; an open-ended `>= 2020` filter
# would also pull in any later, partially observed year.
RECENT_START, RECENT_END = 2020, 2025
recent_years = df_analysis[df_analysis['year'].between(RECENT_START, RECENT_END)]
recent_yearly_by_type = recent_years.groupby(['year', 'crime_type']).size().unstack(fill_value=0)

# Recent trend plot, one line per crime type.
fig, ax = plt.subplots(figsize=(12, 6))

for col in recent_yearly_by_type.columns:
    ax.plot(recent_yearly_by_type.index, recent_yearly_by_type[col], label=col, linewidth=2, marker='o')

ax.set_xlabel('Year')
ax.set_ylabel('Number of Incidents')
ax.set_title(f'Recent Crime Trends ({RECENT_START}-{RECENT_END}) by Type')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('output/figures/temporal/recent_trends_5yr.png', dpi=300, bbox_inches='tight')
plt.show()

# Linear rate of change per type over the recent window. A regression needs at
# least two yearly points. Loop-local names keep the notebook-level `p_val`
# (the weekend-effect p-value read by the summary table) untouched.
if len(recent_yearly_by_type) > 1:
    recent_steps = np.arange(len(recent_yearly_by_type))
    for col in recent_yearly_by_type.columns:
        recent_fit = stats.linregress(recent_steps, recent_yearly_by_type[col].values)
        recent_slope, recent_p = recent_fit.slope, recent_fit.pvalue
        print(f"Recent {col} trend ({RECENT_START}-{RECENT_END}): {recent_slope:+.1f} incidents/year (p={recent_p:.2e}, {'sig' if recent_p < 0.05 else 'ns'})")

# Structural break around COVID: percentage change in each type's count
# between calendar years 2019 and 2020.
pre_covid = df_analysis[df_analysis['year'] == 2019].groupby('crime_type').size()
post_covid = df_analysis[df_analysis['year'] == 2020].groupby('crime_type').size()

print('\nCOVID-19 Period Changes:')
for ct in pre_covid.index:
    if ct in post_covid.index:
        change = ((post_covid[ct] - pre_covid[ct]) / pre_covid[ct]) * 100
        print(f'{ct}: {change:+.1f}% change from 2019 to 2020')

In [17]:
# 6. Save outputs: temporal_summary_stats.csv
# Gather the headline numbers from the cells above into one long-format table
# with columns metric / value / ci_lower / ci_upper / p_value / category.

def _summary_row(metric, value, category, ci_lower=None, ci_upper=None, p_value=None):
    """Return one summary record with the table's fixed key order."""
    return {
        'metric': metric,
        'value': value,
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
        'p_value': p_value,
        'category': category,
    }

# Trend rows: one per series in trend_stats_df.
summary_stats = [
    _summary_row(
        f"{trend_row['type']} Annual Change",
        trend_row['annual_change'],
        'Trend Statistics',
        ci_lower=trend_row['ci_lower'],
        ci_upper=trend_row['ci_upper'],
        p_value=trend_row['p_value'],
    )
    for trend_row in trend_stats_df.to_dict('records')
]

# Seasonal row.
summary_stats.append(_summary_row(
    'Seasonality Magnitude (% difference Summer vs Winter)',
    seasonality_magnitude,
    'Seasonal Statistics',
))

# Day-of-week row.
# NOTE(review): `p_val` is a notebook-level name; confirm it still holds the
# weekend-vs-weekday Mann-Whitney p-value at the point this cell runs.
summary_stats.append(_summary_row(
    'Weekend vs Weekday Difference (%)',
    weekend_vs_weekday,
    'Day/Week Statistics',
    p_value=p_val,
))

# Peak-hour rows.
summary_stats.extend(
    _summary_row(f'Peak Hour - {series_label}', peak_hour, 'Hour Statistics')
    for series_label, peak_hour in (
        ('Overall', overall_peak_hour),
        ('Violent', violent_peak_hour),
        ('Property', property_peak_hour),
    )
)

summary_df = pd.DataFrame(summary_stats)
summary_df.to_csv('output/tables/temporal/temporal_summary_stats.csv', index=False)
print("Summary statistics saved to output/tables/temporal/temporal_summary_stats.csv")
print(summary_df[['metric', 'value', 'category']].head(10))

In [18]:
# 7. Notebook conclusion: plain-text executive summary of the findings
# computed in the cells above.
print("\n=== EXECUTIVE SUMMARY OF TEMPORAL FINDINGS ===\n")

# Section 1: one line per series from the trend table.
print("1. 20-YEAR TREND ANALYSIS:")
for trend_row in trend_stats_df.to_dict('records'):
    sig = "SIGNIFICANT" if trend_row['significant'] else "NS"
    print(f"   - {trend_row['type']}: {trend_row['annual_change']:+.1f} incidents/year ({sig}, p={trend_row['p_value']:.2e})")

# Sections 2 and 3: seasonal and day/hour headline numbers.
seasonality_lines = [
    "\n2. SEASONALITY PATTERNS:",
    f"   - Seasonality magnitude: {seasonality_magnitude:+.1f}% (Summer vs Winter difference)",
    f"   - Summer peak months (Jun-Aug): Average {summer_peak:+.1f} incidents relative to trend",
    f"   - Winter low months (Dec-Feb): Average {winter_low:+.1f} incidents relative to trend",
]
day_hour_lines = [
    "\n3. DAY/HOUR PATTERNS:",
    f"   - Weekend vs weekday difference: {weekend_vs_weekday:+.1f}%",
    f"   - Peak overall hour: {overall_peak_hour}:00",
    f"   - Peak violent crime hour: {violent_peak_hour}:00",
    f"   - Peak property crime hour: {property_peak_hour}:00",
]
for summary_line in seasonality_lines + day_hour_lines:
    print(summary_line)

# Section 4: sanity checks against expected patterns.
# Summer is expected to exceed winter, and a weekend/weekday gap above 5% in
# either direction is treated as a meaningful difference.
summer_peaks_known = seasonality_magnitude > 0
weekend_effect_exists = abs(weekend_vs_weekday) > 5

print("\n4. VALIDATION AGAINST KNOWN PATTERNS:")
print(f"   - Summer peaks confirmed: {'Yes' if summer_peaks_known else 'No'}")
print(f"   - Weekend vs weekday patterns: {'Yes' if weekend_effect_exists else 'No'}")

# Section 5: suggested dashboard content.
print("\n5. RECOMMENDATIONS FOR DASHBOARD VISUALIZATIONS:")
for recommendation in (
        "Include 4-panel STL decomposition plot",
        "Display seasonal patterns by crime type",
        "Show 24-hour crime patterns with differentiation by crime type",
        "Create day-of-week heat map",
        "Include trend comparison by crime type"):
    print(f"   - {recommendation}")

print("\n=== TEMPORAL ANALYSIS COMPLETE ===")