# Guardian Route - Data Exploration

Explore the Denver crime data and the spatial grid, and check that the Cynet library is available.

## Setup

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pickle
from pathlib import Path
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices from pandas/geopandas;
# consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

# Set style
# Global matplotlib style sheet and seaborn colour palette used by every plot below.
plt.style.use('ggplot')
sns.set_palette('Set2')

# Simple confirmation that the setup cell ran to completion.
print("✓ Libraries loaded")

## 1. Load Data

In [None]:
# Read the filtered crime records and convert the occurrence column to datetimes
crime_df = pd.read_csv('../data/processed/crime_filtered.csv').assign(
    FIRST_OCCURRENCE_DATE=lambda frame: pd.to_datetime(frame['FIRST_OCCURRENCE_DATE'])
)

# Quick sanity summary: row count, covered date span, available columns
occurred = crime_df['FIRST_OCCURRENCE_DATE']
earliest_day = occurred.min().date()
latest_day = occurred.max().date()

print(f"Crime records: {len(crime_df):,}")
print(f"Date range: {earliest_day} to {latest_day}")
print(f"Columns: {list(crime_df.columns)}")

In [None]:
# Read the spatial grid GeoJSON into a GeoDataFrame
grid_gdf = gpd.read_file('../data/processed/spatial_grid.geojson')

# Report how many tiles were loaded and which coordinate reference system they use
tile_total = len(grid_gdf)
grid_crs = grid_gdf.crs
print(f"Spatial tiles: {tile_total:,}")
print(f"CRS: {grid_crs}")

# Last expression: rich preview of the first rows
grid_gdf.head()

## 2. Temporal Patterns

In [None]:
# Incident totals per calendar year
crime_df['year'] = crime_df['FIRST_OCCURRENCE_DATE'].dt.year
incidents_per_year = crime_df['year'].value_counts().sort_index()  # chronological order

fig, ax = plt.subplots(figsize=(12, 5))
incidents_per_year.plot.bar(ax=ax)
ax.set_title('Crime Incidents by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Incidents')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Incident totals per hour of the day (0-23)
crime_df['hour'] = crime_df['FIRST_OCCURRENCE_DATE'].dt.hour

# value_counts() orders by descending frequency; keep that ordering for the
# "peak hours" summary and sort by hour only for the chart.
incidents_per_hour = crime_df['hour'].value_counts()

fig, ax = plt.subplots(figsize=(14, 5))
incidents_per_hour.sort_index().plot.bar(ax=ax, color='steelblue')
ax.set_title('Crime Incidents by Hour of Day')
ax.set_xlabel('Hour')
ax.set_ylabel('Number of Incidents')
fig.tight_layout()
plt.show()

busiest_hours = incidents_per_hour.head(5).index.tolist()
print(f"Peak hours: {busiest_hours}")

In [None]:
# Incident totals per weekday, shown Monday-first rather than by frequency
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
crime_df['day_of_week'] = crime_df['FIRST_OCCURRENCE_DATE'].dt.day_name()

# reindex() imposes the calendar order on the frequency-sorted counts
day_counts = crime_df['day_of_week'].value_counts().reindex(day_order)

fig, ax = plt.subplots(figsize=(10, 5))
day_counts.plot.bar(ax=ax, color='coral')
ax.set_title('Crime Incidents by Day of Week')
ax.set_xlabel('Day')
ax.set_ylabel('Number of Incidents')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

## 3. Crime Types

In [None]:
# Most frequent offense categories (skipped when the column is absent)
if 'OFFENSE_CATEGORY_ID' in crime_df.columns:
    offense_counts = crime_df['OFFENSE_CATEGORY_ID'].value_counts()
    top_offenses = offense_counts.head(15)

    fig, ax = plt.subplots(figsize=(12, 6))
    top_offenses.plot.barh(ax=ax)
    ax.set_title('Top 15 Offense Categories')
    ax.set_xlabel('Number of Incidents')
    ax.set_ylabel('Offense Category')
    fig.tight_layout()
    plt.show()

    # Share of all incidents taken by each of the five most common categories
    total_incidents = len(crime_df)
    print("\nTop 5 offense types:")
    for offense, count in top_offenses.head().items():
        share = count / total_incidents * 100
        print(f"  {offense}: {count:,} ({share:.1f}%)")

## 4. Spatial Distribution

In [None]:
# Crime density heatmap
# Count incidents per tile, then attach the counts to the grid geometry.
crime_counts = crime_df.groupby('tile_id').size().reset_index(name='crime_count')

# Left merge keeps every grid tile; tiles without any incident get NaN counts.
grid_with_counts = grid_gdf.merge(crime_counts, on='tile_id', how='left')

# Fill ONLY the count column. A frame-wide fillna(0) would also overwrite
# missing values in unrelated grid attributes with 0. Casting back to int
# undoes the float upcast caused by the NaNs, so counts print as "1,234"
# instead of "1,234.0".
grid_with_counts['crime_count'] = grid_with_counts['crime_count'].fillna(0).astype(int)

fig, ax = plt.subplots(figsize=(14, 12))
grid_with_counts.plot(
    column='crime_count',
    ax=ax,
    legend=True,
    cmap='YlOrRd',
    edgecolor='gray',
    linewidth=0.1,
    alpha=0.8
)
# NOTE(review): the year span in the title is hard-coded; confirm it matches
# the date range printed when the crime data was loaded.
plt.title('Crime Density by Tile (2019-2024)', fontsize=16)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.tight_layout()
plt.show()

# Summary statistics are computed over all grid tiles, including empty ones.
print(f"\nSpatial statistics:")
print(f"  Tiles with crimes: {(grid_with_counts['crime_count'] > 0).sum():,}")
print(f"  Average crimes per tile: {grid_with_counts['crime_count'].mean():.1f}")
print(f"  Max crimes in single tile: {grid_with_counts['crime_count'].max():,}")

In [None]:
# The ten tiles with the highest incident counts, with their centre coordinates
hotspot_columns = ['tile_id', 'crime_count', 'center_lon', 'center_lat']
busiest_tiles = grid_with_counts.nlargest(10, 'crime_count')
hotspots = busiest_tiles[hotspot_columns]

print("Top 10 crime hotspots:")
print(hotspots)

## 5. Training Data (Triplet Format)

In [None]:
# Load the pickled triplet (row coordinates, column dates, event matrix).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open pickles produced by a trusted pipeline.
triplet_path = '../data/processed/crime_triplets.pkl'
with open(triplet_path, 'rb') as handle:
    triplet = pickle.load(handle)

# Summarise the dimensions and sparsity of the loaded structure
event_matrix = triplet['timeseries']
tile_total = len(triplet['row_coords'])
hour_total = len(triplet['col_dates'])

print("Triplet structure:")
print(f"  Row coords (tiles): {tile_total:,}")
print(f"  Column dates (hours): {hour_total:,}")
print(f"  Timeseries shape: {event_matrix.shape}")
print(f"  Data type: {event_matrix.dtype}")
print(f"  Event rate: {event_matrix.mean()*100:.2f}%")

In [None]:
# Heatmap of a small corner of the event matrix: first tiles x first hours
sample_tiles = 20
sample_hours = 168  # 1 week

# Top-left window of the matrix (rows are indexed first, then columns)
event_window = triplet['timeseries'][:sample_tiles, :sample_hours]

fig, ax = plt.subplots(figsize=(16, 8))
im = ax.imshow(
    event_window,
    cmap='RdYlGn_r',
    aspect='auto',
    interpolation='nearest',
)
fig.colorbar(im, ax=ax, label='Event Occurred (1) / No Event (0)')
ax.set_title(f'Spatiotemporal Event Matrix (First {sample_tiles} tiles, First {sample_hours} hours)')
ax.set_xlabel('Time (hours)')
ax.set_ylabel('Tile Index')
fig.tight_layout()
plt.show()

## 6. Test Cynet API (Optional)

In [None]:
# Test if Cynet is available
def make_cynet_dummy_data(n_tiles=3, n_hours=100, seed=42):
    """Build a small, reproducible binary event matrix for smoke-testing.

    Parameters
    ----------
    n_tiles : int
        Number of synthetic tiles (rows).
    n_hours : int
        Number of consecutive hourly time steps (columns).
    seed : int
        Seed for the random generator, so every run yields the same matrix.

    Returns
    -------
    tuple
        ``(tiles, dates, events)``: an array of tile labels ``tile_1..tile_N``,
        an hourly ``DatetimeIndex`` starting 2023-01-01, and an integer array
        of shape ``(n_tiles, n_hours)`` holding 0/1 values.
    """
    rng = np.random.default_rng(seed)
    tiles = np.array([f'tile_{i}' for i in range(1, n_tiles + 1)])
    # An explicit Timedelta avoids the 'H' frequency alias, which newer pandas deprecates.
    dates = pd.date_range('2023-01-01', periods=n_hours, freq=pd.Timedelta(hours=1))
    events = rng.integers(0, 2, size=(n_tiles, n_hours))
    return tiles, dates, events


try:
    import cynet
    from cynet import xgModels  # imported only to confirm the submodule loads
    print("✓ Cynet library available")
    print(f"  Version: {getattr(cynet, '__version__', 'Unknown')}")

    # Create small test model
    print("\nTesting Cynet with dummy data...")
    test_tiles, test_dates, test_data = make_cynet_dummy_data()

    # This is a placeholder - actual Cynet API may differ
    # NOTE(review): nothing below calls into Cynet yet; the "ready" message
    # only reflects a successful import.
    print(f"  Test data created ({test_data.shape[0]} tiles × {test_data.shape[1]} hours)")
    print("  ✓ Cynet ready for model training")

except ImportError:
    # Best-effort: the notebook stays usable without Cynet installed.
    print("✗ Cynet library not available")
    print("  To install: pip install cynet")
    print("  Or from source: git clone https://github.com/zeroknowledgediscovery/Cynet")
    print("  Note: Placeholder model will be used for routing")

## Summary

This notebook explored:
- ✅ Temporal patterns in the crime data (by year, hour of day, and day of week)
- ✅ Offense category distribution  
- ✅ Spatial crime density across Denver
- ✅ Training data in Cynet triplet format
- ✅ Cynet library availability

**Next step:** Proceed to `02_interactive_routing.ipynb` for route generation!