# Create Lot Level Enforcement
Converted from `create_lot_level_enforcement.py`

In [None]:
"""
Create lot-level enforcement data aggregated by lot number instead of zone.
This allows each lot (CUE Garage, Library Garage, etc.) to have separate predictions.
"""
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Banner announcing the run.
print("=" * 80)
print("CREATING LOT-LEVEL ENFORCEMENT DATA")
print("=" * 80)

# Step 1: read the enriched ticket and LPR extracts, parsing their timestamp columns.
print("\n1. Loading raw data...")
tickets = pd.read_csv(
    '../data/processed/tickets_enriched.csv',
    parse_dates=['Issue_DateTime'],
)
ticket_times = tickets['Issue_DateTime']
print(f"   Tickets: {len(tickets):,} records from {ticket_times.min()} to {ticket_times.max()}")
lpr = pd.read_csv(
    '../data/processed/lpr_enriched.csv',
    parse_dates=['Date_Time'],
)
lpr_times = lpr['Date_Time']
print(f"   LPR: {len(lpr):,} records from {lpr_times.min()} to {lpr_times.max()}")
# AMP data only exists per zone, so it cannot be split by lot. LPR scans stand in
# for the vehicle total (and therefore for the unpaid estimate) further down.
print("   (Note: AMP data unavailable at lot level - using LPR as unpaid proxy)")

# Step 2: one timestamp per hour across the base study window.
print("\n2. Creating hourly grid...")
start_date = pd.to_datetime('2022-07-01 00:00:00')
end_date = pd.to_datetime('2025-06-30 23:00:00')
hourly_dates = pd.date_range(start=start_date, end=end_date, freq='H')
print(f"   Created {len(hourly_dates):,} hourly timestamps")

# Every lot number seen in either source; rows without a lot number are ignored.
ticket_lot_ids = tickets['Lot_number'].dropna().astype(int)
lpr_lot_ids = lpr['Lot_number'].dropna().astype(int)
tickets_lots = set(ticket_lot_ids.unique())
lpr_lots = set(lpr_lot_ids.unique())
all_lots = sorted(tickets_lots.union(lpr_lots))
print(f"   Total unique lots: {len(all_lots)}")
# Step 3: count tickets per lot per hour (timestamps are floored to the hour first).
print("\n3. Aggregating tickets by lot...")
tickets['datetime'] = tickets['Issue_DateTime'].dt.floor('H')
ticket_counts = tickets.groupby(['Lot_number', 'datetime']).size()
tickets_hourly = ticket_counts.reset_index(name='tickets_issued')
# Lot numbers are cast to int so they line up with the integer lot grid built later.
tickets_hourly = tickets_hourly.astype({'Lot_number': int})
print(f"   Tickets aggregated: {len(tickets_hourly):,} lot-hour combinations")

# Step 4: same aggregation for LPR scans.
print("\n4. Aggregating LPR by lot...")
lpr['datetime'] = lpr['Date_Time'].dt.floor('H')
scan_counts = lpr.groupby(['Lot_number', 'datetime']).size()
lpr_hourly = scan_counts.reset_index(name='lpr_scans')
lpr_hourly = lpr_hourly.astype({'Lot_number': int})
print(f"   LPR aggregated: {len(lpr_hourly):,} lot-hour combinations")
# Build the complete lot x hour grid, then attach the hourly counts to it.
print("\n6. Merging all data...")
lot_hour_index = pd.MultiIndex.from_product(
    [all_lots, hourly_dates],
    names=['Lot_number', 'datetime'],
)
lot_hour_grid = lot_hour_index.to_frame(index=False)

# Left joins keep every grid row, including lot-hours with no scans or tickets.
with_lpr = pd.merge(lot_hour_grid, lpr_hourly, on=['Lot_number', 'datetime'], how='left')
enforcement_lot = pd.merge(with_lpr, tickets_hourly, on=['Lot_number', 'datetime'], how='left')

# A lot-hour missing from an aggregate simply had zero events.
for count_column in ('lpr_scans', 'tickets_issued'):
    enforcement_lot[count_column] = enforcement_lot[count_column].fillna(0).astype(int)
enforcement_lot['amp_sessions'] = 0  # Not available at lot level
print(f"   Merged: {len(enforcement_lot):,} records")
# Derive the enforcement metrics for the base grid.
print("\n5. Calculating enforcement metrics...")
# With no lot-level AMP data, every scanned vehicle is counted as a potential violator.
enforcement_lot['unpaid_estimate'] = enforcement_lot['lpr_scans']
# Tickets per scan; lot-hours with zero scans get a rate of 0.0 rather than inf/NaN.
mask = enforcement_lot['lpr_scans'] > 0
tickets_per_scan = enforcement_lot['tickets_issued'] / enforcement_lot['lpr_scans']
enforcement_lot['enforcement_rate'] = tickets_per_scan.where(mask, 0.0)
# Base-period scan counts are observed, never estimated.
enforcement_lot['lpr_estimated'] = False
# Break the hourly timestamp into calendar parts used as model features.
print("\n6. Adding temporal and contextual features...")
stamps = enforcement_lot['datetime'].dt
temporal_parts = (
    ('date', 'date'),
    ('hour', 'hour'),
    ('year', 'year'),
    ('month', 'month'),
    ('day_of_week', 'dayofweek'),
)
for column_name, accessor_attr in temporal_parts:
    enforcement_lot[column_name] = getattr(stamps, accessor_attr)
# dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
enforcement_lot['is_weekend'] = enforcement_lot['day_of_week'].ge(5).astype(int)
# Time of day
def categorize_time_of_day(hour):
    """Return the time-of-day label for an hour of the day.

    Half-open bands: 6-12 'Morning', 12-17 'Afternoon', 17-21 'Evening',
    21-24 'Night'. Any value outside those bands (including 0-5) is
    'Late Night'.
    """
    bands = (
        (6, 12, 'Morning'),
        (12, 17, 'Afternoon'),
        (17, 21, 'Evening'),
        (21, 24, 'Night'),
    )
    for lower, upper, label in bands:
        if lower <= hour < upper:
            return label
    return 'Late Night'
# Label each hour, then encode the label as an integer (codes follow alphabetical label order).
enforcement_lot['time_of_day'] = enforcement_lot['hour'].map(categorize_time_of_day)
time_map = {
    label: code
    for code, label in enumerate(['Afternoon', 'Evening', 'Late Night', 'Morning', 'Night'])
}
enforcement_lot['time_of_day_code'] = enforcement_lot['time_of_day'].map(time_map)
# Attach hourly weather observations to every lot-hour by timestamp.
weather = pd.read_csv(
    '../data/weather_pullman_hourly_2020_2025.csv',
    parse_dates=['datetime'],
)
weather_columns = [
    'datetime', 'temperature_f', 'precipitation_inches', 'snowfall_inches',
    'snow_depth_inches', 'wind_mph', 'weather_code', 'weather_category',
    'is_rainy', 'is_snowy', 'is_cold', 'is_hot', 'is_windy', 'is_severe',
]
# Left join: hours missing from the weather file keep their row with NaN weather values.
enforcement_lot = pd.merge(
    enforcement_lot, weather[weather_columns], on='datetime', how='left'
)
# Academic-calendar and football game-day indicator columns.
calendar = pd.read_csv('../data/academic_calendar.csv', parse_dates=['Start_Date', 'End_Date'])
games = pd.read_csv('../data/football_games.csv', parse_dates=['Date'])

# Every indicator starts at 0 and is switched on for the matching dates below.
for flag_column in (
    'is_game_day',
    'is_dead_week',
    'is_finals_week',
    'is_spring_break',
    'is_thanksgiving_break',
    'is_winter_break',
    'is_any_break',
):
    enforcement_lot[flag_column] = 0

# Keyword looked for in Event_Type -> indicator columns to set.
# Order matters: only the first matching keyword is applied to an event.
event_flags = (
    ('Dead Week', ('is_dead_week',)),
    ('Finals Week', ('is_finals_week',)),
    ('Spring Break', ('is_spring_break', 'is_any_break')),
    ('Thanksgiving', ('is_thanksgiving_break', 'is_any_break')),
    ('Winter Break', ('is_winter_break', 'is_any_break')),
)
for _, event in calendar.iterrows():
    # Inclusive on both the start and end dates of the event.
    mask = enforcement_lot['date'].between(
        event['Start_Date'].date(), event['End_Date'].date()
    )
    for keyword, flag_columns in event_flags:
        if keyword in event['Event_Type']:
            for flag_column in flag_columns:
                enforcement_lot.loc[mask, flag_column] = 1
            break

# Any hour falling on a listed game date counts as game day.
game_dates = games['Date'].dt.date.unique()
on_game_date = enforcement_lot['date'].isin(game_dates)
enforcement_lot.loc[on_game_date, 'is_game_day'] = 1
# Save base data (2022-2025)
print("\n7. Saving base dataset (2022-2025)...")
# Keep 2022-07-01 00:00 through 2025-06-30 23:00 inclusive.
# The upper bound is an exclusive next-day cutoff: a bare date string such as
# '2025-06-30' is compared as midnight, so `<= '2025-06-30'` would silently drop
# the 01:00-23:00 rows of the final day for every lot.
enforcement_2022_2025 = enforcement_lot[
    (enforcement_lot['datetime'] >= '2022-07-01') &
    (enforcement_lot['datetime'] < '2025-07-01')
].copy()
output_file = '../data/processed/enforcement_lot_level.csv'
enforcement_2022_2025.to_csv(output_file, index=False)
print(f"   Saved: {output_file} ({len(enforcement_2022_2025):,} records)")
# Extend to October 2025
print("\n8. Extending to October 2025...")
# History window used to learn typical LPR volume: 2022-07-01 through the end of 2024.
# The upper bound is an exclusive next-day cutoff because comparing against the bare
# string '2024-12-31' means midnight and would exclude that day's 01:00-23:00 rows.
lpr_historical = enforcement_2022_2025[
    (enforcement_2022_2025['datetime'] >= '2022-07-01') &
    (enforcement_2022_2025['datetime'] < '2025-01-01')
].copy()
# Average scans for each (lot, weekday, hour, month) slot, plus the number of
# distinct dates behind each average. Named aggregation fixes the output column
# names directly instead of relying on a positional rename.
lpr_patterns = (
    lpr_historical
    .groupby(['Lot_number', 'day_of_week', 'hour', 'month'])
    .agg(
        avg_lpr_scans=('lpr_scans', 'mean'),
        num_dates=('date', 'nunique'),
    )
    .reset_index()
)
# Discard slots backed by fewer than two distinct dates.
lpr_patterns = lpr_patterns[lpr_patterns['num_dates'] >= 2]
# Create July-Oct 2025
# Hourly lot grid for the extension period, 2025-07-01 00:00 to 2025-10-30 23:00.
july_oct_2025_dates = pd.date_range('2025-07-01', '2025-10-30 23:00:00', freq='H')
lot_hour_grid_2025 = pd.MultiIndex.from_product(
    [all_lots, july_oct_2025_dates],
    names=['Lot_number', 'datetime']
).to_frame(index=False)
# Merge with existing data for tickets
# Ticket counts come straight from the hourly ticket aggregate. enforcement_lot
# cannot be the source: it is built on the base grid that ends at 2025-06-30 23:00,
# so filtering it for July 2025 onward always returns nothing and every extension
# row would end up with zero tickets.
# The upper bound is exclusive so all 24 hours of 2025-10-30 are kept.
july_oct_existing = tickets_hourly[
    (tickets_hourly['datetime'] >= '2025-07-01') &
    (tickets_hourly['datetime'] < '2025-10-31')
].copy()
july_oct_2025 = lot_hour_grid_2025.merge(
    july_oct_existing[['Lot_number', 'datetime', 'tickets_issued']],
    on=['Lot_number', 'datetime'], how='left'
)
# Calendar keys needed to look up the historical LPR pattern for each lot-hour.
ext_stamps = july_oct_2025['datetime'].dt
july_oct_2025['hour'] = ext_stamps.hour
july_oct_2025['day_of_week'] = ext_stamps.dayofweek
july_oct_2025['month'] = ext_stamps.month
july_oct_2025['date'] = ext_stamps.date
july_oct_2025['year'] = 2025
pattern_keys = ['Lot_number', 'day_of_week', 'hour', 'month']
july_oct_2025 = pd.merge(
    july_oct_2025,
    lpr_patterns[pattern_keys + ['avg_lpr_scans']],
    on=pattern_keys,
    how='left',
)

# Lot-hours with no ticket record had zero tickets.
july_oct_2025['tickets_issued'] = july_oct_2025['tickets_issued'].fillna(0).astype(int)
july_oct_2025['amp_sessions'] = 0  # Not available at lot level

# Scan counts default to 0; where a historical pattern exists, the rounded
# average replaces it and the row is marked as estimated.
has_pattern = july_oct_2025['avg_lpr_scans'].notna()
july_oct_2025['lpr_scans'] = 0
july_oct_2025['lpr_estimated'] = has_pattern
estimated_scans = july_oct_2025.loc[has_pattern, 'avg_lpr_scans'].round().astype(int)
july_oct_2025.loc[has_pattern, 'lpr_scans'] = estimated_scans

# Same metrics as the base period, with LPR standing in for unpaid vehicles.
july_oct_2025['unpaid_estimate'] = july_oct_2025['lpr_scans']
mask = july_oct_2025['lpr_scans'] > 0
ext_tickets_per_scan = july_oct_2025['tickets_issued'] / july_oct_2025['lpr_scans']
# Rows with zero scans get a rate of 0.0 rather than inf/NaN.
july_oct_2025['enforcement_rate'] = ext_tickets_per_scan.where(mask, 0.0)
# Add all remaining features
# These are derived directly for the extension rows. They cannot be copied from
# enforcement_lot: that frame is built on the base grid ending 2025-06-30 23:00,
# so it has no rows from July 2025 onward and a merge against it leaves every
# feature below as NaN.
july_oct_2025['is_weekend'] = (july_oct_2025['day_of_week'] >= 5).astype(int)
july_oct_2025['time_of_day'] = july_oct_2025['hour'].apply(categorize_time_of_day)
july_oct_2025['time_of_day_code'] = july_oct_2025['time_of_day'].map(time_map)

# Weather by timestamp. Duplicate timestamps are dropped first so the left join
# cannot multiply lot-hour rows.
ext_weather_columns = [
    'datetime', 'temperature_f', 'precipitation_inches', 'snowfall_inches',
    'snow_depth_inches', 'wind_mph', 'weather_code', 'weather_category',
    'is_rainy', 'is_snowy', 'is_cold', 'is_hot', 'is_windy', 'is_severe',
]
july_oct_2025 = july_oct_2025.merge(
    weather[ext_weather_columns].drop_duplicates('datetime'),
    on='datetime', how='left'
)

# Calendar indicators, using the same keyword rules as the base period:
# the first keyword found in Event_Type decides which columns are set.
for ext_flag in ('is_game_day', 'is_dead_week', 'is_finals_week', 'is_spring_break',
                 'is_thanksgiving_break', 'is_winter_break', 'is_any_break'):
    july_oct_2025[ext_flag] = 0
ext_event_flags = (
    ('Dead Week', ('is_dead_week',)),
    ('Finals Week', ('is_finals_week',)),
    ('Spring Break', ('is_spring_break', 'is_any_break')),
    ('Thanksgiving', ('is_thanksgiving_break', 'is_any_break')),
    ('Winter Break', ('is_winter_break', 'is_any_break')),
)
for _, event in calendar.iterrows():
    # Inclusive on both the start and end dates of the event.
    in_event = (
        (july_oct_2025['date'] >= event['Start_Date'].date()) &
        (july_oct_2025['date'] <= event['End_Date'].date())
    )
    for keyword, flag_columns in ext_event_flags:
        if keyword in event['Event_Type']:
            for ext_flag in flag_columns:
                july_oct_2025.loc[in_event, ext_flag] = 1
            break
# Any hour falling on a listed football game date counts as game day.
ext_game_dates = games['Date'].dt.date.unique()
july_oct_2025.loc[july_oct_2025['date'].isin(ext_game_dates), 'is_game_day'] = 1

# The pattern average was only needed to fill lpr_scans.
july_oct_2025 = july_oct_2025.drop('avg_lpr_scans', axis=1)
print(f"   July-Oct 2025: {len(july_oct_2025):,} records")
print(f"   Estimated LPR: {july_oct_2025['lpr_estimated'].sum():,} lot-hours")
# Stack the base period and the extension, order by lot then time, and write to disk.
print("\n9. Combining and saving full extended dataset...")
enforcement_full_extended = pd.concat(
    [enforcement_2022_2025, july_oct_2025],
    ignore_index=True,
).sort_values(['Lot_number', 'datetime'])
output_file_extended = '../data/processed/enforcement_lot_level_extended.csv'
enforcement_full_extended.to_csv(output_file_extended, index=False)

# Summary of what was written.
rule = '=' * 80
first_hour = enforcement_full_extended['datetime'].min()
last_hour = enforcement_full_extended['datetime'].max()
print(f"\n{rule}")
print("COMPLETE!")
print(rule)
print(f"Total records: {len(enforcement_full_extended):,}")
print(f"Unique lots: {enforcement_full_extended['Lot_number'].nunique()}")
print(f"Date range: {first_hour} to {last_hour}")
print(f"\nSaved to: {output_file_extended}")
# Show sample for key lots
key_lots = [150, 71, 146]
lot_names = {150: 'CUE Garage', 71: 'Library Garage', 146: 'Student Rec Center'}
print(f"\n{'='*80}")
print("SAMPLE: Key Paid Lots")
print(f"{'='*80}")
for lot in key_lots:
    lot_data = enforcement_full_extended[enforcement_full_extended['Lot_number'] == lot]
    # Lots missing from the dataset are skipped rather than reported as zeros.
    if lot_data.empty:
        continue
    # The totals use their own names so the `tickets` and `lpr` source DataFrames
    # are not overwritten with scalars.
    total_tickets = lot_data['tickets_issued'].sum()
    total_lpr_scans = lot_data['lpr_scans'].sum()
    # Number of hourly rows in which at least one ticket was issued.
    hours_enf = (lot_data['tickets_issued'] > 0).sum()
    print(f"\nLot {lot} - {lot_names[lot]}:")
    print(f"  Total tickets: {total_tickets:,}")
    print(f"  Total LPR scans: {total_lpr_scans:,}")
    print(f"  Hours with enforcement: {hours_enf:,} ({hours_enf/len(lot_data)*100:.1f}%)")
print(f"\n{'='*80}")
