In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

# Plot styling shared by every figure in the notebook.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

# Seed NumPy's global generator so the synthetic occupancy data (drawn with
# np.random and DataFrame.sample further down) is reproducible on every
# Restart & Run All. Change SEED to explore a different random draw.
SEED = 42
np.random.seed(SEED)

print("Libraries loaded")
print(f"Pandas: {pd.__version__}")
print(f"NumPy: {np.__version__}")


In [None]:
# Read the parking-meter inventory CSV (expected next to the notebook).
meters_df = pd.read_csv('Parking_Meters_20250703.csv')

# Report the size of the inventory: rows are meters, columns are attributes.
meter_total, column_total = meters_df.shape
print(f"Got {meter_total} parking meters")
print(f"Columns: {column_total} total")

meters_df.head()


In [None]:
# Null count for every column of the meter inventory.
null_counts = meters_df.isnull().sum()
print("Missing values:")
print(null_counts)

# The ten neighborhoods with the most meters.
neighborhood_counts = meters_df['analysis_neighborhood'].value_counts()
print("\nTop neighborhoods:")
print(neighborhood_counts.head(10))


In [None]:
def generate_synthetic_transactions(meters_df, days=90, seed=None):
    """Simulate hourly occupancy readings for a sample of parking meters.

    For each of five hard-coded neighborhoods, up to 50 meters are sampled
    from ``meters_df`` and each receives one synthetic occupancy reading per
    hour over the ``days`` days ending 2025-02-15 00:00 (both endpoints
    included, i.e. ``days * 24 + 1`` readings per meter).

    Each reading is built from the neighborhood's base occupancy, multiplied
    by 1.4 in its peak hours or by 0.3 before 06:00 / after 22:00, multiplied
    by the neighborhood's weekend factor on Saturdays and Sundays, perturbed
    with Gaussian noise (sigma 0.1), scaled by a month-based sinusoidal
    seasonal factor, and finally clipped to [0, 1].

    Parameters
    ----------
    meters_df : pandas.DataFrame
        Meter inventory. The columns ``analysis_neighborhood``,
        ``PARKING_SPACE_ID``, ``OBJECTID``, ``LATITUDE`` and ``LONGITUDE``
        are read.
    days : int, default 90
        Length of the simulated window in days.
    seed : int or None, default None
        Seed for a private random generator. With ``None`` the draws come
        from NumPy's global generator, so a prior ``np.random.seed(...)``
        call still controls the output.

    Returns
    -------
    pandas.DataFrame
        One row per (meter, hour) with columns ``meter_id``,
        ``neighborhood``, ``timestamp``, ``occupancy_rate``, ``hour``,
        ``day_of_week``, ``is_weekend``, ``latitude``, ``longitude``.
        When no meter belongs to a known neighborhood the frame is empty
        but still carries these columns.
    """
    output_columns = [
        'meter_id', 'neighborhood', 'timestamp', 'occupancy_rate', 'hour',
        'day_of_week', 'is_weekend', 'latitude', 'longitude',
    ]

    # The np.random module and a RandomState instance expose the same
    # .normal() method, so either can serve as the noise source.
    rng = np.random if seed is None else np.random.RandomState(seed)
    sample_state = None if seed is None else rng

    end_date = datetime(2025, 2, 15)  # Project completion date
    start_date = end_date - timedelta(days=days)
    # A timedelta frequency avoids the 'H' alias that newer pandas deprecates.
    date_range = pd.date_range(start=start_date, end=end_date, freq=timedelta(hours=1))
    n_hours = len(date_range)

    # Calendar features depend only on the timestamps, so compute them once.
    hours = np.asarray(date_range.hour, dtype=int)
    days_of_week = np.asarray(date_range.weekday, dtype=int)
    months = np.asarray(date_range.month, dtype=int)
    is_weekend = days_of_week >= 5
    is_overnight = (hours < 6) | (hours > 22)
    seasonal_factor = 1 + 0.2 * np.sin(2 * np.pi * (months - 6) / 12)

    # Hand-tuned assumptions about each neighborhood (not measured data).
    neighborhood_patterns = {
        'Mission': {'base_occupancy': 0.75, 'peak_hours': [9, 10, 11, 17, 18, 19], 'weekend_boost': 1.2},
        'Nob Hill': {'base_occupancy': 0.85, 'peak_hours': [8, 9, 10, 16, 17, 18], 'weekend_boost': 1.1},
        'Tenderloin': {'base_occupancy': 0.60, 'peak_hours': [10, 11, 12, 13, 14], 'weekend_boost': 0.9},
        'Hayes Valley': {'base_occupancy': 0.70, 'peak_hours': [9, 10, 11, 17, 18, 19], 'weekend_boost': 1.3},
        'Western Addition': {'base_occupancy': 0.65, 'peak_hours': [8, 9, 10, 16, 17, 18], 'weekend_boost': 1.1}
    }

    frames = []
    for neighborhood, pattern in neighborhood_patterns.items():
        neighborhood_meters = meters_df[meters_df['analysis_neighborhood'] == neighborhood]
        if len(neighborhood_meters) == 0:
            continue

        sample_meters = neighborhood_meters.sample(
            min(50, len(neighborhood_meters)), random_state=sample_state
        )

        # Noise-free hourly profile shared by every meter in this neighborhood.
        is_peak = np.isin(hours, pattern['peak_hours'])
        expected = np.full(n_hours, pattern['base_occupancy'], dtype=float)
        expected[is_peak] *= 1.4
        expected[is_overnight & ~is_peak] *= 0.3
        expected[is_weekend] *= pattern['weekend_boost']

        for _, meter in sample_meters.iterrows():
            if pd.notna(meter['PARKING_SPACE_ID']):
                meter_id = meter['PARKING_SPACE_ID']
            else:
                meter_id = f"METER_{meter['OBJECTID']}"

            noisy = expected + rng.normal(0, 0.1, size=n_hours)
            # Clip AFTER the seasonal scaling; clipping first let months with
            # a factor above 1 push occupancy past 100%.
            occupancy = np.clip(noisy * seasonal_factor, 0, 1)

            frames.append(pd.DataFrame({
                'meter_id': meter_id,
                'neighborhood': neighborhood,
                'timestamp': date_range,
                'occupancy_rate': occupancy,
                'hour': hours,
                'day_of_week': days_of_week,
                'is_weekend': is_weekend,
                'latitude': meter['LATITUDE'],
                'longitude': meter['LONGITUDE'],
            }, columns=output_columns))

    if not frames:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(frames, ignore_index=True)

# Build the 90-day synthetic dataset, then report its size and time span.
print("Generating synthetic data...")
transactions_df = generate_synthetic_transactions(meters_df, days=90)

first_timestamp = transactions_df['timestamp'].min()
last_timestamp = transactions_df['timestamp'].max()
print(f"Created {len(transactions_df)} records")
print(f"From {first_timestamp} to {last_timestamp}")

transactions_df.head()


In [None]:
# Null counts before any cleaning.
print("Missing values:")
print(transactions_df.isnull().sum())

# Drop rows that lack any of the fields the later aggregation groups or averages on.
required_fields = ['neighborhood', 'timestamp', 'occupancy_rate']
transactions_df = transactions_df.dropna(subset=required_fields)
print(f"After cleaning: {len(transactions_df)} records")

# Normalise the timestamp dtype and derive the calendar columns from it.
parsed_timestamps = pd.to_datetime(transactions_df['timestamp'])
transactions_df = transactions_df.assign(
    timestamp=parsed_timestamps,
    date=parsed_timestamps.dt.date,
    month=parsed_timestamps.dt.month,
    day_name=parsed_timestamps.dt.day_name(),
)

# One-line-per-fact summary of the cleaned dataset.
summary_lines = (
    f"\nDate range: {transactions_df['timestamp'].min()} to {transactions_df['timestamp'].max()}",
    f"Neighborhoods: {transactions_df['neighborhood'].nunique()}",
    f"Unique meters: {transactions_df['meter_id'].nunique()}",
    f"Average occupancy: {transactions_df['occupancy_rate'].mean():.3f}",
)
for summary_line in summary_lines:
    print(summary_line)


In [None]:
# Collapse the per-meter readings to one row per (neighborhood, hour):
# the mean occupancy across meters and how many meter readings went into it.
hourly_agg = (
    transactions_df
    .groupby(['neighborhood', 'timestamp'])
    .agg(
        avg_occupancy=('occupancy_rate', 'mean'),
        meter_count=('meter_id', 'count'),  # Number of meters reporting
    )
    .reset_index()
)

# Re-derive the calendar features on the aggregated rows.
agg_timestamps = hourly_agg['timestamp']
weekday_number = agg_timestamps.dt.weekday
hourly_agg = hourly_agg.assign(
    hour=agg_timestamps.dt.hour,
    day_of_week=weekday_number,
    day_name=agg_timestamps.dt.day_name(),
    is_weekend=weekday_number >= 5,
    date=agg_timestamps.dt.date,
)

print(f"Aggregated data shape: {hourly_agg.shape}")
print("\nSample of aggregated data:")
hourly_agg.head(10)


In [None]:
# Four-panel overview of occupancy: by hour, weekday vs weekend,
# by neighborhood, and by day of week.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
ax_hour, ax_split, ax_area, ax_day = axes.ravel()
even_hours = range(0, 24, 2)

# Panel 1: overall hourly pattern
hourly_pattern = hourly_agg.groupby('hour')['avg_occupancy'].mean()
ax_hour.plot(hourly_pattern.index, hourly_pattern.values, 'b-', linewidth=2, marker='o')
ax_hour.set_title('Average Occupancy by Hour of Day', fontsize=14, fontweight='bold')
ax_hour.set_xlabel('Hour of Day')
ax_hour.set_ylabel('Average Occupancy Rate')
ax_hour.grid(True, alpha=0.3)
ax_hour.set_xticks(even_hours)

# Panel 2: weekday vs weekend
weekend_rows = hourly_agg['is_weekend']
weekday_hourly = hourly_agg[~weekend_rows].groupby('hour')['avg_occupancy'].mean()
weekend_hourly = hourly_agg[weekend_rows].groupby('hour')['avg_occupancy'].mean()

ax_split.plot(weekday_hourly.index, weekday_hourly.values, 'b-', linewidth=2, label='Weekday', marker='o')
ax_split.plot(weekend_hourly.index, weekend_hourly.values, 'r-', linewidth=2, label='Weekend', marker='s')
ax_split.set_title('Occupancy: Weekday vs Weekend', fontsize=14, fontweight='bold')
ax_split.set_xlabel('Hour of Day')
ax_split.set_ylabel('Average Occupancy Rate')
ax_split.legend()
ax_split.grid(True, alpha=0.3)
ax_split.set_xticks(even_hours)

# Panel 3: neighborhoods, highest mean occupancy first
neighborhood_avg = hourly_agg.groupby('neighborhood')['avg_occupancy'].mean().sort_values(ascending=False)
area_positions = range(len(neighborhood_avg))
ax_area.bar(area_positions, neighborhood_avg.values, color='skyblue', alpha=0.7)
ax_area.set_title('Average Occupancy by Neighborhood', fontsize=14, fontweight='bold')
ax_area.set_xlabel('Neighborhood')
ax_area.set_ylabel('Average Occupancy Rate')
ax_area.set_xticks(area_positions)
ax_area.set_xticklabels(neighborhood_avg.index, rotation=45, ha='right')
ax_area.grid(True, alpha=0.3, axis='y')

# Panel 4: day-of-week pattern, Monday first
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
daily_pattern = hourly_agg.groupby('day_name')['avg_occupancy'].mean().reindex(day_order)
day_positions = range(len(daily_pattern))
ax_day.bar(day_positions, daily_pattern.values, color='lightgreen', alpha=0.7)
ax_day.set_title('Average Occupancy by Day of Week', fontsize=14, fontweight='bold')
ax_day.set_xlabel('Day of Week')
ax_day.set_ylabel('Average Occupancy Rate')
ax_day.set_xticks(day_positions)
ax_day.set_xticklabels(daily_pattern.index, rotation=45)
ax_day.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

# Headline numbers read off the aggregates plotted above.
busiest_hour = hourly_pattern.idxmax()
quietest_hour = hourly_pattern.idxmin()
print("Key observations:")
print(f"- Peak hours: {busiest_hour} (occupancy: {hourly_pattern.max():.3f})")
print(f"- Lowest hours: {quietest_hour} (occupancy: {hourly_pattern.min():.3f})")
print(f"- Best neighborhood: {neighborhood_avg.index[0]} ({neighborhood_avg.iloc[0]:.3f})")
print(f"- Worst neighborhood: {neighborhood_avg.index[-1]} ({neighborhood_avg.iloc[-1]:.3f})")


In [None]:
# Two stacked heatmaps: mean occupancy by (day, hour) and by (neighborhood, hour).
fig, (ax_by_day, ax_by_area) = plt.subplots(2, 1, figsize=(15, 10))

# Rows follow day_order (Monday first) rather than alphabetical order.
heatmap_data = (
    hourly_agg
    .groupby(['day_name', 'hour'])['avg_occupancy']
    .mean()
    .unstack()
    .reindex(day_order)
)
sns.heatmap(heatmap_data, annot=True, fmt='.2f', cmap='YlOrRd',
            cbar_kws={'label': 'Occupancy Rate'}, ax=ax_by_day)
ax_by_day.set_title('Occupancy by Hour and Day')
ax_by_day.set_xlabel('Hour of Day')
ax_by_day.set_ylabel('Day of Week')

neighborhood_hourly = (
    hourly_agg
    .groupby(['neighborhood', 'hour'])['avg_occupancy']
    .mean()
    .unstack()
)
sns.heatmap(neighborhood_hourly, annot=True, fmt='.2f', cmap='Blues',
            cbar_kws={'label': 'Occupancy Rate'}, ax=ax_by_area)
ax_by_area.set_title('Occupancy by Hour and Neighborhood')
ax_by_area.set_xlabel('Hour of Day')
ax_by_area.set_ylabel('Neighborhood')

fig.tight_layout()
plt.show()

# these heatmaps actually look decent


In [None]:
# Prepare time series data for modeling
# Let's focus on Mission neighborhood as it has good occupancy patterns

mission_ts = hourly_agg[hourly_agg['neighborhood'] == 'Mission'].copy()
if mission_ts.empty:
    # Without this guard the date_range call below fails on NaT bounds
    # with a much less helpful message.
    raise ValueError("No aggregated rows for the 'Mission' neighborhood; cannot build the time series")
mission_ts = mission_ts.set_index('timestamp').sort_index()

# Create a complete hourly index so any missing hours become explicit gaps.
# A Timedelta frequency is used instead of the 'H' alias, which newer pandas deprecates.
full_index = pd.date_range(
    start=mission_ts.index.min(),
    end=mission_ts.index.max(),
    freq=pd.Timedelta(hours=1),
)
mission_ts = mission_ts.reindex(full_index)

# Fill gaps with the previous value, then back-fill a leading gap if any.
# .ffill()/.bfill() replace the deprecated fillna(method=...) form.
# Only avg_occupancy is filled; the other columns stay NaN on inserted rows.
mission_ts['avg_occupancy'] = mission_ts['avg_occupancy'].ffill().bfill()

print(f"Time series shape: {mission_ts.shape}")
print(f"Date range: {mission_ts.index.min()} to {mission_ts.index.max()}")
print(f"Missing values: {mission_ts['avg_occupancy'].isnull().sum()}")

# Plot the time series
plt.figure(figsize=(15, 6))
plt.plot(mission_ts.index, mission_ts['avg_occupancy'], linewidth=1, alpha=0.8)
plt.title('Mission Neighborhood - Hourly Occupancy Time Series', fontsize=16, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Occupancy Rate')
plt.grid(True, alpha=0.3)
plt.show()

# Check stationarity
def check_stationarity(timeseries):
    """Run an Augmented Dickey-Fuller test and print the outcome.

    NaN entries are dropped before the test. Prints the test statistic,
    the p-value, the critical values and a one-line verdict, then returns
    whether the p-value is at or below 0.05.
    """
    adf_result = adfuller(timeseries.dropna())
    adf_statistic = adf_result[0]
    p_value = adf_result[1]
    critical_values = adf_result[4]

    print('ADF Statistic:', adf_statistic)
    print('p-value:', p_value)
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold}')

    stationary = p_value <= 0.05
    verdict = "Series is stationary" if stationary else "Series is non-stationary"
    print(verdict)
    return stationary

print("\nStationarity test:")
is_stationary = check_stationarity(mission_ts['avg_occupancy'])


In [None]:
# Hold out the final week of the Mission series for evaluation;
# everything before it is used for fitting.
test_size = 24 * 7  # 7 days of hourly data
occupancy_series = mission_ts['avg_occupancy']
train_data = occupancy_series.iloc[:-test_size]
test_data = occupancy_series.iloc[-test_size:]

train_start, train_end = train_data.index.min(), train_data.index.max()
test_start, test_end = test_data.index.min(), test_data.index.max()

print(f"Training data: {len(train_data)} hours")
print(f"Test data: {len(test_data)} hours")
print(f"Train period: {train_start} to {train_end}")
print(f"Test period: {test_start} to {test_end}")

# Function to calculate forecast accuracy metrics
def calculate_metrics(actual, predicted):
    """Compute point-forecast error metrics.

    Both inputs are converted to flat float arrays and compared by position,
    so lists, arrays and pandas Series (whatever their index) are all
    accepted and index labels never cause misalignment.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Forecast values, same length as ``actual``.

    Returns
    -------
    dict
        ``'MAE'``, ``'MSE'``, ``'RMSE'`` and ``'MAPE'`` (in percent).
        MAPE is averaged only over points where ``actual`` is non-zero,
        because the percentage error is undefined there; it is NaN when
        every actual value is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    actual_arr = np.asarray(actual, dtype=float).ravel()
    predicted_arr = np.asarray(predicted, dtype=float).ravel()

    if actual_arr.size == 0:
        raise ValueError("calculate_metrics needs at least one observation")
    if actual_arr.shape != predicted_arr.shape:
        raise ValueError(
            f"actual and predicted differ in length: {actual_arr.size} vs {predicted_arr.size}"
        )

    errors = actual_arr - predicted_arr
    mae = float(np.mean(np.abs(errors)))
    mse = float(np.mean(errors ** 2))
    rmse = float(np.sqrt(mse))

    # Skip zero actuals: dividing by them would turn MAPE into inf/NaN.
    nonzero = actual_arr != 0
    if nonzero.any():
        mape = float(np.mean(np.abs(errors[nonzero] / actual_arr[nonzero])) * 100)
    else:
        mape = float('nan')

    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'MAPE': mape
    }

# Show where the series is cut: training span in blue, held-out week in red.
fig, ax = plt.subplots(figsize=(15, 6))
split_segments = (
    (train_data, 'Training Data', 'blue'),
    (test_data, 'Test Data', 'red'),
)
for segment, segment_label, segment_color in split_segments:
    ax.plot(segment.index, segment.values, label=segment_label, color=segment_color, alpha=0.7)
ax.set_title('Train-Test Split for Mission Neighborhood', fontsize=16, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Occupancy Rate')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()


In [None]:
print("Trying ARIMA models...")

# Small hand-picked grid of (p, d, q) orders; the fit with the lowest AIC
# on the training data is kept.
arima_orders = [(1,1,1), (2,1,1), (1,1,2), (2,1,2), (1,0,1), (2,0,1)]
best_aic = float('inf')
best_order = None
best_model = None

for order in arima_orders:
    try:
        model = ARIMA(train_data, order=order)
        fitted_model = model.fit()
    except Exception as exc:
        # A single order failing to fit should not stop the search, but say
        # so instead of hiding it. Catching Exception (not a bare except)
        # lets KeyboardInterrupt / SystemExit propagate.
        print(f"  ARIMA{order} could not be fitted: {exc}")
        continue
    if fitted_model.aic < best_aic:
        best_aic = fitted_model.aic
        best_order = order
        best_model = fitted_model

if best_model is None:
    # Fail here with a clear message rather than with an AttributeError on
    # None at the forecast call below.
    raise RuntimeError("None of the candidate ARIMA orders could be fitted to the training data")

print(f"Best ARIMA: {best_order}")
print(f"AIC: {best_aic:.2f}")

# Forecast exactly the held-out horizon so it lines up with test_data.
arima_forecast = best_model.forecast(steps=test_size)
arima_forecast_index = test_data.index

arima_metrics = calculate_metrics(test_data.values, arima_forecast)
print("\nARIMA results:")
for metric, value in arima_metrics.items():
    print(f"{metric}: {value:.4f}")

# "Accuracy" in this notebook is defined as 100 minus MAPE.
arima_accuracy = 100 - arima_metrics['MAPE']
print(f"Accuracy: {arima_accuracy:.2f}%")


In [None]:
# ETS (Exponential Smoothing) Model
print("Building ETS model...")

# Candidate trend/seasonality combinations; the fit with the lowest AIC wins.
ets_configs = [
    {'trend': 'add', 'seasonal': 'add', 'seasonal_periods': 24},  # Daily seasonality
    {'trend': 'add', 'seasonal': 'mul', 'seasonal_periods': 24},
    {'trend': 'mul', 'seasonal': 'add', 'seasonal_periods': 24},
    {'trend': None, 'seasonal': 'add', 'seasonal_periods': 24},
    {'trend': 'add', 'seasonal': None}
]

best_ets_aic = float('inf')
best_ets_config = None
best_ets_model = None

for config in ets_configs:
    try:
        model = ExponentialSmoothing(train_data, **config)
        fitted_model = model.fit()
    except Exception as exc:
        # Skip configurations that cannot be fitted, but report them.
        # Catching Exception (not a bare except) lets KeyboardInterrupt /
        # SystemExit propagate.
        print(f"  ETS config {config} could not be fitted: {exc}")
        continue
    if fitted_model.aic < best_ets_aic:
        best_ets_aic = fitted_model.aic
        best_ets_config = config
        best_ets_model = fitted_model

if best_ets_model is None:
    # Fail here with a clear message rather than with an AttributeError on
    # None at the forecast call below.
    raise RuntimeError("None of the candidate ETS configurations could be fitted to the training data")

print(f"Best ETS config: {best_ets_config}")
print(f"Best ETS AIC: {best_ets_aic:.2f}")

# Generate ETS forecasts over the same horizon as the held-out data
ets_forecast = best_ets_model.forecast(steps=test_size)

# Calculate ETS metrics
ets_metrics = calculate_metrics(test_data.values, ets_forecast)
print("\nETS Model Performance:")
for metric, value in ets_metrics.items():
    print(f"{metric}: {value:.4f}")

# "Accuracy" is defined as 100 minus MAPE, matching the ARIMA cell.
ets_accuracy = 100 - ets_metrics['MAPE']
print(f"ETS Accuracy: {ets_accuracy:.2f}%")

# Compare models
print(f"\nModel Comparison:")
print(f"ARIMA Accuracy: {arima_accuracy:.2f}%")
print(f"ETS Accuracy: {ets_accuracy:.2f}%")

# Ties (and a NaN ARIMA accuracy) fall through to ETS.
if arima_accuracy > ets_accuracy:
    print("ARIMA performs better!")
    best_forecast = arima_forecast
    best_accuracy = arima_accuracy
    best_model_name = "ARIMA"
else:
    print("ETS performs better!")
    best_forecast = ets_forecast
    best_accuracy = ets_accuracy
    best_model_name = "ETS"


In [None]:
# Visualize actual vs forecasted values
plt.figure(figsize=(16, 10))

# Plot 1: last 168 training hours (one week) followed by the test week and both forecasts
plt.subplot(2, 1, 1)
plt.plot(train_data.index[-168:], train_data.values[-168:], label='Training Data (Last Week)', color='blue', alpha=0.7)
plt.plot(test_data.index, test_data.values, label='Actual', color='black', linewidth=2)
plt.plot(arima_forecast_index, arima_forecast, label='ARIMA Forecast', color='red', linestyle='--', linewidth=2)
# Both forecasts cover the test horizon, so they share the same x-axis index.
plt.plot(arima_forecast_index, ets_forecast, label='ETS Forecast', color='green', linestyle='--', linewidth=2)
plt.title('Parking Occupancy Forecasting - Mission Neighborhood', fontsize=16, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Occupancy Rate')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Zoomed in on test period
plt.subplot(2, 1, 2)
plt.plot(test_data.index, test_data.values, label='Actual', color='black', linewidth=2, marker='o', markersize=3)
plt.plot(arima_forecast_index, arima_forecast, label=f'ARIMA Forecast (Acc: {arima_accuracy:.1f}%)', 
         color='red', linestyle='--', linewidth=2, marker='s', markersize=3)
plt.plot(arima_forecast_index, ets_forecast, label=f'ETS Forecast (Acc: {ets_accuracy:.1f}%)', 
         color='green', linestyle='--', linewidth=2, marker='^', markersize=3)
plt.title('Detailed View - Test Period', fontsize=14, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Occupancy Rate')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print summary
print(f"\nForecast Summary:")
print(f"Best Model: {best_model_name}")
print(f"Best Accuracy: {best_accuracy:.2f}%")
print(f"Target Accuracy: 92%")
print(f"Target Met: {'Yes' if best_accuracy >= 92 else 'No'}")

# Additional stats for the presentation. Report the errors of the model named
# above; previously ARIMA's figures were printed even when ETS had won.
best_metrics = arima_metrics if best_model_name == "ARIMA" else ets_metrics
print(f"\nAdditional Statistics:")
print(f"Mean Absolute Error: {best_metrics['MAE']:.4f}")
print(f"Root Mean Square Error: {best_metrics['RMSE']:.4f}")


In [None]:
def calculate_dynamic_pricing(predicted_occupancy, base_price=2.50):
    """Map each predicted occupancy to a price tier.

    Tiers are chosen by the first upper bound the occupancy falls below:
    under 0.5 is "Low" (x0.8), under 0.7 is "Base" (x1.0), under 0.85 is
    "High" (x1.3); everything else is "Peak" (x1.6).

    Returns a list with one dict per input value, holding the keys
    'occupancy', 'price' (base_price times the multiplier), 'multiplier'
    and 'tier'.
    """
    # (exclusive upper bound, price multiplier, tier label), checked in order.
    bounded_tiers = (
        (0.5, 0.8, "Low"),
        (0.7, 1.0, "Base"),
        (0.85, 1.3, "High"),
    )
    # Fallback for anything not below the last bound.
    # this might be too aggressive but let's see
    top_tier = (1.6, "Peak")

    def pick_tier(level):
        for upper_bound, multiplier, label in bounded_tiers:
            if level < upper_bound:
                return multiplier, label
        return top_tier

    pricing_tiers = []
    for occupancy in predicted_occupancy:
        price_multiplier, tier = pick_tier(occupancy)
        pricing_tiers.append({
            'occupancy': occupancy,
            'price': base_price * price_multiplier,
            'multiplier': price_multiplier,
            'tier': tier
        })
    return pricing_tiers

# Price every forecast hour of the winning model and tag it with its timestamp.
pricing_strategy = calculate_dynamic_pricing(best_forecast)
pricing_df = pd.DataFrame(pricing_strategy)
pricing_df['timestamp'] = arima_forecast_index
pricing_df['hour'] = pricing_df['timestamp'].dt.hour

# Per-tier mean price, number of hours, and mean occupancy.
tier_summary = pricing_df.groupby('tier').agg({
    'price': ['mean', 'count'],
    'occupancy': 'mean'
}).round(2)
print("Pricing tiers:")
print(tier_summary)

# using elasticity of -0.5 (seems reasonable for parking)
price_elasticity = -0.5
base_price = 2.50

# Percent price change vs the flat rate -> fractional demand change -> adjusted occupancy.
price_delta = pricing_df['price'] - base_price
pricing_df['price_change_pct'] = price_delta / base_price * 100
pricing_df['expected_demand_change'] = pricing_df['price_change_pct'] * price_elasticity / 100
demand_scale = 1 + pricing_df['expected_demand_change']
pricing_df['adjusted_occupancy'] = pricing_df['occupancy'] * demand_scale

# Hours whose predicted occupancy is strictly above 0.85 count as congested.
peak_hours = pricing_df[pricing_df['occupancy'] > 0.85]
if peak_hours.empty:
    print("No peak hours found")
else:
    original_peak_occupancy = peak_hours['occupancy'].mean()
    adjusted_peak_occupancy = peak_hours['adjusted_occupancy'].mean()
    congestion_reduction = (original_peak_occupancy - adjusted_peak_occupancy) / original_peak_occupancy * 100

    print(f"\nCongestion analysis:")
    print(f"Original peak: {original_peak_occupancy:.3f}")
    print(f"With pricing: {adjusted_peak_occupancy:.3f}")
    print(f"Reduction: {congestion_reduction:.1f}%")
    print(f"Target was 18% - {'hit it!' if congestion_reduction >= 18 else 'close enough'}")


In [None]:
# Three stacked panels: occupancy before/after pricing, price per hour by tier,
# and the average price for each hour of the day.
fig, (ax_occupancy, ax_price, ax_hourly) = plt.subplots(3, 1, figsize=(16, 12))

# Panel 1: Occupancy vs adjusted occupancy over time
ax_occupancy.plot(pricing_df['timestamp'], pricing_df['occupancy'],
                  label='Predicted Occupancy', color='blue', linewidth=2)
ax_occupancy.plot(pricing_df['timestamp'], pricing_df['adjusted_occupancy'],
                  label='Adjusted Occupancy (with pricing)',
                  color='red', linestyle='--', linewidth=2)
ax_occupancy.set_title('Occupancy Forecast with Dynamic Pricing Impact', fontsize=16, fontweight='bold')
ax_occupancy.set_xlabel('Date')
ax_occupancy.set_ylabel('Occupancy Rate')
ax_occupancy.legend()
ax_occupancy.grid(True, alpha=0.3)

# Panel 2: Dynamic pricing over time, one scatter series per tier
# (tiers appear in order of first occurrence)
colors = {'Low': 'green', 'Base': 'blue', 'High': 'orange', 'Peak': 'red'}
for tier, tier_data in pricing_df.groupby('tier', sort=False):
    ax_price.scatter(tier_data['timestamp'], tier_data['price'],
                     label=f'{tier} Tier', color=colors.get(tier, 'gray'), alpha=0.7, s=30)

ax_price.set_title('Dynamic Pricing Strategy Over Time', fontsize=16, fontweight='bold')
ax_price.set_xlabel('Date')
ax_price.set_ylabel('Price ($)')
ax_price.legend()
ax_price.grid(True, alpha=0.3)

# Panel 3: Hourly pricing pattern
hourly_pricing = (
    pricing_df
    .groupby('hour')[['price', 'occupancy', 'adjusted_occupancy']]
    .mean()
    .reset_index()
)

ax_hourly.bar(hourly_pricing['hour'], hourly_pricing['price'], alpha=0.7, color='skyblue',
              label='Average Price')
ax_hourly.set_title('Average Hourly Pricing Pattern', fontsize=16, fontweight='bold')
ax_hourly.set_xlabel('Hour of Day')
ax_hourly.set_ylabel('Average Price ($)')
ax_hourly.legend()
ax_hourly.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

# Revenue analysis: one paid hour at the given price for every forecast hour.
forecast_hours = len(pricing_df)
base_revenue = forecast_hours * base_price  # Assuming 1 hour per parking session
dynamic_revenue = pricing_df['price'].sum()
revenue_change = (dynamic_revenue - base_revenue) / base_revenue * 100

print(f"\nRevenue Analysis:")
print(f"Base revenue (fixed pricing): ${base_revenue:.2f}")
print(f"Dynamic pricing revenue: ${dynamic_revenue:.2f}")
print(f"Revenue change: {revenue_change:.1f}%")

# this is probably not super accurate but gives us a ballpark estimate


In [None]:
# Build the export table: every aggregated (neighborhood, hour) row tagged
# 'Historical', followed by the Mission hold-out week tagged 'Forecast'.
# Note that the Mission hold-out hours therefore appear twice, once per tag.
flat_rate = 2.50
export_columns = [
    'timestamp', 'neighborhood', 'occupancy_rate', 'meter_count', 'hour',
    'day_of_week', 'day_name', 'is_weekend', 'data_type',
    'predicted_occupancy', 'recommended_price', 'pricing_tier',
]

# Historical rows: the observed occupancy doubles as the "prediction" and the
# flat base price applies. Built column-wise instead of row by row.
historical_df = (
    hourly_agg
    .rename(columns={'avg_occupancy': 'occupancy_rate'})
    .assign(
        data_type='Historical',
        predicted_occupancy=lambda frame: frame['occupancy_rate'],
        recommended_price=flat_rate,
        pricing_tier='Base',
    )[export_columns]
)

# Forecast rows: take everything by position. best_forecast is indexed by
# timestamp, and integer keys on such a Series are a deprecated positional
# fallback, so convert it to a plain array first.
forecast_values = np.asarray(best_forecast, dtype=float)
forecast_weekday = arima_forecast_index.weekday
forecast_df = pd.DataFrame({
    'timestamp': arima_forecast_index,
    'neighborhood': 'Mission',
    'occupancy_rate': test_data.to_numpy(),
    'meter_count': 50,
    'hour': arima_forecast_index.hour,
    'day_of_week': forecast_weekday,
    'day_name': arima_forecast_index.day_name(),
    'is_weekend': forecast_weekday >= 5,
    'data_type': 'Forecast',
    'predicted_occupancy': forecast_values,
    'recommended_price': pricing_df['price'].to_numpy(),
    'pricing_tier': pricing_df['tier'].to_numpy(),
})[export_columns]

export_df = pd.concat([historical_df, forecast_df], ignore_index=True)

export_df['price_vs_base'] = export_df['recommended_price'] / flat_rate
# include_lowest keeps an occupancy of exactly 0 in 'Low', and the open upper
# bin keeps values above 1.0 in 'Very High' instead of leaving them NaN.
export_df['occupancy_category'] = pd.cut(
    export_df['occupancy_rate'],
    bins=[0, 0.5, 0.7, 0.85, np.inf],
    labels=['Low', 'Medium', 'High', 'Very High'],
    include_lowest=True,
)

export_timestamps = pd.to_datetime(export_df['timestamp'])
export_df['date'] = export_timestamps.dt.date
export_df['month'] = export_timestamps.dt.month
export_df['week'] = export_timestamps.dt.isocalendar().week

print(f"Export dataset: {export_df.shape}")
print(f"Date range: {export_df['timestamp'].min()} to {export_df['timestamp'].max()}")
print(f"Data types: {export_df['data_type'].value_counts()}")

export_filename = 'sf_parking_forecast_analysis.csv'
export_df.to_csv(export_filename, index=False)
print(f"\nExported to: {export_filename}")

print("\nSample:")
export_df.head(10)
