In [None]:
import pandas as pd

# Quick look at the raw export: parse only the first 30 records
preview_path = '/kaggle/input/parking-dataset/On-street_Car_Parking_Sensor_Data_-_2019.csv'
df = pd.read_csv(preview_path, nrows=30)

# Echo the sample and keep a copy on disk for offline inspection
print(df)
df.to_csv('first_30_rows.csv', index=False)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Read just the four columns that the occupancy pipeline uses
df = pd.read_csv(
    '/kaggle/input/parking-dataset/On-street_Car_Parking_Sensor_Data_-_2019.csv',
    usecols=['AreaName', 'BayId', 'ArrivalTime', 'DepartureTime'],
    low_memory=False,
)

# Parse both timestamp columns; values that cannot be parsed become NaT
for ts_col in ('ArrivalTime', 'DepartureTime'):
    df[ts_col] = pd.to_datetime(df[ts_col], errors='coerce')

# Discard every record where either timestamp is missing or failed to parse
df = df.dropna(subset=['ArrivalTime', 'DepartureTime'])

# The area with the most records is used as the modelling target
area_counts = df['AreaName'].value_counts()
area = area_counts.idxmax()
print("Selected Area:", area)

# Work on an independent copy so new columns never touch `df`
in_selected_area = df['AreaName'] == area
df_area = df.loc[in_selected_area].copy()

# Capacity proxy: number of distinct bay ids observed in that area
total_bays = df_area['BayId'].nunique()
print("Total bays:", total_bays)

# Snap every stay outward onto the 5-minute grid: arrivals are floored and
# departures are ceiled, so a stay always covers the buckets it touches.
# NOTE(review): this treats every row as an interval with a vehicle present —
# confirm against the dataset schema.
df_area['start'] = df_area['ArrivalTime'].dt.floor('5min')
df_area['end']   = df_area['DepartureTime'].dt.ceil('5min')

# One event per arrival (+1) and one per departure (-1)
events = pd.concat([
    df_area[['start', 'BayId']].rename(columns={'start':'time'}).assign(change=1),
    df_area[['end', 'BayId']].rename(columns={'end':'time'}).assign(change=-1)
])

# Net change per timestamp, accumulated over time, gives the occupancy level
ts = events.groupby('time')['change'].sum().sort_index().cumsum()

# Convert occupancy -> free spots
ts_free = (total_bays - ts).rename('free_spots')

# The grouped series only has rows for timestamps where an event occurred.
# The sliding windows built later assume a fixed 5-minute step between rows
# (24 rows = 2 hours), and the multi-area cells further down resample with
# asfreq('5min').interpolate() before predicting. Put the training series on
# the same continuous grid so training and inference see identical spacing.
ts_free = ts_free.asfreq('5min').interpolate()

print(ts_free.head(20))


In [None]:
# Turn the free-spot series into a feature table with calendar columns.
# Column order (free_spots, hour, day_of_week, is_weekend) matters downstream:
# column 0 is treated as the prediction target.
stamp = ts_free.index
df_ts = ts_free.to_frame(name='free_spots').assign(
    hour=stamp.hour,
    day_of_week=stamp.dayofweek,
    # dayofweek is 0=Monday ... 6=Sunday, so >= 5 marks Saturday/Sunday
    is_weekend=lambda frame: (frame['day_of_week'] >= 5).astype(int),
)

print(df_ts.head(20))


In [None]:
# Scale all features to [0, 1] without leaking test-period statistics
from sklearn.preprocessing import MinMaxScaler

# Fit the min/max on the chronological first 80% of rows only. The hold-out
# split made later in this notebook also keeps the first 80% of windows for
# training, so these rows are all part of the training period; fitting on the
# full series would let test-period extremes influence the scaling.
n_fit_rows = int(len(df_ts) * 0.8)

scaler = MinMaxScaler()
scaler.fit(df_ts.iloc[:n_fit_rows])

# Apply the training-period scaling to the whole timeline.
# Result shape: (n_samples, 4 features); test-period values may fall slightly
# outside [0, 1] if they exceed the training range.
ts_scaled = scaler.transform(df_ts)


In [None]:
# Build supervised samples from the scaled feature matrix.
# Each input is `window_size` consecutive rows (all features); its label is the
# free_spots value (column 0) found `horizon` rows after the end of that slice,
# i.e. horizon + 1 rows after the last row the model sees.
window_size = 24   # rows of history per sample
horizon = 6        # gap, in rows, between the end of the slice and the label

data = ts_scaled
n_windows = len(data) - window_size - horizon

X = np.array([data[s:s + window_size] for s in range(n_windows)])
y = np.array([data[s + window_size + horizon][0] for s in range(n_windows)])

print("X shape:", X.shape)   # expected: (samples, 24, 4)
print("y shape:", y.shape)


In [None]:
# Chronological 80/20 hold-out: no shuffling, the latest windows form the test set
train_size = int(len(X) * 0.8)

X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

# Shape check only; the arrays already have the (samples, timesteps, features)
# layout and nothing is reshaped here
print("Before reshape:", X_train.shape)


In [None]:
# Time-series aware cross validation: every fold validates on data that comes
# after its training portion
tscv = TimeSeriesSplit(n_splits=5)

def build_model(n_timesteps=None, n_features=4):
    """Create and compile the two-layer LSTM regressor.

    Parameters
    ----------
    n_timesteps : int, optional
        Number of rows in each input window. Defaults to the notebook-level
        ``window_size`` when omitted, which is what the original code used.
    n_features : int, default 4
        Number of feature columns per row (free_spots, hour, day_of_week,
        is_weekend in this notebook).

    Returns
    -------
    A compiled Keras ``Sequential`` model with a single linear output,
    trained with the Adam optimizer on mean squared error.
    """
    if n_timesteps is None:
        # Resolved at call time so a later change to window_size is picked up
        n_timesteps = window_size

    model = Sequential([
        # First LSTM returns the full sequence so the second one can consume it
        LSTM(64, return_sequences=True, input_shape=(n_timesteps, n_features)),
        Dropout(0.2),
        LSTM(64),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model


# Train a fresh model per fold and keep its final validation loss so the
# cross-validation produces a comparable summary, not only training logs.
cv_val_losses = []

for fold, (train_idx, val_idx) in enumerate(tscv.split(X_train), start=1):
    print(f"\n===== Fold {fold} =====")

    model = build_model()
    history = model.fit(
        X_train[train_idx], y_train[train_idx],
        validation_data=(X_train[val_idx], y_train[val_idx]),
        epochs=50,
        batch_size=64,
        verbose=1
    )

    # 'val_loss' is recorded because validation_data is supplied; the loss is
    # MSE on the scaled target (see model.compile in build_model)
    cv_val_losses.append(history.history['val_loss'][-1])

print("\nFinal-epoch validation loss per fold:", cv_val_losses)
print("Mean validation loss across folds:", np.mean(cv_val_losses))

In [None]:
# Fit a brand-new network on the complete training partition (no validation data)
model = build_model()
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=50,
    verbose=1,
)

In [None]:
# Predict on the hold-out windows (output is still in scaled units)
y_pred_scaled = model.predict(X_test)

# The scaler was fitted on every feature column, so inverse_transform needs a
# matrix of that width. Put the target in column 0, leave the rest at zero and
# read column 0 back after un-scaling.
n_features = ts_scaled.shape[1]

temp_true = np.zeros((len(y_test), n_features))
temp_true[:, 0] = y_test
y_true = scaler.inverse_transform(temp_true)[:, 0]

temp_pred = np.zeros((len(y_pred_scaled), n_features))
temp_pred[:, 0] = y_pred_scaled.ravel()
y_pred = scaler.inverse_transform(temp_pred)[:, 0]


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Hold-out errors in original units (free spots)
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print("MAE:", mae, "RMSE:", rmse)


In [None]:
# Side-by-side view of the first few hold-out predictions
sample_columns = {
    'Actual_Free_Spots': y_true.flatten(),
    'Predicted_Free_Spots': y_pred.flatten(),
}
compare = pd.DataFrame(sample_columns)

print(compare.head(20))

In [None]:
import pandas as pd

# Recover the timestamp of every hold-out label.
# Window i takes its label from row i + window_size + horizon of the series,
# and the first hold-out window is i = train_size, so labels start at that
# offset and continue for len(y_test) consecutive rows.
start_idx = train_size + window_size + horizon
stop_idx = start_idx + len(y_test)
timestamps = ts_free.index[start_idx:stop_idx]

# Assemble timestamp / actual / predicted columns
compare = pd.DataFrame({'timestamp': timestamps})
compare['Actual_Free_Spots'] = y_true.flatten()
compare['Predicted_Free_Spots'] = y_pred.flatten()

# Persist for later inspection
compare.to_csv("parking_predictions_with_timestamps.csv", index=False)
print("Saved: parking_predictions_with_timestamps.csv")


In [None]:
# Persist the trained network in the native Keras format
model_path = "lstm_parking_model.keras"
model.save(model_path)
print("Saved LSTM model as 'lstm_parking_model.keras'")


In [None]:
import joblib
# Persist the fitted scaler next to the model; both are needed to reproduce predictions
scaler_path = "scaler_parking.save"
joblib.dump(scaler, scaler_path)


In [None]:
import pandas as pd
import numpy as np

# Re-read the four columns needed for the multi-area evaluation
df = pd.read_csv(
    '/kaggle/input/parking-dataset/On-street_Car_Parking_Sensor_Data_-_2019.csv',
    usecols=['AreaName', 'BayId', 'ArrivalTime', 'DepartureTime'],
    low_memory=False,
)

# Parse timestamps (bad values become NaT) and drop rows lacking either one
for time_col in ('ArrivalTime', 'DepartureTime'):
    df[time_col] = pd.to_datetime(df[time_col], errors='coerce')
df = df.dropna(subset=['ArrivalTime', 'DepartureTime'])

# The three areas with the most records
area_counts = df['AreaName'].value_counts()
top_areas = area_counts.index[:3].tolist()
print("Top 3 areas:", top_areas)

# Window geometry for inference. These must equal the values used to build the
# training windows, otherwise the inputs will not match what the LSTM was
# trained on. Defined once here instead of on every loop iteration.
window_size = 24   # rows of history per sample (24 x 5 min = 2 hours)
horizon = 6        # label sits this many rows past the end of each window


def _free_spots_on_grid(area_df):
    """Return one area's free-spot count on a continuous 5-minute grid.

    Arrivals are floored and departures are ceiled to 5 minutes, turned into
    +1 / -1 events, summed per timestamp and accumulated into an occupancy
    level. Free spots are the number of distinct bay ids minus that level.
    Grid points without any event are filled by linear interpolation.
    """
    n_bays = area_df['BayId'].nunique()
    arrivals = pd.DataFrame({'time': area_df['ArrivalTime'].dt.floor('5min'), 'change': 1})
    departures = pd.DataFrame({'time': area_df['DepartureTime'].dt.ceil('5min'), 'change': -1})
    occupancy = (
        pd.concat([arrivals, departures])
        .groupby('time')['change'].sum()
        .sort_index()
        .cumsum()
    )
    return (n_bays - occupancy).rename('free_spots').asfreq('5min').interpolate()


def _predict_area(area_name, area_df):
    """Run the trained LSTM over one area; return a result frame or None.

    Uses the notebook-level ``scaler`` and ``model``. Both were fitted on the
    single busiest area, so applying them to other areas assumes their
    free-spot ranges are comparable. Returns None when the area has too few
    grid points to form even one window.
    """
    features = _free_spots_on_grid(area_df).to_frame()
    features['hour'] = features.index.hour
    features['day_of_week'] = features.index.dayofweek
    features['is_weekend'] = (features['day_of_week'] >= 5).astype(int)

    # Reuse the already-fitted scaler; do not refit per area
    scaled = scaler.transform(features)

    n_windows = len(scaled) - window_size - horizon
    if n_windows <= 0:
        return None

    windows = np.array([scaled[i:i + window_size] for i in range(n_windows)])
    pred_scaled = model.predict(windows, verbose=0)

    # inverse_transform expects all feature columns: place the prediction in
    # column 0 (free_spots), leave the others at zero, read column 0 back
    padded = np.zeros((len(pred_scaled), scaled.shape[1]))
    padded[:, 0] = pred_scaled.ravel()
    pred = scaler.inverse_transform(padded)[:, 0]

    return pd.DataFrame({
        'AreaName': area_name,
        # Window i is labelled with row i + window_size + horizon
        'timestamp': features.index[window_size + horizon:],
        'Predicted_Free_Spots': pred
    })


# Predict each area inside the helpers so that training-pipeline variables
# (ts_scaled, y_pred, timestamps, ...) are not overwritten by this cell
results = []

for area in top_areas:
    df_result = _predict_area(area, df[df['AreaName'] == area])
    if df_result is None:
        print(f"Skipping {area}: not enough 5-minute rows to build a window")
        continue
    results.append(df_result)

if not results:
    raise ValueError("No area had enough data to build a prediction window")

# Combine all areas
df_all_areas = pd.concat(results, ignore_index=True)

# Save to CSV
df_all_areas.to_csv("predicted_free_spots_top3areas.csv", index=False)
print("Saved: predicted_free_spots_top3areas.csv")


In [None]:
# Rebuild the observed free-spot series for each of the top areas so it can be
# joined against the predictions
actual_results = []

for area in top_areas:
    df_area = df.loc[df['AreaName'] == area].copy()
    total_bays = df_area['BayId'].nunique()

    # Floor arrivals / ceil departures onto the 5-minute grid
    df_area['start'] = df_area['ArrivalTime'].dt.floor('5min')
    df_area['end'] = df_area['DepartureTime'].dt.ceil('5min')

    # +1 for each arrival, -1 for each departure
    arrivals = df_area[['start', 'BayId']].rename(columns={'start': 'time'}).assign(change=1)
    departures = df_area[['end', 'BayId']].rename(columns={'end': 'time'}).assign(change=-1)
    events = pd.concat([arrivals, departures])

    # Running occupancy, converted to free spots on a gap-free 5-minute grid
    net_change = events.groupby('time')['change'].sum().sort_index()
    ts = net_change.cumsum()
    ts_free = (total_bays - ts).rename('free_spots')
    ts_free = ts_free.asfreq('5min').interpolate()

    df_actual = pd.DataFrame({
        'AreaName': area,
        'timestamp': ts_free.index,
        'Actual_Free_Spots': ts_free.values,
    })
    actual_results.append(df_actual)

df_actual_all = pd.concat(actual_results, ignore_index=True)


In [None]:
# Keep only (area, timestamp) pairs present in both the predicted and the
# actual tables
df_compare = df_all_areas.merge(
    df_actual_all,
    how='inner',
    on=['AreaName','timestamp'],
)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Pooled error over every matched row of the three areas
actual_all = df_compare['Actual_Free_Spots']
predicted_all = df_compare['Predicted_Free_Spots']

mae = mean_absolute_error(actual_all, predicted_all)
rmse = np.sqrt(mean_squared_error(actual_all, predicted_all))

print(f"Overall MAE across top 3 areas: {mae:.2f}")
print(f"Overall RMSE across top 3 areas: {rmse:.2f}")


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Areas that actually appear in the merged comparison table
areas = df_compare['AreaName'].unique()

# One record of error metrics per area
area_metrics = []

for area in areas:
    df_area = df_compare.loc[df_compare['AreaName'] == area]
    actual = df_area['Actual_Free_Spots']
    predicted = df_area['Predicted_Free_Spots']

    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    area_metrics.append(dict(AreaName=area, MAE=mae, RMSE=rmse))

df_metrics = pd.DataFrame(area_metrics)

print(df_metrics)
