This model predicts the status of parking bays (Occupied or unoccupied) using sensor data. The main input features are hour, weekday, month, zone number, latitude, and longitude, which are extracted from the given dataset.

# 0. Import libraries

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import joblib

# 1. Data loading and preprocessing

In [12]:
# Load data
df = pd.read_csv('on-street-parking-bay-sensors.csv')

# Parse Status_Timestamp as datetime
df['Status_Timestamp'] = pd.to_datetime(df['Status_Timestamp'])

# Extract periodic time features
df['hour'] = df['Status_Timestamp'].dt.hour
df['weekday'] = df['Status_Timestamp'].dt.weekday  # 0=Monday
df['month'] = df['Status_Timestamp'].dt.month

# Select required features and output
selected_columns = [
    'Zone_Number',
    'Location',  # split into latitude/longitude
    'hour',
    'weekday',
    'month',
    'Status_Description'  # Output variable
]

df_selected = df[selected_columns].copy()

# Split coordinates into separate columns
df_selected[['latitude', 'longitude']] = df_selected['Location'].str.split(',', expand=True)
df_selected['latitude'] = df_selected['latitude'].astype(float)
df_selected['longitude'] = df_selected['longitude'].astype(float)

# Drop original Location column
df_selected = df_selected.drop(columns=['Location'])

# Display processed data
print(df_selected.head())

   Zone_Number  hour  weekday  month Status_Description   latitude   longitude
0          NaN    10        4      5         Unoccupied -37.823395  144.966748
1       7539.0    12        4     10         Unoccupied -37.811640  144.960617
2       7550.0     7        2     10            Present -37.809940  144.966355
3       7550.0     6        2     10         Unoccupied -37.810365  144.964902
4       7549.0     6        2     10         Unoccupied -37.810206  144.965631


# 2. Data preparation for modeling

In [13]:
# Features and labels (Unoccupied=0, Present=1)
X = df_selected[['latitude', 'longitude', 'hour', 'weekday', 'month', 'Zone_Number']]
y = (df_selected['Status_Description'] == 'Present').astype(int)  # 1=occupied, 0=unoccupied

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3. Model training and hyperparameter tuning

In [14]:
# Parameter grid for tuning
param_grid = {
    'n_estimators': [100, 200],            # Number of trees
    'max_depth': [8, 12, 16],              # Maximum tree depth
    'min_samples_split': [2, 5, 10],       # Minimum samples for node split
    'min_samples_leaf': [1, 3, 5],         # Minimum samples in leaf node
    'max_features': ['sqrt', 'log2']       # Feature selection method for splits
}

# Initialize Random Forest model
rf = RandomForestClassifier(random_state=42)

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters: {'max_depth': 16, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200}
Best cross-validation score: 0.6879480686236046


# 4. Model evaluation

In [15]:
# Evaluate on test set using best model
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Overall accuracy
print("\nTest Accuracy:", accuracy_score(y_test, y_pred))

Confusion Matrix:
[[245  99]
 [104 214]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.71      0.71       344
           1       0.68      0.67      0.68       318

    accuracy                           0.69       662
   macro avg       0.69      0.69      0.69       662
weighted avg       0.69      0.69      0.69       662


Test Accuracy: 0.6933534743202417


# 5. Model saving

In [16]:
# Save the trained model
joblib.dump(best_rf, 'best_rf_model.pkl')

['best_rf_model.pkl']