In [13]:
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sklearn.preprocessing import MinMaxScaler
import import_ipynb

from prepare_data_v2 import prepare_parking_info_df_v2, prepare_historical_parking_df_v2

load_dotenv()

numerical_features = [
    'total_lots',
    'x_coord',
    'y_coord',
    'sin_hour',
    'cos_hour',
    'sin_day_of_week',
    'cos_day_of_week'
]

# fetch prepared car lot info (static)
parking_info_df = prepare_parking_info_df_v2()
# fetch prepared car parking data (historical)
historical_parking_df = prepare_historical_parking_df_v2(use_mean_sampling=True)
historical_parking_df = historical_parking_df.drop(columns=['sin_month', 'cos_month'])
# get a smaller set of historical parking dataframe to work upon
historical_parking_df = historical_parking_df[:100000]

# prepare a resultant DataFrame
resultant_df = pd.merge(historical_parking_df, parking_info_df, on='car_park_number', how='inner')

scaler = MinMaxScaler()
resultant_df[numerical_features] = scaler.fit_transform(resultant_df[numerical_features])

# fit and transform available_lots column
#resultant_df['available_lots'] = scaler.fit_transform(resultant_df[['available_lots']])

print("Resultant dataframe shape = ", resultant_df.shape)
print("Resultant dataframe top 5")
print(resultant_df.head())

#return resultant_df

Car park static info shape =  (2243, 4)
Car park static info top 5
  car_park_number     x_coord     y_coord  car_park_decks
0             ACB  30314.7936  31490.4942               1
1             ACM  33758.4143  33695.5198               5
2             AH1  29257.7203  34500.3599               0
3            AK19  28185.4359  39012.6664               0
4            AK31  29482.0290  38684.1754               0




Historical parking data shape =  (15987562, 9)
Historical parking data top 5
  car_park_number  available_lots  total_lots  sin_hour  cos_hour  \
0             A10       30.000000          62 -0.258819  0.965926   
1             A10       29.000000          62  0.000000  1.000000   
2             A10       27.000000          62  0.000000  1.000000   
3             A10       24.833333          62  0.258819  0.965926   
4             A10       24.666667          62  0.258819  0.965926   

   sin_day_of_week  cos_day_of_week     sin_month  cos_month  
0        -0.974928        -0.222521 -2.449294e-16   1.000000  
1        -0.781831         0.623490  5.000000e-01   0.866025  
2        -0.781831         0.623490  5.000000e-01   0.866025  
3        -0.781831         0.623490  5.000000e-01   0.866025  
4        -0.781831         0.623490  5.000000e-01   0.866025  
Resultant dataframe shape =  (100000, 30)
Resultant dataframe top 5
  car_park_number  available_lots  total_lots  sin_hour  cos_h

In [9]:
print(resultant_df.columns)

Index(['car_park_number', 'available_lots', 'total_lots', 'sin_hour',
       'cos_hour', 'sin_day_of_week', 'cos_day_of_week', 'x_coord', 'y_coord',
       'car_park_decks', 'car_park_type_BASEMENT CAR PARK',
       'car_park_type_COVERED CAR PARK',
       'car_park_type_MECHANISED AND SURFACE CAR PARK',
       'car_park_type_MECHANISED CAR PARK',
       'car_park_type_MULTI-STOREY CAR PARK', 'car_park_type_SURFACE CAR PARK',
       'car_park_type_SURFACE/MULTI-STOREY CAR PARK',
       'type_of_parking_system_COUPON PARKING',
       'type_of_parking_system_ELECTRONIC PARKING',
       'short_term_parking_7AM-10.30PM', 'short_term_parking_7AM-7PM',
       'short_term_parking_NO', 'short_term_parking_WHOLE DAY',
       'free_parking_NO', 'free_parking_SUN & PH FR 1PM-10.30PM',
       'free_parking_SUN & PH FR 7AM-10.30PM', 'night_parking_NO',
       'night_parking_YES', 'car_park_basement_N', 'car_park_basement_Y'],
      dtype='object')


In [31]:
import pmdarima as pm
import pandas as pd
import pickle  # For saving and loading models

# Function to train ARIMA models for each parking lot
def train_arima_models(df):
    # Dictionary to store trained models for each parking lot
    parking_lot_models = {}
    
    # Get unique parking lot numbers
    parking_lots = df['car_park_number'].unique()
    
    # Loop through each parking lot and train an ARIMA model
    for lot in parking_lots:
        df_parking_lot = df[df['car_park_number'] == lot]
        available_lots = df_parking_lot['available_lots']
        
        # Train auto_arima to find the best ARIMA model for this parking lot
        model_auto = pm.auto_arima(available_lots,
                                   start_p=1, start_q=1,
                                   max_p=5, max_q=5,
                                   seasonal=False,
                                   trace=False,
                                   error_action='ignore',
                                   suppress_warnings=True,
                                   stepwise=True)
        
        # Save the trained model into the dictionary
        parking_lot_models[lot] = model_auto
    
    # Save all trained models to disk for future use
#    with open('parking_lot_arima_models.pkl', 'wb') as f:
#        pickle.dump(parking_lot_models, f)

    return parking_lot_models

# Example usage of the function
# df is your dataframe containing 'car_park_number', 'timestamp', 'available_lots'
parking_lot_models = train_arima_models(resultant_df)


In [27]:
!pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp312-cp312-win_amd64.whl.metadata (8.0 kB)
Collecting Cython!=0.29.18,!=0.29.31,>=0.29 (from pmdarima)
  Downloading Cython-3.0.11-cp312-cp312-win_amd64.whl.metadata (3.2 kB)
Downloading pmdarima-2.0.4-cp312-cp312-win_amd64.whl (625 kB)
   ---------------------------------------- 0.0/625.1 kB ? eta -:--:--
    --------------------------------------- 10.2/625.1 kB ? eta -:--:--
   - ------------------------------------- 30.7/625.1 kB 435.7 kB/s eta 0:00:02
   ----------- ---------------------------- 174.1/625.1 kB 1.7 MB/s eta 0:00:01
   ------------------------------------- -- 583.7/625.1 kB 4.1 MB/s eta 0:00:01
   ---------------------------------------- 625.1/625.1 kB 3.9 MB/s eta 0:00:00
Downloading Cython-3.0.11-cp312-cp312-win_amd64.whl (2.8 MB)
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   ------ --------------------------------- 0.4/2.8 MB 13.2 MB/s eta 0:00:01
   -------------- -------------------------

In [33]:
#import pickle
import pandas as pd
from datetime import timedelta

# Function to predict available lots for a given parking lot and future timestamp
def predict_available_lots(car_park_number, future_time, n_periods=1):
    # Load the trained models from the pickle file
#    with open('parking_lot_arima_models.pkl', 'rb') as f:
#        parking_lot_models = pickle.load(f)
    
    # Get the trained ARIMA model for the specific parking lot
    if car_park_number in parking_lot_models:
        model_auto = parking_lot_models[car_park_number]
        
        # Forecast the available lots for the next n_periods (you can adjust based on future time)
        forecast = model_auto.predict(n_periods=n_periods)
        
        return forecast
    else:
        print(f"No model found for parking lot: {car_park_number}")
        return None

# Example usage
# Predict for parking lot 'A123' for the next hour (or you can customize this based on your time logic)
predicted_lots = predict_available_lots('A123', future_time='2024-10-21 14:00', n_periods=1)
print(f"Predicted available lots: {predicted_lots}")


No model found for parking lot: A123
Predicted available lots: None


In [45]:
def prepare_test_data(resultant_df, test_size=0.2):
    # Dictionary to hold test sets for each parking lot
    test_data = {}

    # Get unique parking lot numbers
    parking_lots = resultant_df['car_park_number'].unique()

    for lot in parking_lots:
        # Filter data for the specific parking lot
        df_parking_lot = resultant_df[resultant_df['car_park_number'] == lot].copy()

        # Available lots is the target variable
        available_lots = df_parking_lot['available_lots']

        # Calculate the index to split the data into train and test
        train_size = int(len(available_lots) * (1 - test_size))
        test = available_lots[train_size:]  # Use the last 20% as the test set

        # Store the test set for this parking lot
        test_data[lot] = test

    return test_data

# Prepare the test data
test_data = prepare_test_data(resultant_df)


In [55]:
import pmdarima as pm
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Assuming 'available_lots' is the column you're trying to predict
available_lots = resultant_df['available_lots']

# Step 1: Train-test split based on index (80% train, 20% test)
train_size = int(len(available_lots) * 0.8)  # Use 80% for training
train, test = available_lots[:train_size], available_lots[train_size:]

# Step 2: Train the ARIMA model on the training data
model_auto = pm.auto_arima(train,
                           start_p=5, start_q=5,
                           max_p=10, max_q=10,
                           seasonal=False,
                           trace=False,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True)

# Step 3: Make predictions on the test set
n_periods = len(test)  # Forecast for the same number of periods as the test set
forecast = model_auto.predict(n_periods=n_periods)

# Step 4: Evaluation
mae = mean_absolute_error(test, forecast)
mse = mean_squared_error(test, forecast)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(test, forecast)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')


MAE: 90.3119436669136
MSE: 12740.326516876277
RMSE: 112.8730548752725
MAPE: 3159099786657248.5
