In [97]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the data
df = pd.read_csv('dataset.csv')

# Convert 'Airport' and 'Terminal' into categorical numerical codes.
# Each column gets its OWN fitted encoder: refitting a single shared encoder
# on 'Terminal' would discard the 'Airport' mapping, making it impossible to
# inverse_transform Airport codes or to encode new rows consistently later.
label_encoders = {}
for categorical_column in ('Airport', 'Terminal'):
    column_encoder = LabelEncoder()
    df[categorical_column] = column_encoder.fit_transform(df[categorical_column])
    label_encoders[categorical_column] = column_encoder

# Backward compatibility: `label_encoder` previously ended up fitted on
# 'Terminal' (the last column encoded), so keep that binding.
label_encoder = label_encoders['Terminal']

In [98]:
# Model inputs: encoded location columns, calendar/time columns and traffic volume
X = df.loc[:, [
    'Airport',
    'Terminal',
    'Month',
    'Day of Month',
    'Hour',
    'Total Passengers',
    'Flights',
]]

# Model outputs: the four wait-time columns, one regressor is trained per column
Y = df.loc[:, [
    'Citizen Avg Wait Time',
    'Citizen Max Wait Time',
    'Non citizen Avg Wait Time',
    'Non citizen Max Wait Time',
]]

In [99]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Train on log-compressed targets: y -> 400 * log(y + 100) - 800.
# Only Y_train is transformed; Y_test stays in its original units, so
# predictions must be mapped back with exp((p + 800) / 400) - 100 before scoring.
Y_train = np.log(Y_train + 100).mul(400).sub(800)

In [102]:
# Build one independent XGBoost regressor per target column (same order as Y.columns)
models = []
for _ in range(Y.shape[1]):
    models.append(xgb.XGBRegressor(objective="reg:squarederror"))

# Fit each regressor on its own (transformed) target column
for column, model in zip(Y.columns, models):
    model.fit(X_train, Y_train[column])
    print(f"Model for {column} trained")

Model for Citizen Avg Wait Time trained
Model for Citizen Max Wait Time trained
Model for Non citizen Avg Wait Time trained
Model for Non citizen Max Wait Time trained


In [103]:
from sklearn.metrics import mean_absolute_error
import numpy as np

# Evaluate every per-target model and report the mean absolute error (MAE)
# in original target units. The models were fit on targets transformed with
# 400 * log(y + 100) - 800, so predictions are mapped back before scoring.


def _inverse_target_transform(values):
    """Undo the training transform y' = 400 * log(y + 100) - 800.

    Args:
        values: array-like (NumPy array or pandas Series) in transformed space.

    Returns:
        Same-shaped object holding exp((values + 800) / 400) - 100.
    """
    return np.exp((values + 800) / 400) - 100


def _report_mae(features, targets, targets_are_transformed=False):
    """Print the MAE of each target's model on the given rows.

    Args:
        features: feature rows passed to ``model.predict``.
        targets: frame with one column per entry of ``Y.columns``.
        targets_are_transformed: True when ``targets`` is still in the
            transformed training space (as ``Y_train`` is) and must be
            inverse-transformed before being compared with the predictions.
    """
    # models[i] was trained on Y.columns[i], so pair them positionally
    for column, model in zip(Y.columns, models):
        predictions = _inverse_target_transform(model.predict(features))
        # Clamp negative predictions to zero
        predictions = np.maximum(predictions, 0)
        actual = targets[column]
        if targets_are_transformed:
            actual = _inverse_target_transform(actual)
        mae = mean_absolute_error(actual, predictions)
        print(f"Mean Absolute Error for {column}: {mae}")


# The row filters do not depend on the target, so compute them once.
# NOTE(review): the headings say "Flights > 0" while the mask is `!= 0`;
# the two agree only if 'Flights' is never negative -- TODO confirm.
train_has_flights = X_train['Flights'] != 0
X_train_nonzero_flights = X_train[train_has_flights]
Y_train_nonzero_flights = Y_train[train_has_flights]

test_has_flights = X_test['Flights'] != 0
X_test_nonzero_flights = X_test[test_has_flights]
Y_test_nonzero_flights = Y_test[test_has_flights]

# Evaluate the model on train data (Y_train is stored in transformed space)
print("Mean Absolute Error for each target on train data when Flights > 0:")
_report_mae(X_train_nonzero_flights, Y_train_nonzero_flights, targets_are_transformed=True)

print("\n")
print("Mean Absolute Error for each target on test data:")
# Evaluate the model on all test rows (Y_test is in original units)
_report_mae(X_test, Y_test)

print("\n")
print("Mean Absolute Error for each target when Flights > 0:")
# Evaluate the model on test rows that have at least one flight
_report_mae(X_test_nonzero_flights, Y_test_nonzero_flights)

Mean Absolute Error for each target on train data when Flights > 0:
Mean Absolute Error for Citizen Avg Wait Time: 5.02848548755989
Mean Absolute Error for Citizen Max Wait Time: 11.687301762181686
Mean Absolute Error for Non citizen Avg Wait Time: 8.668876288289331
Mean Absolute Error for Non citizen Max Wait Time: 13.765930590221831


Mean Absolute Error for each target on test data:
Mean Absolute Error for Citizen Avg Wait Time: 2.009811676106839
Mean Absolute Error for Citizen Max Wait Time: 4.649659077803307
Mean Absolute Error for Non citizen Avg Wait Time: 3.459789596045741
Mean Absolute Error for Non citizen Max Wait Time: 5.47027079670574


Mean Absolute Error for each target when Flights > 0:
Mean Absolute Error for Citizen Avg Wait Time: 5.190870019222621
Mean Absolute Error for Citizen Max Wait Time: 12.042365434374183
Mean Absolute Error for Non citizen Avg Wait Time: 8.92594151181394
Mean Absolute Error for Non citizen Max Wait Time: 14.153943379460786


Mean Absolute Error for Citizen Avg Wait Time: 10.979130625213882
Mean Absolute Error for Citizen Max Wait Time: 5.350778216018147
Mean Absolute Error for Non citizen Avg Wait Time: 8.336685160896481
Mean Absolute Error for Non citizen Max Wait Time: 5.47027079670574


Mean Absolute Error for each target when Flights > 0:
Mean Absolute Error for Citizen Avg Wait Time: 28.568297172761834
Mean Absolute Error for Citizen Max Wait Time: 13.841281887369062
Mean Absolute Error for Non citizen Avg Wait Time: 21.65413616887047
Mean Absolute Error for Non citizen Max Wait Time: 14.153943379460786