#### 1: Setup and Security

In [2]:
import os
from urllib.parse import quote_plus

import joblib
import pandas as pd
from dotenv import load_dotenv
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine

# Load settings from a local .env file into the process environment so the
# credentials never have to appear in the notebook itself.
load_dotenv()


def get_engine():
    """Build a SQLAlchemy engine for the MySQL database described by DB_* env vars.

    Reads DB_USER, DB_PASSWORD, DB_HOST and DB_NAME from the environment and
    assembles a ``mysql+mysqlconnector`` connection URL from them.

    Returns:
        The engine object returned by ``create_engine``.

    Raises:
        RuntimeError: if any of the four variables is unset. An empty string
            (e.g. a blank password) is accepted; only a missing variable is
            treated as an error.
    """
    required = ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_NAME")
    settings = {name: os.getenv(name) for name in required}

    # Fail here with a precise message instead of building a URL that contains
    # the literal text "None" and only breaks later at query time.
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Credentials may contain URL-reserved characters such as '@', ':' or '/';
    # percent-encode them so the URL parser does not mis-split the string.
    user = quote_plus(settings["DB_USER"])
    password = quote_plus(settings["DB_PASSWORD"])
    host = settings["DB_HOST"]
    db = settings["DB_NAME"]
    return create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}/{db}")


# NOTE(review): creating the engine does not appear to open a connection, so
# bad credentials would only surface at the first query — confirm.
engine = get_engine()
print("Environment and Engine Ready")

Environment and Engine Ready


#### 2: Data Loading

In [4]:
# Name the columns explicitly instead of using SELECT *, so that a change to the
# view's schema fails loudly here rather than silently reshaping the training frame.
# These are the six columns the view returned in this notebook's displayed output.
VIEW_COLUMNS = [
    "flight_id",
    "aircraft_type",
    "scheduled_arrival",
    "actual_staff_deployed",
    "required_staff",
    "turnaround_delay_mins",
]
query = f"SELECT {', '.join(VIEW_COLUMNS)} FROM v_manpower_gap_analysis"
df = pd.read_sql(query, engine)
print(f" Data Loaded: {len(df)} rows")
df.head()

 Data Loaded: 2046 rows


Unnamed: 0,flight_id,aircraft_type,scheduled_arrival,actual_staff_deployed,required_staff,turnaround_delay_mins
0,EK308,A320,2025-01-04 07:49:00,2,6,34
1,EK350,A380,2025-01-02 08:40:00,19,20,45
2,EK350,A380,2025-01-05 18:52:00,19,20,45
3,EK350,A320,2025-01-06 06:02:00,19,6,45
4,EK350,B777,2025-01-06 23:24:00,19,12,45


#### 3: Feature Engineering

In [5]:
# Hours of the day (0-23) that are flagged as a peak wave: a late-night band
# (22:00-02:59) and a morning band (07:00-09:59).
PEAK_WAVE_HOURS = [22, 23, 0, 1, 2, 7, 8, 9]

df['scheduled_arrival'] = pd.to_datetime(df['scheduled_arrival'])
df['hour'] = df['scheduled_arrival'].dt.hour
# Vectorised membership test; yields 1 for peak-wave hours and 0 otherwise,
# without calling a Python lambda once per row.
df['is_peak_wave'] = df['hour'].isin(PEAK_WAVE_HOURS).astype(int)
# Negative values mean fewer staff were deployed than required.
df['staff_gap'] = df['actual_staff_deployed'] - df['required_staff']
# One-hot encode the aircraft type; the resulting column names depend on which
# types are present in the loaded data.
df_ml = pd.get_dummies(df, columns=['aircraft_type'])
print("Features Engineered")

Features Engineered


#### 4: Model Training

In [7]:
# Model inputs: schedule/staffing signals followed by the one-hot aircraft columns.
# The aircraft column names must match what pd.get_dummies produced from the
# 'aircraft_type' values in the data.
base_features = ['hour', 'is_peak_wave', 'actual_staff_deployed', 'staff_gap']
aircraft_features = ['aircraft_type_A320', 'aircraft_type_A380', 'aircraft_type_B777']
features = base_features + aircraft_features

# Design matrix and regression target (turnaround delay in minutes).
X = df_ml.loc[:, features]
y = df_ml.loc[:, 'turnaround_delay_mins']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
print("Model Training Complete")

Model Training Complete


#### 5: Evaluation and Saving

In [8]:
# Score the model on the held-out split.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f" Mean Absolute Error: {mae:.2f} minutes")

# Persist the fitted model together with the exact feature order it was trained
# on, so that code loading the model can rebuild inputs in the same layout.
artifacts = {
    'emirates_delay_model.pkl': model,
    'model_features.pkl': features,
}
for filename, obj in artifacts.items():
    joblib.dump(obj, filename)
print("Model artifacts saved.")

 Mean Absolute Error: 13.68 minutes
Model artifacts saved.
