In [1]:
import pandas as pd
import os
from sklearn.linear_model import LinearRegression
import joblib

In [None]:
# Read the airline CSV (path is relative to the notebook's working directory)
# into the long-format frame `df` used by the following cells.
csv_path = "../data/synthetic_airline_data.csv"
df = pd.read_csv(csv_path)

# Quick sanity check: report the frame's dimensions, then preview the first rows.
print("Loaded dataset, shape:", df.shape)
print(df.head())

✅ Loaded dataset, shape: (124, 5)
  MarketID  Quarter Airline  Passengers     AvgFare
0     MKT1        1      DL         156  489.845500
1     MKT1        1      UA          70  178.009320
2     MKT1        1      AA         264  129.041806
3     MKT1        1      B6         137  266.854306
4     MKT1        2      UA         393  516.221320


In [3]:
# Reshape the long table into one row per (MarketID, Quarter) with a column for
# every (metric, airline) pair. No aggfunc is given, so pivot_table's default
# (mean) applies if a market/quarter/airline combination appears more than once.
wide_by_airline = df.pivot_table(
    values=["Passengers", "AvgFare"],
    index=["MarketID", "Quarter"],
    columns="Airline",
)

# Collapse each two-level column label into a single "<metric>_<airline>" string.
flat_labels = []
for level_pair in wide_by_airline.columns.values:
    flat_labels.append('_'.join(level_pair).strip())
wide_by_airline.columns = flat_labels

# Keep only the market/quarter rows in which no airline column is missing.
df_pivot = wide_by_airline.dropna()

print("\n Pivoted dataset, shape:", df_pivot.shape)
print(df_pivot.head())


 Pivoted dataset, shape: (16, 8)
                  AvgFare_AA  AvgFare_B6  AvgFare_DL  AvgFare_UA  \
MarketID Quarter                                                   
MKT1     1        129.041806  266.854306  489.845500  178.009320   
         2        191.702255  100.389383  362.378216  516.221320   
MKT10    2        197.136977  350.758147  450.983439  140.797090   
MKT2     1        442.116513  107.983126  574.442769  381.644109   
         2        117.194261  312.577937  295.530304  431.261142   

                  Passengers_AA  Passengers_B6  Passengers_DL  Passengers_UA  
MarketID Quarter                                                              
MKT1     1                264.0          137.0          156.0           70.0  
         2                493.0          343.0          363.0          393.0  
MKT10    2                222.0          440.0          313.0           62.0  
MKT2     1                435.0          314.0          323.0          365.0  
         2     

In [5]:
# Train one linear demand model per airline: that airline's passenger count is
# regressed on the average fares of every airline present in the pivoted table
# (its own fare plus its competitors'). Each fitted model is pickled to disk.
airlines = df["Airline"].unique().tolist()
print("\nCompeting airlines:", airlines)

# Single source of truth for the output directory (used for both mkdir and dump).
model_dir = "models"
os.makedirs(model_dir, exist_ok=True)

# The regressors are the same for every airline, so build the list once rather
# than on each loop iteration.
feature_cols = [f"AvgFare_{a}" for a in airlines if f"AvgFare_{a}" in df_pivot.columns]

# The pivot step drops every market/quarter with a missing airline, which can
# leave nothing to train on. Fail here with a readable message instead of
# letting LinearRegression.fit raise on an empty design matrix.
if df_pivot.empty or not feature_cols:
    raise ValueError(
        "No complete market/quarter rows or AvgFare_* columns are available to "
        "train on; check the pivot/dropna step."
    )

for airline in airlines:
    target_col = f"Passengers_{airline}"

    # An airline can appear in `df` yet have no passenger column after pivoting.
    if target_col not in df_pivot.columns:
        print(f"Skipping {airline} (not enough data).")
        continue

    X = df_pivot[feature_cols]
    y = df_pivot[target_col]

    model = LinearRegression()
    model.fit(X, y)

    # NOTE: this R² is computed on the same rows the model was fitted on, so it
    # is an in-sample fit measure, not an estimate of out-of-sample accuracy.
    r2 = model.score(X, y)
    print(f"Demand model for {airline} trained (R² = {r2:.3f})")

    # Save model
    joblib.dump(model, os.path.join(model_dir, f"demand_model_{airline}.pkl"))

print("\n All models trained and saved in 'models/' folder.")


Competing airlines: ['DL', 'UA', 'AA', 'B6']
Demand model for DL trained (R² = 0.417)
Demand model for UA trained (R² = 0.474)
Demand model for AA trained (R² = 0.429)
Demand model for B6 trained (R² = 0.173)

 All models trained and saved in 'models/' folder.
