# Modelling

In [1]:
pip install catboost



In [2]:
!pip install tensorflow==2.15.0



In [3]:
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import tensorflow.keras.backend as K
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Input

In [4]:
# Mount Google Drive (Colab only) so files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Read the prepared training table from Drive and order it chronologically.
data = (
    pd.read_csv(
        '/content/drive/MyDrive/Sales Forecast/train_final1.csv',
        parse_dates=['Date'],
    )
    .sort_values('Date')
)

In [6]:
# Remove every feature that is derived from Sales itself (lags, rolling
# statistics, trend and decomposition components).
# NOTE(review): the LightGBM logs saved further down report 55 used features,
# which does not match the columns left after this drop — the saved outputs
# look stale; re-run the notebook top to bottom.
_lag_and_rolling = [
    'Sales_lag_1', 'Sales_lag_7', 'Sales_lag_14', 'Sales_lag_30',
    'Sales_roll_mean_7', 'Sales_roll_std_7',
    'StoreTrend',
]

# StoreType_0..3 and Assortment_0..2, each with trend / seasonal / resid.
_decomposition = [
    f'{group}_{level}_{part}'
    for group, n_levels in (('StoreType', 4), ('Assortment', 3))
    for level in range(n_levels)
    for part in ('trend', 'seasonal', 'resid')
]

sales_dependent_features = _lag_and_rolling + _decomposition

# errors='ignore' already skips names that are not present in the frame.
data = data.drop(columns=sales_dependent_features, errors='ignore')


In [7]:
data.columns.tolist()

['Store',
 'DayOfWeek',
 'Date',
 'Sales',
 'Open',
 'Promo',
 'StateHoliday',
 'SchoolHoliday',
 'StoreType',
 'Assortment',
 'CompetitionDistance',
 'CompetitionOpenSinceMonth',
 'CompetitionOpenSinceYear',
 'Promo2',
 'Promo2SinceWeek',
 'Promo2SinceYear',
 'PromoInterval',
 'DateInt',
 'Year',
 'Month',
 'Day',
 'DayOfYear',
 'WeekOfYear',
 'IsWeekend',
 'Quarter',
 'IsMonthStart',
 'IsMonthEnd',
 'CompetitionOpenSince',
 'Promo2Since',
 'IsPromo2Month']

In [8]:
# Fill all missing values so that no row has to be dropped.
# A backward fill alone leaves NaNs at the tail of a column (there is no later
# value to copy), so a forward fill is applied afterwards to close that gap.
# NOTE(review): the frame is ordered by Date only, so a gap is presumably
# filled from a neighbouring row that may belong to another store — confirm
# this is intended.
data = data.bfill().ffill().reset_index(drop=True)

In [9]:
# Columns whose name contains 'trend' or 'resid' get a forward- then
# backward-fill. When no such column is present the assignment is skipped.
decomp_cols = [name for name in data.columns if 'trend' in name or 'resid' in name]

if decomp_cols:
    data[decomp_cols] = data[decomp_cols].ffill().bfill()

In [10]:
# NOTE(review): this only binds a module-level variable that nothing visible
# here reads. It looks like it was meant to be the `low_memory=False` keyword
# of `pd.read_csv` — confirm and move it there, or delete this cell.
low_memory=False

In [11]:
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and numerical
# warnings (e.g. divide-by-zero inside the metric helpers) — consider
# narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

In [12]:
import torch

# Report whether PyTorch can see a CUDA device.
# NOTE(review): none of the model factories visible in this notebook read
# `device`; they request the GPU unconditionally.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cuda


## Month-based filtering (based on literature review)

In [13]:
def filter_may_to_sept(df):
    """Return the rows of ``df`` whose ``Month`` value is 5, 6, 7, 8 or 9.

    The original index labels are kept; ``df`` itself is not modified.
    """
    wanted_months = [5, 6, 7, 8, 9]
    in_range = df['Month'].isin(wanted_months)
    return df[in_range]

In [14]:
def drop_recent_month(df):
    """Return the rows of ``df`` dated strictly earlier than 30 days before its latest ``Date``.

    The cutoff is ``df['Date'].max()`` minus 30 calendar days; rows falling
    exactly on the cutoff are removed as well.
    """
    threshold = df['Date'].max() - pd.DateOffset(days=30)
    return df.loc[df['Date'].lt(threshold)]

## Splitting

In [15]:
# def time_split(df, target_col='Sales', test_size=0.2):
#     split_idx = int(len(df) * (1 - test_size))
#     X = df.drop(columns=[target_col, 'Date'])
#     y = df[target_col]
#     return X.iloc[:split_idx], X.iloc[split_idx:], y.iloc[:split_idx], y.iloc[split_idx:]


def time_split(df, target_col='Sales', split_type='proportional',
               split_date='2015-06-14', train_frac=0.8):
    """Split ``df`` chronologically into training and validation parts.

    Parameters
    ----------
    df : DataFrame with a ``Date`` column and the target column.
    target_col : name of the target column (removed from the feature frames).
    split_type : ``'last_weeks'`` splits at ``split_date``; any other value
        splits by row position after sorting by ``Date``.
    split_date : first date that belongs to the validation part when
        ``split_type == 'last_weeks'``. Anything ``pd.to_datetime`` accepts.
    train_frac : fraction of the date-sorted rows used for training in the
        positional split.

    Returns
    -------
    (X_train, X_val, y_train, y_val) — feature frames exclude ``target_col``
    and ``Date``.
    """
    ordered = df.sort_values('Date')

    if split_type == 'last_weeks':
        boundary = pd.to_datetime(split_date)
        train_df = ordered[ordered['Date'] < boundary]
        val_df = ordered[ordered['Date'] >= boundary]
    else:
        # Positional split: rows sharing one date can land on both sides.
        split_idx = int(len(ordered) * train_frac)
        train_df = ordered.iloc[:split_idx]
        val_df = ordered.iloc[split_idx:]

    non_features = [target_col, 'Date']
    X_train = train_df.drop(columns=non_features)
    X_val = val_df.drop(columns=non_features)
    return X_train, X_val, train_df[target_col], val_df[target_col]

## Models

### ML

In [16]:
def get_xgb():
    """Build an unfitted ``xgb.XGBRegressor`` with this notebook's fixed hyper-parameters.

    The configuration requests GPU training and GPU prediction.
    NOTE(review): newer XGBoost releases may deprecate ``gpu_hist`` /
    ``predictor`` in favour of a ``device`` parameter — check against the
    installed version.
    """
    params = dict(
        n_estimators=1500,
        learning_rate=0.03,
        max_depth=9,
        subsample=0.7,
        colsample_bytree=0.9,
        min_child_weight=8,
        tree_method='gpu_hist',
        predictor='gpu_predictor',
        random_state=42,
    )
    return xgb.XGBRegressor(**params)

In [17]:
def get_lgb():
    """Build an unfitted ``lgb.LGBMRegressor`` (DART boosting, GPU device) with fixed hyper-parameters."""
    params = dict(
        boosting_type='dart',
        n_estimators=2000,
        learning_rate=0.05,
        max_depth=6,
        num_leaves=32,
        device='gpu',
        random_state=42,
    )
    return lgb.LGBMRegressor(**params)

In [18]:
def get_cat():
    """Build an unfitted, silent ``CatBoostRegressor`` configured for GPU device ``'0'``."""
    params = dict(
        iterations=1500,
        learning_rate=0.05,
        depth=8,
        task_type='GPU',
        devices='0',
        verbose=0,
        random_state=42,
    )
    return CatBoostRegressor(**params)

In [19]:
def get_rf():
    """Build an unfitted ``RandomForestRegressor`` (100 trees, depth <= 10, seed 42).

    ``n_jobs=-1`` lets scikit-learn fit and predict the trees on all available
    cores; with a fixed ``random_state`` the fitted forest is the same as the
    single-threaded one, only faster to obtain.
    """
    return RandomForestRegressor(
        n_estimators=100,
        max_depth=10,
        n_jobs=-1,
        random_state=42
    )

In [20]:
def get_mlp():
    """Build an unfitted ``MLPRegressor`` with two hidden layers of 64 units.

    NOTE(review): ``train_model`` feeds this estimator the raw feature frame;
    no scaling step is visible — confirm that is intended for a neural net.
    """
    params = dict(
        hidden_layer_sizes=(64, 64),
        max_iter=300,
        random_state=42,
    )
    return MLPRegressor(**params)

In [21]:
def get_histgbr():
    """Build an unfitted ``HistGradientBoostingRegressor`` with fixed hyper-parameters."""
    params = dict(
        learning_rate=0.05,
        max_iter=1000,
        max_depth=7,
        l2_regularization=0.1,
        random_state=42,
    )
    return HistGradientBoostingRegressor(**params)

In [22]:
# def train_model(model_code):
#     X_train, X_val, y_train, y_val = time_split(data)

#     model_map = {
#         'xgb': get_xgb,
#         'lgb': get_lgb,
#         'cat': get_cat,
#         'rf': get_rf,
#         'mlp': get_mlp
#     }

#     if model_code not in model_map:
#         raise ValueError(f"Unsupported model code: {model_code}")

#     model = model_map[model_code]()
#     model.fit(X_train, y_train)

#     # # Optional: train for epochs (uncomment if needed)
#     # for epoch in range(5):
#     #     model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

#     return model, X_val, y_val

def train_model(model_code, features=None, df=None, split_type='last_weeks'):
    """Fit one of the tabular models on a chronological split.

    Parameters
    ----------
    model_code : one of ``'xgb'``, ``'lgb'``, ``'cat'``, ``'rf'``, ``'mlp'``,
        ``'hist'``.
    features : optional list of feature columns to keep; all columns are used
        when it is ``None`` or empty.
    df : frame to train on; the notebook-level ``data`` frame is used when
        omitted.
    split_type : forwarded to ``time_split``.

    Returns
    -------
    (fitted model, X_val, y_val)

    Raises
    ------
    ValueError : if ``model_code`` is not one of the supported codes.
    """
    # Map codes to factories rather than instances so that only the requested
    # estimator is constructed.
    model_factories = {
        'xgb': get_xgb,
        'lgb': get_lgb,
        'cat': get_cat,
        'rf': get_rf,
        'mlp': get_mlp,
        'hist': get_histgbr,
    }

    # Fail fast, before any data is sorted or split.
    if model_code not in model_factories:
        raise ValueError(f"Unsupported model code: {model_code}")

    # time_split sorts into a new frame and never writes to its input, so the
    # source frame does not need to be copied first.
    use_df = df if df is not None else data
    X_train, X_val, y_train, y_val = time_split(use_df, split_type=split_type)

    if features:
        X_train = X_train[features]
        X_val = X_val[features]

    model = model_factories[model_code]()
    model.fit(X_train, y_train)

    return model, X_val, y_val

In [24]:
def ensemble_predictions(models, X_val):
    """Average the ``expm1``-transformed predictions of several fitted models.

    Each model's ``predict(X_val)`` output is passed through ``np.expm1`` and
    the element-wise mean over models is returned.
    """
    per_model = []
    for estimator in models:
        per_model.append(np.expm1(estimator.predict(X_val)))
    return np.mean(per_model, axis=0)

### DL

In [25]:
def create_sequences_scaled(df, target_col='Sales', window=14):
    """Turn a frame into standardised sliding-window sequences.

    Sample ``k`` consists of the scaled features of rows ``k .. k+window-1``
    and the scaled target of row ``k+window``.

    Parameters
    ----------
    df : frame with a ``Date`` column (dropped), ``target_col`` and feature
        columns that ``StandardScaler`` can consume.
    target_col : name of the target column.
    window : number of consecutive rows per sequence.

    Returns
    -------
    X : array of shape ``(len(df) - window, window, n_features)``.
    y : array of shape ``(len(df) - window,)`` with the scaled target.
    target_scaler : the fitted ``StandardScaler`` for the target, needed to
        invert predictions.

    Notes
    -----
    * The target is floored at 1 before scaling, so inverting with
      ``target_scaler`` yields the floored target.
    * NOTE(review): both scalers are fitted on every row passed in, i.e.
      before the caller splits off a validation part — validation statistics
      leak into the scaling.
    * NOTE(review): windows follow the row order of ``df``; if the frame
      interleaves several stores, a window presumably mixes stores — confirm.
    """
    df = df.drop(columns=['Date'])

    # bfill leaves NaNs at the tail of a column; ffill closes that gap so no
    # NaN reaches the scalers or the network.
    df = df.bfill().ffill()

    # Separate and scale the target (floored at 1).
    y_raw = df[target_col].clip(lower=1).values.reshape(-1, 1)
    target_scaler = StandardScaler()
    y_scaled = target_scaler.fit_transform(y_raw).flatten()

    # Scale the features.
    X_df = df.drop(columns=[target_col])
    feature_scaler = StandardScaler()
    X_scaled = feature_scaler.fit_transform(X_df)

    n_rows, n_features = X_scaled.shape
    if n_rows <= window:
        # Not enough rows for a single (window, next-target) pair.
        return np.empty((0, window, n_features)), np.empty((0,)), target_scaler

    # sliding_window_view yields (n_rows - window + 1, n_features, window);
    # the last window has no following target row, so it is dropped, and the
    # axes are swapped to (sample, time step, feature).
    windows = np.lib.stride_tricks.sliding_window_view(X_scaled, window, axis=0)
    X = np.ascontiguousarray(windows[:-1].transpose(0, 2, 1))
    y = y_scaled[window:].copy()

    return X, y, target_scaler

In [26]:
def get_cnn_lstm(input_shape):
    """Build the uncompiled CNN-LSTM regressor.

    ``input_shape`` is forwarded to the first ``Conv1D`` layer; the caller in
    this notebook passes ``(window length, number of features)``. The stack is
    two ``Conv1D`` layers, two ``LSTM`` layers, dropout and a single-unit
    dense output.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        LSTM(64, return_sequences=True),
        LSTM(32),
        Dropout(0.2),
        Dense(1),
    ]
    return Sequential(stack)

In [27]:
def train_cnn_lstm_scaled(df, target_col='Sales', window=14, epochs=100, batch_size=256):
    """Train the CNN-LSTM on scaled sliding windows built from ``df``.

    The first 80% of the windows are used for training and the rest for
    validation. Training minimises MSE with Adam and stops early after five
    epochs without improvement in ``val_loss``, restoring the best weights.

    Returns
    -------
    (model, X_val, y_val, target_scaler) — ``y_val`` is in scaled units.
    """
    sequences, targets, target_scaler = create_sequences_scaled(df.copy(), target_col, window)

    cut = int(len(sequences) * 0.8)
    X_train, y_train = sequences[:cut], targets[:cut]
    X_val, y_val = sequences[cut:], targets[cut:]

    n_steps, n_features = X_train.shape[1], X_train.shape[2]
    model = get_cnn_lstm(input_shape=(n_steps, n_features))
    model.compile(optimizer='adam', loss='mse')

    stopper = EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)
    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
        verbose=1,
    )
    model.fit(X_train, y_train, **fit_options)

    return model, X_val, y_val, target_scaler


## *Evaluation*

### ML

In [28]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error.

    Entries whose actual value is 0 are left out, because the percentage
    error is undefined there and a single zero would otherwise turn the whole
    score into ``inf``/``nan``.

    Parameters
    ----------
    y_true, y_pred : array-likes of actual and predicted values.

    Returns
    -------
    ``sqrt(mean(((y_true - y_pred) / y_true) ** 2))`` over the non-zero
    actuals, or ``nan`` when every actual value is 0.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    nonzero = y_true != 0
    if not nonzero.any():
        return np.nan

    pct_err = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.sqrt(np.mean(pct_err ** 2))

In [29]:
# def evaluate_model(model, X_val, y_val):
#     preds = np.expm1(model.predict(X_val))
#     y_val_true = np.expm1(y_val)

#     rmse = sqrt(mean_squared_error(y_val_true, preds))
#     mae = mean_absolute_error(y_val_true, preds)
#     mape = np.mean(np.abs((y_val_true - preds) / y_val_true)) * 100
#     return {'RMSE': rmse, 'MAE': mae, 'MAPE': mape}

def evaluate_model(model, X_val, y_val, correction_factor=0.995):
    """Score a fitted model on the original scale of the target.

    Both the predictions and ``y_val`` are passed through ``np.expm1``; the
    predictions are additionally multiplied by ``correction_factor``.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_val, y_val : validation features and target.
    correction_factor : multiplier applied to the back-transformed
        predictions; defaults to the value this notebook has always used, and
        mirrors the parameter of ``evaluate_ensemble``.

    Returns
    -------
    dict with ``'RMSE'``, ``'MAE'``, ``'MAPE'`` (percent) and ``'RMSPE'``.
    """
    # Apply inverse log transform + correction factor
    preds = np.expm1(model.predict(X_val)) * correction_factor
    y_val_true = np.expm1(y_val)

    rmse = sqrt(mean_squared_error(y_val_true, preds))
    mae = mean_absolute_error(y_val_true, preds)
    # MAPE divides by the actual value, so a zero actual makes it infinite.
    mape = np.mean(np.abs((y_val_true - preds) / y_val_true)) * 100
    rmspe_val = rmspe(y_val_true, preds)

    return {
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'RMSPE': rmspe_val
    }


### DL

In [30]:
def evaluate_dl_model_scaled(model, X_val, y_val, scaler, log_target=False):
    """Score a model that was trained on a standard-scaled target.

    Predictions and ``y_val`` are mapped back through
    ``scaler.inverse_transform`` before the metrics are computed.

    Parameters
    ----------
    model : fitted model exposing ``predict``.
    X_val : validation inputs.
    y_val : scaled validation target (1-D array).
    scaler : the fitted target scaler.
    log_target : set to ``True`` when the unscaled target is ``log1p`` of the
        real quantity; both series are then passed through ``np.expm1`` so the
        metrics are on the same scale as those of ``evaluate_model``. The
        default keeps the previous behaviour (no ``expm1``).
        NOTE(review): the tabular evaluators in this notebook apply ``expm1``
        to the same target column, which suggests the default reports
        log-scale errors here — confirm and pass ``log_target=True``.

    Returns
    -------
    dict with ``'RMSE'``, ``'MAE'``, ``'MAPE'`` (percent) and ``'RMSPE'``.
    """
    preds_scaled = model.predict(X_val).flatten()
    y_pred = scaler.inverse_transform(preds_scaled.reshape(-1, 1)).flatten()
    y_true = scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()

    if log_target:
        y_pred = np.expm1(y_pred)
        y_true = np.expm1(y_true)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    # Computed inline (with the small epsilon this evaluator has always used)
    # instead of through a nested function that hid the module-level rmspe.
    rmspe_val = np.sqrt(np.mean(np.square((y_true - y_pred) / (y_true + 1e-6))))

    return {
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'RMSPE': rmspe_val
    }


## Calling Functions

### Single Model

#### Method 1: XGB

In [None]:
# Train XGBoost on the full frame with the default 'last_weeks' split.
# X_val / y_val are rebound by every training cell below.
model_xgb, X_val, y_val = train_model("xgb")  # supported codes: "xgb", "lgb", "cat", "rf", "mlp", "hist"

In [None]:
# Score XGBoost on the validation split bound by the most recent training cell.
metrics_xgb = evaluate_model(model_xgb, X_val, y_val)
print(metrics_xgb)

{'RMSE': 936.7951595297606, 'MAE': 646.7923437023254, 'MAPE': np.float64(9.365256045581493), 'RMSPE': np.float64(0.12825368898217582)}


#### Method 2: LGB

In [None]:
# Train LightGBM on the full frame with the default 'last_weeks' split.
model_lgb, X_val, y_val = train_model("lgb")  # supported codes: "xgb", "lgb", "cat", "rf", "mlp", "hist"

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 7131
[LightGBM] [Info] Number of data points in the train set: 798454, number of used features: 55
[LightGBM] [Info] Using GPU Device: NVIDIA A100-SXM4-40GB, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 49 dense feature groups (39.60 MB) transferred to GPU in 0.029705 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 8.755916


In [None]:
# Score LightGBM on the validation split bound by the most recent training cell.
metrics_lgb = evaluate_model(model_lgb, X_val, y_val)
print(metrics_lgb)

{'RMSE': 1303.1506946948402, 'MAE': 962.4669258697669, 'MAPE': np.float64(12.942063214880092), 'RMSPE': np.float64(0.1554461253017968)}


#### Method 3: HistGBR


In [None]:
# Train the histogram gradient-boosting model on the full frame ('last_weeks' split).
model_hist, X_val, y_val = train_model("hist")

In [None]:
# Score HistGBR on the validation split bound by the most recent training cell.
metrics_hist = evaluate_model(model_hist, X_val, y_val)
print(metrics_hist)

{'RMSE': 1089.589211736566, 'MAE': 735.2413616187729, 'MAPE': np.float64(10.382064839723425), 'RMSPE': np.float64(0.14293924875139905)}


#### Method 4: CNN-LSTM

In [None]:
# Train the CNN-LSTM on the full frame; also returns the validation windows and
# the target scaler needed to undo the scaling.
# NOTE(review): the saved output of this cell ends in a traceback during
# epoch 2, so the metrics printed by the next cell presumably come from a
# different run — re-run before trusting them.
model_cnnlstm, X_val_dl, y_val_dl, scaler = train_cnn_lstm_scaled(data)

Epoch 1/100
[1m2639/2639[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 8ms/step - loss: 0.7930 - val_loss: 0.7604
Epoch 2/100
[1m 549/2639[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m13s[0m 6ms/step - loss: 0.7568

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipython-input-31-1357457196.py", line 1, in <cell line: 0>
    model_cnnlstm, X_val_dl, y_val_dl, scaler = train_cnn_lstm_scaled(data)
                                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-25-2781717421.py", line 17, in train_cnn_lstm_scaled
    model.fit(
  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit
    logs = self.train_function(iterator)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function
    opt_out

TypeError: object of type 'NoneType' has no len()

In [None]:
# Score the CNN-LSTM after undoing only the standard scaling of the target.
# NOTE(review): the tabular evaluators additionally apply expm1, so these
# numbers are presumably on a log scale and not comparable with the
# tree-model metrics — confirm.
metrics_dl = evaluate_dl_model_scaled(model_cnnlstm, X_val_dl, y_val_dl, scaler)
print(metrics_dl)

[1m5278/5278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step
{'RMSE': np.float64(0.3615969290636361), 'MAE': 0.27735489671708125, 'MAPE': np.float64(3.177379382719634), 'RMSPE': np.float64(0.04189768089879272)}


#### Method 5: CAT

In [None]:
# Train CatBoost on the full frame ('last_weeks' split).
model_cat, X_val, y_val = train_model("cat")

In [None]:
# Score CatBoost on the validation split bound by the most recent training cell.
# NOTE(review): the saved output of this cell is digit-for-digit identical to
# the HistGBR metrics above, which suggests it was produced from stale kernel
# state — re-run after training CatBoost.
metrics_cat = evaluate_model(model_cat, X_val, y_val)
print(metrics_cat)

{'RMSE': 1089.589211736566, 'MAE': 735.2413616187729, 'MAPE': np.float64(10.382064839723425), 'RMSPE': np.float64(0.14293924875139905)}


#### Method 6: RF

In [None]:
# Train the random forest on the full frame ('last_weeks' split).
model_rf, X_val, y_val = train_model("rf")

In [None]:
# Score the random forest on the validation split bound by the most recent training cell.
metrics_rf = evaluate_model(model_rf, X_val, y_val)
print(metrics_rf)

{'RMSE': 1095.6409091029145, 'MAE': 737.4523674410626, 'MAPE': np.float64(10.375897340931663), 'RMSPE': np.float64(0.14278619778575874)}


### Ensemble Modelling

In [None]:
def evaluate_ensemble(models, X_val, y_val, correction_factor=0.995):
    """Score the equal-weight average of several fitted models.

    Every model's prediction is passed through ``np.expm1`` and multiplied by
    ``correction_factor``; the element-wise mean over models is compared with
    ``np.expm1(y_val)``.

    Returns
    -------
    dict with ``'RMSE'``, ``'MAE'``, ``'MAPE'`` (percent) and ``'RMSPE'``.
    """
    member_preds = []
    for member in models:
        member_preds.append(np.expm1(member.predict(X_val)) * correction_factor)
    blended = np.mean(member_preds, axis=0)
    actual = np.expm1(y_val)

    return {
        'RMSE': sqrt(mean_squared_error(actual, blended)),
        'MAE': mean_absolute_error(actual, blended),
        'MAPE': np.mean(np.abs((actual - blended) / actual)) * 100,
        'RMSPE': rmspe(actual, blended),
    }

#### Method 1: XGB & LGB & CAT + Filtered Data

In [None]:
# Train three model families on the May–September rows only. All three calls
# use the same frame and split, so the validation set of the first call is
# kept and the others are discarded.
filtered = filter_may_to_sept(data)
model_xgb, X_val, y_val = train_model("xgb", df=filtered)
model_lgb, _, _ = train_model("lgb", df=filtered)
model_cat, _, _ = train_model("cat", df=filtered)

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 6787
[LightGBM] [Info] Number of data points in the train set: 309094, number of used features: 55
[LightGBM] [Info] Using GPU Device: NVIDIA A100-SXM4-40GB, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 49 dense feature groups (15.33 MB) transferred to GPU in 0.009052 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 8.746445


In [None]:
# Members of the equal-weight ensemble.
models = [model_xgb, model_lgb, model_cat]

# Evaluate the ensemble on the X_val / y_val returned by the first train_model
# call of the previous cell (validation rows of the May–September frame).
ensemble_metrics = evaluate_ensemble(models, X_val, y_val)
# Show the metric dictionary.
print(ensemble_metrics)


{'RMSE': 906.6962912922345, 'MAE': 625.8237786083328, 'MAPE': np.float64(8.676804511076972), 'RMSPE': np.float64(0.11355291999028151)}


#### Method 2: XGB + Filtered Data (x3)

In [None]:
# Full data (no filtering); this call also provides the validation set used
# to score all three models.
model_xgb_full, X_val, y_val = train_model("xgb", df=data)

# May–September rows only.
filtered_maysept = filter_may_to_sept(data)
model_xgb_maysept, _, _ = train_model("xgb", df=filtered_maysept)

# May–September rows with the most recent 30 days removed.
# NOTE(review): train_model trains on dates before 2015-06-14 by default. If
# the 30-day cutoff falls on or after that date, this model sees exactly the
# same training rows as model_xgb_maysept — confirm against the data's latest
# date.
filtered_month_ahead = drop_recent_month(filtered_maysept)
model_xgb_month_ahead, _, _ = train_model("xgb", df=filtered_month_ahead)


In [None]:
# Equal-weight ensemble of the three XGBoost variants, scored on the
# validation split of the full-data model.
xgb_ensemble_models = [model_xgb_full, model_xgb_maysept, model_xgb_month_ahead]

ensemble_metrics = evaluate_ensemble(xgb_ensemble_models, X_val, y_val)
print(ensemble_metrics)

{'RMSE': 850.5717098614708, 'MAE': 590.2522280425932, 'MAPE': np.float64(8.428744516867374), 'RMSPE': np.float64(0.11406987449671782)}


#### Method 3: XGB & RF & Hist + Filtered Data

In [None]:
# Train XGBoost, random forest and HistGBR on the May–September rows; the
# validation set of the first call is reused for the ensemble.
filtered = filter_may_to_sept(data)
model_xgb, X_val, y_val = train_model("xgb", df=filtered)
model_rf, _, _ = train_model("rf", df=filtered)
model_hist, _, _ = train_model("hist", df=filtered)

In [None]:
# Members of the equal-weight ensemble.
models = [model_xgb, model_rf, model_hist]

# Score the averaged prediction on the validation rows of the filtered frame.
ensemble_metrics = evaluate_ensemble(models, X_val, y_val)

print(ensemble_metrics)

{'RMSE': 1038.7848573606598, 'MAE': 700.8206127485163, 'MAPE': np.float64(9.91525332639744), 'RMSPE': np.float64(0.1357080190459671)}


#### Method 4: XGB & CNN-LSTM

In [31]:
from sklearn.model_selection import train_test_split
import xgboost as xgb

def train_aligned_xgboost(df, target_col='Sales', window=14, log_transform=False):
    """Train a native XGBoost booster on rows aligned with the CNN-LSTM windows.

    The first ``window`` rows are dropped so that the remaining rows line up
    with the targets produced by the windowing step; the first 80% of those
    rows are used for training and the rest for validation.

    Parameters
    ----------
    df : frame with ``Date``, ``target_col`` and the feature columns.
    target_col : name of the target column.
    window : number of leading rows to drop.
    log_transform : apply ``np.log1p`` to the target before training.

    Returns
    -------
    (booster, X_val, y_val)

    Notes
    -----
    Early stopping (50 rounds) monitors the last entry of ``evals``, i.e. the
    same validation part that is returned for scoring.
    """
    # Drop the first `window` rows to match the CNN windowing.
    aligned = df.copy().iloc[window:].reset_index(drop=True)

    if log_transform:
        aligned[target_col] = np.log1p(aligned[target_col])

    features = aligned.drop(columns=['Date', target_col])
    target = aligned[target_col]

    cut = int(len(features) * 0.8)
    X_train, y_train = features.iloc[:cut], target.iloc[:cut]
    X_val, y_val = features.iloc[cut:], target.iloc[cut:]

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    booster_params = dict(
        objective='reg:squarederror',
        eta=0.03,
        max_depth=10,
        subsample=0.85,
        colsample_bytree=0.3,
        min_child_weight=4,
        eval_metric='rmse',
        tree_method='gpu_hist',
        predictor='gpu_predictor',
    )
    watchlist = [(dtrain, 'train'), (dval, 'valid')]

    model = xgb.train(
        booster_params,
        dtrain,
        num_boost_round=3000,
        evals=watchlist,
        early_stopping_rounds=50,
        verbose_eval=100,
    )

    return model, X_val, y_val


In [32]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

def evaluate_ensemble_preds(y_true, y_pred):
    """Compute error metrics for ready-made predictions.

    No back-transformation is applied: the metrics are in whatever units
    ``y_true`` and ``y_pred`` are given in. RMSPE adds ``1e-6`` to the
    denominator; MAPE divides by ``y_true`` directly.

    Returns
    -------
    dict with ``'RMSE'``, ``'MAE'``, ``'MAPE'`` (percent) and ``'RMSPE'``.
    """
    error = y_true - y_pred
    return {
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAE': mean_absolute_error(y_true, y_pred),
        'MAPE': np.mean(np.abs(error / y_true)) * 100,
        'RMSPE': np.sqrt(np.mean(np.square(error / (y_true + 1e-6)))),
    }

In [33]:
# Train the window-aligned booster. This rebinds `model_xgb` from the earlier
# scikit-learn style regressor to a native booster returned by xgb.train.
model_xgb, X_val_tab, y_val_tab = train_aligned_xgboost(data, window=14, log_transform=False)

[0]	train-rmse:0.42532	valid-rmse:0.41147
[100]	train-rmse:0.26561	valid-rmse:0.27639
[200]	train-rmse:0.20823	valid-rmse:0.22963
[300]	train-rmse:0.16934	valid-rmse:0.19795
[400]	train-rmse:0.14313	valid-rmse:0.17712
[500]	train-rmse:0.12671	valid-rmse:0.16442
[600]	train-rmse:0.11502	valid-rmse:0.15545
[700]	train-rmse:0.10855	valid-rmse:0.15084
[800]	train-rmse:0.10368	valid-rmse:0.14766
[900]	train-rmse:0.09928	valid-rmse:0.14467
[1000]	train-rmse:0.09614	valid-rmse:0.14271
[1100]	train-rmse:0.09385	valid-rmse:0.14136
[1200]	train-rmse:0.09120	valid-rmse:0.13986
[1300]	train-rmse:0.08953	valid-rmse:0.13921
[1400]	train-rmse:0.08800	valid-rmse:0.13857
[1500]	train-rmse:0.08663	valid-rmse:0.13797
[1600]	train-rmse:0.08532	valid-rmse:0.13740
[1700]	train-rmse:0.08412	valid-rmse:0.13690
[1800]	train-rmse:0.08301	valid-rmse:0.13644
[1900]	train-rmse:0.08203	valid-rmse:0.13615
[1980]	train-rmse:0.08135	valid-rmse:0.13611


In [34]:
# Train the CNN-LSTM again for the hybrid ensemble (same call as in the
# single-model section; the window default of 14 matches the booster above).
model_cnnlstm, X_val_dl, y_val_dl, scaler = train_cnn_lstm_scaled(data)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


In [35]:
# CNN–LSTM predictions, mapped back from standard-scaled units
cnn_preds = scaler.inverse_transform(model_cnnlstm.predict(X_val_dl).reshape(-1, 1)).flatten()

# XGB predictions from the native booster (needs a DMatrix)
xgb_preds = model_xgb.predict(xgb.DMatrix(X_val_tab))

# Equal-weight ensemble; relies on both validation sets covering the same
# rows in the same order (both use window=14 and an 80/20 positional split).
ensemble_preds = 0.5 * xgb_preds + 0.5 * cnn_preds




In [36]:
# Score the hybrid ensemble against the booster's validation target.
# NOTE(review): no expm1 is applied here, unlike the tabular evaluators, so
# these numbers are presumably on a log scale and not comparable with the
# earlier tree-model metrics — confirm.
metrics = evaluate_ensemble_preds(y_val_tab.values, ensemble_preds)
print(metrics)

{'RMSE': 0.22541600416856383, 'MAE': 0.173159312037515, 'MAPE': 1.9728117795537237, 'RMSPE': 0.025894138946980654}


## Saving Models

In [43]:
import os
import joblib

# Directory that receives every artefact of the XGB + CNN-LSTM ensemble.
MODEL_DIR = 'models'
os.makedirs(MODEL_DIR, exist_ok=True)

# Save the XGBoost booster
model_xgb.save_model(os.path.join(MODEL_DIR, 'model_xgb.json'))

# NOTE(review): re-compiling creates a fresh optimizer and discards the
# trained optimizer state; presumably done so the saved file carries a
# compile configuration — confirm it is still needed.
model_cnnlstm.compile(
    optimizer='adam',
    loss='mse'
)
# The '.keras' suffix already selects the native Keras format. The former
# `save_format='tf'` argument contradicted that suffix, so it is dropped.
model_cnnlstm.save(os.path.join(MODEL_DIR, 'model_cnnlstm.keras'))

# Save the target scaler needed to invert CNN-LSTM predictions
joblib.dump(scaler, os.path.join(MODEL_DIR, 'cnn_scaler.pkl'))

# Save ensemble metadata
ensemble_meta = {
    'xgb_version': xgb.__version__,
    'tensorflow_version': tf.__version__,
    'feature_names': list(X_val_tab.columns),
    # Taken from the validation windows (samples, window, features) instead of
    # being hard-coded, so it cannot drift from the trained model.
    'cnn_window_size': X_val_dl.shape[1],
    'cnn_n_features': X_val_dl.shape[2],
    'ensemble_weights': {
        'xgb': 0.5,
        'cnn_lstm': 0.5
    }
}

joblib.dump(ensemble_meta, os.path.join(MODEL_DIR, 'ensemble_meta.pkl'))

# Verify that every artefact was written as a regular file
for file in ['model_xgb.json', 'model_cnnlstm.keras', 'cnn_scaler.pkl', 'ensemble_meta.pkl']:
    if os.path.isfile(os.path.join(MODEL_DIR, file)):
        print(f"✅ {file} saved successfully")
    else:
        print(f"❌ {file} not found")

✅ model_xgb.json saved successfully
✅ model_cnnlstm.keras saved successfully
✅ cnn_scaler.pkl saved successfully
✅ ensemble_meta.pkl saved successfully
