In [1]:
import joblib
import pandas as pd
import numpy as np
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge  # Use for super model
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
X_train = pd.read_csv('../data_csv/X_train_final.csv')
y_train = pd.read_csv('../data_csv/y_train_final.csv')
X_test = pd.read_csv('../data_csv/X_test_final.csv')
y_test = pd.read_csv('../data_csv/y_test_final.csv')

# Load model
rf_model_loaded = joblib.load('../models/best_rf_model.pkl')
xgb_model_loaded = joblib.load('../models/best_xgb_model.pkl')

In [3]:
estimators = [
    ('xgb', xgb_model_loaded),
    ('rf', rf_model_loaded)
]

stacking_model = StackingRegressor(
    estimators=estimators,
    final_estimator=Ridge(alpha=0.1), # Meta-model
    cv='prefit'  
)

In [4]:
print("Start training")
stacking_model.fit(X_train, y_train)
print("Done!")

Start training


  y = column_or_1d(y, warn=True)


Done!


In [5]:
y_pred_log_stack = stacking_model.predict(X_test)

# Chuyển đổi ngược
y_test_original = np.expm1(y_test)
y_pred_original_stack = np.expm1(y_pred_log_stack)

# Tính toán chỉ số
r2_stack = r2_score(y_test_original, y_pred_original_stack)
mae_stack = mean_absolute_error(y_test_original, y_pred_original_stack)
rmse_stack = np.sqrt(mean_squared_error(y_test_original, y_pred_original_stack))

print(f"\n--- STACKING MODEL ---")
print(f"R-squared (R²): {r2_stack:.4f}")
print(f"MAE: ${mae_stack:,.2f}")
print(f"RMSE: ${rmse_stack:,.2f}")


--- STACKING MODEL ---
R-squared (R²): 0.8648
MAE: $2,596.68
RMSE: $5,356.90


In [6]:
import joblib
import os

os.makedirs('../models', exist_ok=True)
joblib.dump(stacking_model, '../models/stacking_model.pkl')

['../models/stacking_model.pkl']