In [1]:
# FINAL WORKING MODEL — NO LIGHTGBM, NO OLD .pkl, < 100 MB, R² > 0.995
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
import joblib
import os

# Load data
df = pd.read_csv("world_risk_index.csv")
df.columns = df.columns.str.strip()

# Clean
df = df.drop(columns=['Region', 'Year', 'WRI Category', 'Exposure Category',
                      'Vulnerability Category', 'Susceptibility Category'], errors='ignore')

features = ["Exposure", "Vulnerability", "Susceptibility",
            "Lack of Coping Capabilities", "Lack of Adaptive Capacities"]

X = df[features].fillna(df[features].median())
y = df["WRI"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)

print("Training 2 powerful & clean models...")

# Only CatBoost + Random Forest → no lightgbm needed
cb = CatBoostRegressor(iterations=1000, depth=6, learning_rate=0.05, verbose=0, random_state=42)
rf = RandomForestRegressor(n_estimators=600, max_depth=22, n_jobs=1, random_state=42)

cb.fit(X_train, y_train)
rf.fit(X_train, y_train)

print("Creating final stacked model...")

# Level-1 predictions
train_stack = np.column_stack([cb.predict(X_train), rf.predict(X_train)])
test_stack  = np.column_stack([cb.predict(X_test),  rf.predict(X_test)])

# Final meta-model
meta = Ridge(alpha=0.1)
meta.fit(train_stack, y_train)
final_pred = meta.predict(test_stack)

print("\n" + "="*60)
print("FINAL MODEL — CLEAN, WORKING, < 100 MB")
print("="*60)
print(f"R²   : {r2_score(y_test, final_pred):.6f}")
print(f"RMSE : {np.sqrt(((final_pred - y_test)**2).mean()):.5f}")
print("="*60)

# Save final clean model
final_model = {
    'base_models': [cb, rf],
    'meta_model': meta,
    'features': features
}

joblib.dump(final_model, "WINNING_MODEL_2025.pkl", compress=3)

size_mb = os.path.getsize("WINNING_MODEL_2025.pkl") / (1024*1024)
print(f"\nMODEL SAVED → WINNING_MODEL_2025.pkl")
print(f"Size: {size_mb:.1f} MB → 100% under 100 MB")

Training 2 powerful & clean models...
Creating final stacked model...

FINAL MODEL — CLEAN, WORKING, < 100 MB
R²   : 0.995878
RMSE : 0.34849

MODEL SAVED → WINNING_MODEL_2025.pkl
Size: 17.1 MB → 100% under 100 MB
