# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import lightgbm as lgb
import xgboost as xgb
import shap
import matplotlib.pyplot as plt

# Input CSV for this run.
file_path = "C:/Users/sarat/OneDrive/Desktop/Ë∑ëÈ†êÊ∏¨Ê®°Âûã_Êîπ/Merged_TNKI_65+yr.csv"
df = pd.read_csv(file_path)

# Label used in the printed header and the plot titles: the file name without
# its directory and without a trailing ".csv".
# Both "/" and "\" are treated as separators. The path above uses forward
# slashes, so splitting on "\" alone would leave the whole path in the label.
file_name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
group = file_name[: -len(".csv")] if file_name.endswith(".csv") else file_name

# Discard every row that has a missing value in any column, then renumber the
# remaining rows from 0.
df = df.dropna()
df = df.reset_index(drop=True)

# Columns kept out of the feature matrix; "rate" is the regression target.
drop_cols = ["day", "Count_all", "Count_cvd", "rate"]
features = df.columns[~df.columns.isin(drop_cols)].tolist()
X = df.loc[:, features].to_numpy()
y = df.loc[:, "rate"].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# results: model name -> R² on the held-out split.
# models:  model name -> fitted estimator (reused later for the SHAP plots).
results = {}
models = {}

# Min-max scaling is fitted on the training split only and then applied to the
# test split. Of the six models below, only Lasso consumes the scaled matrices.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Estimator configurations.
svm = SVR(kernel="rbf", C=10, epsilon=0.003)
ridge = Ridge(alpha=0.01, random_state=123)
lasso_cv = LassoCV(alphas=np.logspace(-4, 0, 50), cv=5, max_iter=10000, random_state=123)
lgb_model = lgb.LGBMRegressor(n_estimators=100, max_depth=5, n_jobs=-1, random_state=42)
xgb_model = xgb.XGBRegressor(n_estimators=100, max_depth=5, n_jobs=-1, random_state=42, verbosity=0)
rf_model = RandomForestRegressor(random_state=42, n_jobs=-1)

# (name, estimator, matrix to fit on, matrix to score on), listed in fit order.
# NOTE(review): SVR and Ridge are fitted on the unscaled features; RBF-kernel
# SVR is usually sensitive to feature scale - confirm this is intended.
training_plan = [
    ("SVR", svm, X_train, X_test),
    ("Ridge", ridge, X_train, X_test),
    ("Lasso", lasso_cv, X_train_scaled, X_test_scaled),
    ("LightGBM", lgb_model, X_train, X_test),
    ("XGBoost", xgb_model, X_train, X_test),
    ("RandomForest", rf_model, X_train, X_test),
]

for model_name, estimator, fit_X, eval_X in training_plan:
    estimator.fit(fit_X, y_train)
    y_pred = estimator.predict(eval_X)
    results[model_name] = r2_score(y_test, y_pred)
    models[model_name] = estimator

# === Display R² ===
print(f"=== {group} ÂêÑÊ®°Âûã R¬≤ ÁµêÊûú ===")
for model_name, r2 in results.items():
    print(f"{model_name}: {r2:.4f}")

# === Build SHAP plots ===
# One SHAP summary plot per fitted model. A failure for one model is reported
# and the loop moves on to the next model.

# Dedicated seeded generator, so the SVR explanation subsample is the same on
# every run (the global NumPy random state is never seeded in this script).
shap_rng = np.random.default_rng(42)

for name, model in models.items():
    print(f"\nüîç Ê≠£Âú®Áî¢Áîü {name} ÁöÑ SHAP Âúñ...")

    try:
        if name in ("LightGBM", "XGBoost", "RandomForest"):
            explain_X = X_test
            explainer = shap.TreeExplainer(model)
            shap_values = explainer.shap_values(explain_X)

        elif name in ("Ridge", "Lasso"):
            # Lasso was fitted on the min-max scaled matrices, so it has to be
            # explained on scaled data; Ridge was fitted on the raw matrices.
            if name == "Lasso":
                background_X, explain_X = X_train_scaled, X_test_scaled
            else:
                background_X, explain_X = X_train, X_test
            explainer = shap.LinearExplainer(model, background_X)
            shap_values = explainer.shap_values(explain_X)

        elif name == "SVR":
            # Explain only a small subset of the test rows to avoid being too slow.
            sample_size = min(200, len(X_test))
            sample_idx = shap_rng.choice(len(X_test), size=sample_size, replace=False)
            explain_X = X_test[sample_idx]
            explainer = shap.KernelExplainer(model.predict, X_train[:100])
            shap_values = explainer.shap_values(explain_X, nsamples=100)

        else:
            # Reported through the handler below instead of showing an empty,
            # titled figure.
            raise ValueError(f"no SHAP explainer configured for model {name!r}")

        shap.summary_plot(shap_values, explain_X, feature_names=features, max_display=20, show=False)
        plt.title(f"Top 20 SHAP Features - {group} ({name})")
        plt.tight_layout()
        plt.show()

    except Exception as e:
        # Best-effort: report the failure and continue with the next model.
        print(f"‚ö† {name} SHAP Ë®àÁÆóÂ§±ÊïóÔºö{e}")
        # Drop any half-drawn figure so the next model's plot does not get
        # drawn on top of it.
        plt.close()