In [1]:
import pandas as pd
import numpy as np
import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import r2_score, mean_absolute_percentage_error

In [2]:
from joblib import Parallel, delayed
from tqdm import tqdm

In [3]:
# === Parameters ===
folder = "C:/Users/sarat/OneDrive/Desktop/標準化檔案"  # change this to your data folder
exclude_cols = ["day", "Count_all", "Count_cvd", "rate"]  # columns dropped before building the feature matrix
test_size = 0.2  # value passed to train_test_split as test_size
n_jobs = -1  # number of parallel workers (-1 means use every CPU)

# === Locate every normalized file ===
# glob returns matches in a filesystem-dependent order; sorting keeps the
# processing order (and therefore the order of tied rows in the result tables)
# identical from one run or machine to the next.
files = sorted(glob.glob(os.path.join(folder, "*_normalized.csv")))

# === 定義處理函式 ===
def process_file(file):
    """Fit three regressors on one normalized CSV and score them on a hold-out split.

    The CSV is loaded with pandas. Every column listed in the module-level
    ``exclude_cols`` is dropped to form the feature matrix, and the ``rate``
    column is used as the target. The rows are split with
    ``train_test_split`` (module-level ``test_size``, ``random_state=42``) and
    an ``MLPRegressor``, a ``KNeighborsRegressor`` and a ``BayesianRidge`` are
    each fitted on the training part and evaluated on the test part.

    Parameters
    ----------
    file : str
        Path of the CSV file to process. The group label is the part of the
        basename between ``"Merged_Feature_day_"`` and ``"_normalized.csv"``;
        when the prefix is absent, the basename up to ``"_normalized.csv"``
        is used instead.

    Returns
    -------
    dict
        Maps each model name (``"MLP"``, ``"KNN"``, ``"Bayes"``) to a tuple
        ``(group, r2, mape)``, where ``mape`` is
        ``mean_absolute_percentage_error * 100``. If fitting, predicting or
        scoring a model raises, the error is printed and that model's entry
        is ``(group, nan, nan)``.

    Raises
    ------
    KeyError
        If the file has no ``rate`` column.
    """
    df = pd.read_csv(file)

    # Derive the group label. A bare ``split(prefix)[1]`` raises IndexError for
    # any file that lacks the prefix, which would abort the whole parallel
    # run; fall back to the basename in that case.
    base_name = os.path.basename(file)
    pieces = base_name.split("Merged_Feature_day_")
    tail = pieces[1] if len(pieces) > 1 else base_name
    group = tail.split("_normalized.csv")[0]

    # Fail with a message that names the offending file instead of a bare
    # KeyError: 'rate'.
    if "rate" not in df.columns:
        raise KeyError(f"'rate' column not found in {file}")

    # Features and target. errors="ignore" already skips absent columns.
    X = df.drop(columns=exclude_cols, errors="ignore")
    y = df["rate"]

    # Train / test split with a fixed seed so results are repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    # Fresh estimator instances for every file.
    models = {
        "MLP": MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=2000, random_state=42),
        "KNN": KNeighborsRegressor(n_neighbors=5),
        "Bayes": BayesianRidge(),
    }

    results = {}
    for name, model in models.items():
        try:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            r2 = r2_score(y_test, y_pred)
            # Expressed in percent.
            mape = mean_absolute_percentage_error(y_test, y_pred) * 100
            results[name] = (group, r2, mape)
        except Exception as e:
            # Best effort: one failing model must not discard the others.
            print(f"Error processing {file} with {name}: {e}")
            results[name] = (group, np.nan, np.nan)
    return results

# === Process every file in parallel ===
# NOTE: tqdm wraps the task generator, so the bar advances when joblib pulls a
# task for dispatch; it can run slightly ahead of actual task completion.
all_results = Parallel(n_jobs=n_jobs)(
    delayed(process_file)(f)
    for f in tqdm(files, desc="Processing files", ncols=100)
)

# === Gather the scores per model ===
model_names = ["MLP", "KNN", "Bayes"]
r2_dict = {model_name: [] for model_name in model_names}
mape_dict = {model_name: [] for model_name in model_names}

for res in all_results:
    for name, (group, r2, mape) in res.items():
        r2_dict[name].append({"group": group, "r^2": r2})
        mape_dict[name].append({"group": group, "mape": mape})

# === Build the DataFrames and write them out ===
output_folder = "C:/Users/sarat/OneDrive/Desktop/3other_models"
os.makedirs(output_folder, exist_ok=True)

if not all_results:
    # With no results the frames would have no "r^2"/"mape" column and
    # sort_values would fail with an opaque KeyError; report the real cause.
    print("No input files were processed; no result CSVs were written.")
else:
    for name in model_names:
        # Explicit columns keep the schema fixed regardless of the records.
        df_r2 = pd.DataFrame(r2_dict[name], columns=["group", "r^2"]).sort_values(
            "r^2", ascending=False
        )
        df_mape = pd.DataFrame(mape_dict[name], columns=["group", "mape"]).sort_values(
            "mape", ascending=True
        )
        df_r2.to_csv(os.path.join(output_folder, f"{name}_r2.csv"), index=False)
        df_mape.to_csv(os.path.join(output_folder, f"{name}_mape.csv"), index=False)

    print("✅ 全部模型執行完畢！")

Processing files:   0%|                                                      | 0/42 [00:00<?, ?it/s]

Processing files: 100%|█████████████████████████████████████████████| 42/42 [00:43<00:00,  1.03s/it]


✅ 全部模型執行完畢！
