In [40]:
# 하이퍼파라미터 최적화 없이 바로 MLP 학습/평가 (수동 파라미터 적용)

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report, roc_curve, auc
)
from pathlib import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.stats import zscore

def extra_normalize(X_train, X_valid, X_test):
    scaler = MinMaxScaler()
    X_train_norm = scaler.fit_transform(X_train)
    X_valid_norm = scaler.transform(X_valid)
    X_test_norm  = scaler.transform(X_test)
    return X_train_norm, X_valid_norm, X_test_norm

def remove_outliers(X, y, z_thresh=3.0, min_valid_ratio=0.95):
    z = np.abs(zscore(X, axis=0, nan_policy='omit'))
    mask = (z < z_thresh).sum(axis=1) >= (X.shape[1] * min_valid_ratio)
    return X[mask], y[mask], mask

def augment_false_class(X, y, ratio=1.0, noise_std=0.05):
    X_false = X[y == 0]
    n_aug = int(len(X_false) * ratio)
    if n_aug == 0:
        return X, y
    idxs = np.random.choice(len(X_false), n_aug, replace=True)
    X_aug = X_false[idxs] + np.random.normal(0, noise_std, X_false[idxs].shape)
    y_aug = np.zeros(n_aug, dtype=y.dtype)
    X_new = np.concatenate([X, X_aug], axis=0)
    y_new = np.concatenate([y, y_aug], axis=0)
    return X_new, y_new

DATA_ROOT       = Path(r"D:\Jabez\golf\fusion\embedding_data")
AGG_METHOD      = "flatten"
DEVICE          = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE      = 32

# 수동으로 best params 입력
LR              = 0.001
WEIGHT_DECAY    = 1e-4
RETRAIN_EPOCHS  = 50
EARLY_STOP      = 10
AUG_RATIO       = 3.0
NOISE_STD       = 0.05
Z_THRESH        = 3.0

def load_split_aligned(split, model):
    base = DATA_ROOT / model / split
    emb  = np.load(base / "embeddings.npy")
    lbl  = np.load(base / "labels.npy").ravel()
    ids  = np.load(base / "ids.npy", allow_pickle=True).ravel()
    return emb, lbl, ids

def reduce_stgcn(arr):
    if AGG_METHOD == "mean":   return np.nanmean(arr, axis=1)
    if AGG_METHOD == "max":    return np.nanmax(arr, axis=1)
    if AGG_METHOD == "flatten":return arr.reshape(arr.shape[0], -1)
    raise ValueError

def align_by_ids(emb1, ids1, emb2, ids2):
    idx_map = {id_: i for i, id_ in enumerate(ids2)}
    idxs = [idx_map[id_] for id_ in ids1]
    return emb2[idxs]

# 데이터 로드 및 정렬
X_ts_tr, y_tr, ids_ts_tr = load_split_aligned("train",   "timesformer")
X_st_tr, _,    ids_st_tr = load_split_aligned("train",   "stgcnpp")
X_ts_va, y_va, ids_ts_va = load_split_aligned("valid",   "timesformer")
X_st_va, _,    ids_st_va = load_split_aligned("valid",   "stgcnpp")
X_ts_te, y_te, ids_ts_te = load_split_aligned("test",    "timesformer")
X_st_te, _,    ids_st_te = load_split_aligned("test",    "stgcnpp")

X_st_tr = reduce_stgcn(align_by_ids(X_ts_tr, ids_ts_tr, X_st_tr, ids_st_tr))
X_st_va = reduce_stgcn(align_by_ids(X_ts_va, ids_ts_va, X_st_va, ids_st_va))
X_st_te = reduce_stgcn(align_by_ids(X_ts_te, ids_ts_te, X_st_te, ids_st_te))

X_tr_raw = np.concatenate([X_ts_tr, X_st_tr], axis=1).astype(np.float32)
X_va_raw = np.concatenate([X_ts_va, X_st_va], axis=1).astype(np.float32)
X_te_raw = np.concatenate([X_ts_te, X_st_te], axis=1).astype(np.float32)

# 정규화 (StandardScaler)
scaler = StandardScaler().fit(np.vstack([X_tr_raw, X_va_raw]))
X_tr_scaled = scaler.transform(X_tr_raw)
X_va_scaled = scaler.transform(X_va_raw)
X_te_scaled = scaler.transform(X_te_raw)

# 이상치 제거 (train+valid만 적용, test는 그대로 평가)
X_trva = np.vstack([X_tr_scaled, X_va_scaled])
y_trva = np.concatenate([y_tr, y_va], axis=0)
X_trva, y_trva, mask_trva = remove_outliers(X_trva, y_trva, z_thresh=Z_THRESH)

# 추가 정규화 (MinMaxScaler)
X_trva, X_te_scaled = extra_normalize(X_trva, X_te_scaled, X_te_scaled)[:2]

# 0 클래스 데이터 보강 (train+valid에만 적용)
X_trva, y_trva = augment_false_class(X_trva, y_trva, ratio=AUG_RATIO, noise_std=NOISE_STD)

# MLP 정의
class HeadMLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        return self.net(x)

def train_mlp(X, y):
    model = HeadMLP(X.shape[1]).to(DEVICE)
    true_count = np.sum(y == 1)
    false_count = np.sum(y == 0)
    weight = torch.tensor([true_count / false_count, 1.0], dtype=torch.float32).to(DEVICE)
    crit  = nn.CrossEntropyLoss(weight=weight, label_smoothing=0.1)
    opt   = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    ds = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).long())
    dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    best_loss, patience = float('inf'), 0
    for epoch in range(1, RETRAIN_EPOCHS+1):
        model.train(); running = 0.0
        for xb, yb in dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad(); out = model(xb)
            loss = crit(out, yb); loss.backward(); opt.step()
            running += loss.item() * yb.size(0)
        avg = running / len(ds)
        if avg < best_loss:
            best_loss, patience = avg, 0
            best_state = model.state_dict()
        else:
            patience += 1
            if patience >= EARLY_STOP: break
    model.load_state_dict(best_state)
    return model

def evaluate(model, X, y, title, threshold):
    model.eval()
    with torch.no_grad():
        probs = torch.softmax(model(torch.from_numpy(X).float().to(DEVICE)), 1)[:,1].cpu().numpy()
    preds = (probs > threshold).astype(int)
    acc = accuracy_score(y, preds)
    cm = confusion_matrix(y, preds)
    auc_val = auc(*roc_curve(y, probs)[:2])
    print(f"{title} acc={acc:.4f}, auc={auc_val:.4f}, threshold={threshold}")
    print("Confusion Matrix:\n", cm)
    print(classification_report(y, preds, digits=4))
    return acc, auc_val, cm

# train+valid 데이터로 학습 (수동 파라미터 적용)
X_train_final, y_train_final = X_trva, y_trva

scaler_final = StandardScaler().fit(X_train_final)
X_train_final = scaler_final.transform(X_train_final)
X_test_final = scaler_final.transform(X_te_scaled)
X_train_final, X_test_final = extra_normalize(X_train_final, X_test_final, X_test_final)[:2]

final_model = train_mlp(X_train_final, y_train_final)


evaluate(final_model, X_test_final, y_te, f"[Test-Manual-BestParams] threshold=0.5", threshold=0.5)

[Test-Manual-BestParams] threshold=0.5 acc=0.4301, auc=0.3935, threshold=0.5
Confusion Matrix:
 [[ 13 124]
 [ 31 104]]
              precision    recall  f1-score   support

           0     0.2955    0.0949    0.1436       137
           1     0.4561    0.7704    0.5730       135

    accuracy                         0.4301       272
   macro avg     0.3758    0.4326    0.3583       272
weighted avg     0.3752    0.4301    0.3567       272



(0.43014705882352944,
 0.3935117599351176,
 array([[ 13, 124],
        [ 31, 104]], dtype=int64))

# stgcn 임베딩만 활용

In [47]:
# 하이퍼파라미터 최적화 없이 바로 MLP 학습/평가 (수동 파라미터 적용)

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report, roc_curve, auc
)
from pathlib import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.stats import zscore

def extra_normalize(X_train, X_valid, X_test):
    scaler = MinMaxScaler()
    X_train_norm = scaler.fit_transform(X_train)
    X_valid_norm = scaler.transform(X_valid)
    X_test_norm  = scaler.transform(X_test)
    return X_train_norm, X_valid_norm, X_test_norm

def remove_outliers(X, y, z_thresh=3.0, min_valid_ratio=0.95):
    z = np.abs(zscore(X, axis=0, nan_policy='omit'))
    mask = (z < z_thresh).sum(axis=1) >= (X.shape[1] * min_valid_ratio)
    return X[mask], y[mask], mask

def augment_false_class(X, y, ratio=1.0, noise_std=0.05):
    X_false = X[y == 0]
    n_aug = int(len(X_false) * ratio)
    if n_aug == 0:
        return X, y
    idxs = np.random.choice(len(X_false), n_aug, replace=True)
    X_aug = X_false[idxs] + np.random.normal(0, noise_std, X_false[idxs].shape)
    y_aug = np.zeros(n_aug, dtype=y.dtype)
    X_new = np.concatenate([X, X_aug], axis=0)
    y_new = np.concatenate([y, y_aug], axis=0)
    return X_new, y_new

DATA_ROOT       = Path(r"D:\Jabez\golf\fusion\embedding_data")
AGG_METHOD      = "max"
DEVICE          = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE      = 32

# 수동으로 best params 입력
LR              = 0.001
WEIGHT_DECAY    = 1e-4
RETRAIN_EPOCHS  = 50
EARLY_STOP      = 10
AUG_RATIO       = 1.0   # false 데이터 보강 비율, 1.0이 두배
NOISE_STD       = 0.05  # 노이즈 표준편차
Z_THRESH        = 3.0

def load_split_aligned(split, model):
    base = DATA_ROOT / model / split
    emb  = np.load(base / "embeddings.npy")
    lbl  = np.load(base / "labels.npy").ravel()
    ids  = np.load(base / "ids.npy", allow_pickle=True).ravel()
    return emb, lbl, ids

def reduce_stgcn(arr):
    if AGG_METHOD == "mean":   return np.nanmean(arr, axis=1)
    if AGG_METHOD == "max":    return np.nanmax(arr, axis=1)
    if AGG_METHOD == "flatten":return arr.reshape(arr.shape[0], -1)
    raise ValueError

def align_by_ids(emb1, ids1, emb2, ids2):
    idx_map = {id_: i for i, id_ in enumerate(ids2)}
    idxs = [idx_map[id_] for id_ in ids1]
    return emb2[idxs]

# --- 기존 데이터 로드 및 정렬 부분에서 stgcn만 사용 ---
X_st_tr, y_tr, ids_st_tr = load_split_aligned("train",   "stgcnpp")
X_st_va, y_va, ids_st_va = load_split_aligned("valid",   "stgcnpp")
X_st_te, y_te, ids_st_te = load_split_aligned("test",    "stgcnpp")

X_st_tr = reduce_stgcn(X_st_tr)
X_st_va = reduce_stgcn(X_st_va)
X_st_te = reduce_stgcn(X_st_te)

X_tr_raw = X_st_tr.astype(np.float32)
X_va_raw = X_st_va.astype(np.float32)
X_te_raw = X_st_te.astype(np.float32)

# 정규화 (StandardScaler)
scaler = StandardScaler().fit(np.vstack([X_tr_raw, X_va_raw]))
X_tr_scaled = scaler.transform(X_tr_raw)
X_va_scaled = scaler.transform(X_va_raw)
X_te_scaled = scaler.transform(X_te_raw)

# 이상치 제거 (train+valid만 적용, test는 그대로 평가)
X_trva = np.vstack([X_tr_scaled, X_va_scaled])
y_trva = np.concatenate([y_tr, y_va], axis=0)
X_trva, y_trva, mask_trva = remove_outliers(X_trva, y_trva, z_thresh=Z_THRESH)

# 추가 정규화 (MinMaxScaler)
X_trva, X_te_scaled = extra_normalize(X_trva, X_te_scaled, X_te_scaled)[:2]

# 0 클래스 데이터 보강 (train+valid에만 적용)
X_trva, y_trva = augment_false_class(X_trva, y_trva, ratio=AUG_RATIO, noise_std=NOISE_STD)

# MLP 정의
class HeadMLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        return self.net(x)

def train_mlp(X, y):
    model = HeadMLP(X.shape[1]).to(DEVICE)
    true_count = np.sum(y == 1)
    false_count = np.sum(y == 0)
    weight = torch.tensor([true_count / false_count, 1.0], dtype=torch.float32).to(DEVICE)
    crit  = nn.CrossEntropyLoss(weight=weight, label_smoothing=0.1)
    opt   = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    ds = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).long())
    dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    best_loss, patience = float('inf'), 0
    for epoch in range(1, RETRAIN_EPOCHS+1):
        model.train(); running = 0.0
        for xb, yb in dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad(); out = model(xb)
            loss = crit(out, yb); loss.backward(); opt.step()
            running += loss.item() * yb.size(0)
        avg = running / len(ds)
        if avg < best_loss:
            best_loss, patience = avg, 0
            best_state = model.state_dict()
        else:
            patience += 1
            if patience >= EARLY_STOP: break
    model.load_state_dict(best_state)
    return model

def evaluate(model, X, y, title, threshold):
    model.eval()
    with torch.no_grad():
        probs = torch.softmax(model(torch.from_numpy(X).float().to(DEVICE)), 1)[:,1].cpu().numpy()
    preds = (probs > threshold).astype(int)
    acc = accuracy_score(y, preds)
    cm = confusion_matrix(y, preds)
    auc_val = auc(*roc_curve(y, probs)[:2])
    print(f"{title} acc={acc:.4f}, auc={auc_val:.4f}, threshold={threshold}")
    print("Confusion Matrix:\n", cm)
    print(classification_report(y, preds, digits=4))
    return acc, auc_val, cm

# train+valid 데이터로 학습 (수동 파라미터 적용)
X_train_final, y_train_final = X_trva, y_trva

scaler_final = StandardScaler().fit(X_train_final)
X_train_final = scaler_final.transform(X_train_final)
X_test_final = scaler_final.transform(X_te_scaled)
X_train_final, X_test_final = extra_normalize(X_train_final, X_test_final, X_test_final)[:2]

final_model = train_mlp(X_train_final, y_train_final)


evaluate(final_model, X_test_final, y_te, f"[Test-Manual-BestParams] threshold=0.5", threshold=0.5)


[Test-Manual-BestParams] threshold=0.5 acc=0.5221, auc=0.4896, threshold=0.5
Confusion Matrix:
 [[50 87]
 [43 92]]
              precision    recall  f1-score   support

           0     0.5376    0.3650    0.4348       137
           1     0.5140    0.6815    0.5860       135

    accuracy                         0.5221       272
   macro avg     0.5258    0.5232    0.5104       272
weighted avg     0.5259    0.5221    0.5098       272



(0.5220588235294118,
 0.4895917815625844,
 array([[50, 87],
        [43, 92]], dtype=int64))

# timesformer 임베딩

In [48]:
# 하이퍼파라미터 최적화 없이 바로 MLP 학습/평가 (수동 파라미터 적용)

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report, roc_curve, auc
)
from pathlib import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.stats import zscore

def extra_normalize(X_train, X_valid, X_test):
    scaler = MinMaxScaler()
    X_train_norm = scaler.fit_transform(X_train)
    X_valid_norm = scaler.transform(X_valid)
    X_test_norm  = scaler.transform(X_test)
    return X_train_norm, X_valid_norm, X_test_norm

def remove_outliers(X, y, z_thresh=3.0, min_valid_ratio=0.95):
    z = np.abs(zscore(X, axis=0, nan_policy='omit'))
    mask = (z < z_thresh).sum(axis=1) >= (X.shape[1] * min_valid_ratio)
    return X[mask], y[mask], mask

def augment_false_class(X, y, ratio=1.0, noise_std=0.05):
    X_false = X[y == 0]
    n_aug = int(len(X_false) * ratio)
    if n_aug == 0:
        return X, y
    idxs = np.random.choice(len(X_false), n_aug, replace=True)
    X_aug = X_false[idxs] + np.random.normal(0, noise_std, X_false[idxs].shape)
    y_aug = np.zeros(n_aug, dtype=y.dtype)
    X_new = np.concatenate([X, X_aug], axis=0)
    y_new = np.concatenate([y, y_aug], axis=0)
    return X_new, y_new

DATA_ROOT       = Path(r"D:\Jabez\golf\fusion\embedding_data")
AGG_METHOD      = "flatten"
DEVICE          = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE      = 16

# 수동으로 best params 입력
LR              = 0.001
WEIGHT_DECAY    = 1e-4
RETRAIN_EPOCHS  = 50
EARLY_STOP      = 10
AUG_RATIO       = 1.0
NOISE_STD       = 0.05
Z_THRESH        = 3.0

def load_split_aligned(split, model):
    base = DATA_ROOT / model / split
    emb  = np.load(base / "embeddings.npy")
    lbl  = np.load(base / "labels.npy").ravel()
    ids  = np.load(base / "ids.npy", allow_pickle=True).ravel()
    return emb, lbl, ids

def reduce_stgcn(arr):
    if AGG_METHOD == "mean":   return np.nanmean(arr, axis=1)
    if AGG_METHOD == "max":    return np.nanmax(arr, axis=1)
    if AGG_METHOD == "flatten":return arr.reshape(arr.shape[0], -1)
    raise ValueError

def align_by_ids(emb1, ids1, emb2, ids2):
    idx_map = {id_: i for i, id_ in enumerate(ids2)}
    idxs = [idx_map[id_] for id_ in ids1]
    return emb2[idxs]

# --- 기존 데이터 로드 및 정렬 부분에서 timesformer만 사용 ---
X_ts_tr, y_tr, ids_ts_tr = load_split_aligned("train",   "timesformer")
X_ts_va, y_va, ids_ts_va = load_split_aligned("valid",   "timesformer")
X_ts_te, y_te, ids_ts_te = load_split_aligned("test",    "timesformer")

X_ts_tr = X_ts_tr.astype(np.float32)
X_ts_va = X_ts_va.astype(np.float32)
X_ts_te = X_ts_te.astype(np.float32)

X_tr_raw = X_ts_tr
X_va_raw = X_ts_va
X_te_raw = X_ts_te

# 정규화 (StandardScaler)
scaler = StandardScaler().fit(np.vstack([X_tr_raw, X_va_raw]))
X_tr_scaled = scaler.transform(X_tr_raw)
X_va_scaled = scaler.transform(X_va_raw)
X_te_scaled = scaler.transform(X_te_raw)

# 이상치 제거 (train+valid만 적용, test는 그대로 평가)
X_trva = np.vstack([X_tr_scaled, X_va_scaled])
y_trva = np.concatenate([y_tr, y_va], axis=0)
X_trva, y_trva, mask_trva = remove_outliers(X_trva, y_trva, z_thresh=Z_THRESH)

# 추가 정규화 (MinMaxScaler)
X_trva, X_te_scaled = extra_normalize(X_trva, X_te_scaled, X_te_scaled)[:2]

# 0 클래스 데이터 보강 (train+valid에만 적용)
X_trva, y_trva = augment_false_class(X_trva, y_trva, ratio=AUG_RATIO, noise_std=NOISE_STD)

# MLP 정의
class HeadMLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        return self.net(x)

def train_mlp(X, y):
    model = HeadMLP(X.shape[1]).to(DEVICE)
    true_count = np.sum(y == 1)
    false_count = np.sum(y == 0)
    weight = torch.tensor([true_count / false_count, 1.0], dtype=torch.float32).to(DEVICE)
    crit  = nn.CrossEntropyLoss(weight=weight, label_smoothing=0.1)
    opt   = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    ds = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).long())
    dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    best_loss, patience = float('inf'), 0
    for epoch in range(1, RETRAIN_EPOCHS+1):
        model.train(); running = 0.0
        for xb, yb in dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad(); out = model(xb)
            loss = crit(out, yb); loss.backward(); opt.step()
            running += loss.item() * yb.size(0)
        avg = running / len(ds)
        if avg < best_loss:
            best_loss, patience = avg, 0
            best_state = model.state_dict()
        else:
            patience += 1
            if patience >= EARLY_STOP: break
    model.load_state_dict(best_state)
    return model

def evaluate(model, X, y, title, threshold):
    model.eval()
    with torch.no_grad():
        probs = torch.softmax(model(torch.from_numpy(X).float().to(DEVICE)), 1)[:,1].cpu().numpy()
    preds = (probs > threshold).astype(int)
    acc = accuracy_score(y, preds)
    cm = confusion_matrix(y, preds)
    auc_val = auc(*roc_curve(y, probs)[:2])
    print(f"{title} acc={acc:.4f}, auc={auc_val:.4f}, threshold={threshold}")
    print("Confusion Matrix:\n", cm)
    print(classification_report(y, preds, digits=4))
    return acc, auc_val, cm

# train+valid 데이터로 학습 (수동 파라미터 적용)
X_train_final, y_train_final = X_trva, y_trva

scaler_final = StandardScaler().fit(X_train_final)
X_train_final = scaler_final.transform(X_train_final)
X_test_final = scaler_final.transform(X_te_scaled)
X_train_final, X_test_final = extra_normalize(X_train_final, X_test_final, X_test_final)[:2]

final_model = train_mlp(X_train_final, y_train_final)

evaluate(final_model, X_test_final, y_te, f"[Test-Manual-BestParams] threshold=0.50", threshold=0.5)


[Test-Manual-BestParams] threshold=0.50 acc=0.4338, auc=0.4069, threshold=0.5
Confusion Matrix:
 [[ 23 114]
 [ 40  95]]
              precision    recall  f1-score   support

           0     0.3651    0.1679    0.2300       137
           1     0.4545    0.7037    0.5523       135

    accuracy                         0.4338       272
   macro avg     0.4098    0.4358    0.3912       272
weighted avg     0.4095    0.4338    0.3900       272



(0.4338235294117647,
 0.40692078940254117,
 array([[ 23, 114],
        [ 40,  95]], dtype=int64))

# 하이퍼파라미터 최적화

In [5]:
# mlp_finetune_evaluation.py
# Fusion Embedding 기반 MLP 학습 및 평가 전체 파이프라인 (Jupyter 환경용)

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report, roc_curve, auc
)
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import StandardScaler

# ▪ 추가 정규화 함수 (MinMaxScaler)
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore

def extra_normalize(X_train, X_valid, X_test):
    scaler = MinMaxScaler()
    X_train_norm = scaler.fit_transform(X_train)
    X_valid_norm = scaler.transform(X_valid)
    X_test_norm  = scaler.transform(X_test)
    return X_train_norm, X_valid_norm, X_test_norm

def remove_outliers(X, y, z_thresh=3.0, min_valid_ratio=0.95):
    z = np.abs(zscore(X, axis=0, nan_policy='omit'))
    # 전체 feature 중 min_valid_ratio 이상이 z_thresh 이하면 통과
    mask = (z < z_thresh).sum(axis=1) >= (X.shape[1] * min_valid_ratio)
    return X[mask], y[mask], mask

# ▪ 0 클래스(negative) 데이터 보강 함수 (간단한 노이즈 증강 예시)
def augment_false_class(X, y, ratio=2.0, noise_std=0.01):
    X_false = X[y == 0]
    n_aug = int(len(X_false) * ratio)
    if n_aug == 0:
        return X, y
    idxs = np.random.choice(len(X_false), n_aug, replace=True)
    X_aug = X_false[idxs] + np.random.normal(0, noise_std, X_false[idxs].shape)
    y_aug = np.zeros(n_aug, dtype=y.dtype)
    X_new = np.concatenate([X, X_aug], axis=0)
    y_new = np.concatenate([y, y_aug], axis=0)
    return X_new, y_new

# ▪ 설정
DATA_ROOT       = Path(r"D:\Jabez\golf\fusion\embedding_data")
AGG_METHOD      = "flatten"  # "mean" / "max" / "flatten"
DEVICE          = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE      = 64
LR              = 1e-4
WEIGHT_DECAY    = 1e-4
RETRAIN_EPOCHS  = 50
EARLY_STOP      = 5

# ▪ 데이터 로드 및 정렬 함수
def load_split_aligned(split, model):
    base = DATA_ROOT / model / split
    emb  = np.load(base / "embeddings.npy")
    lbl  = np.load(base / "labels.npy").ravel()
    ids  = np.load(base / "ids.npy", allow_pickle=True).ravel()
    return emb, lbl, ids

def reduce_stgcn(arr):
    if AGG_METHOD == "mean":   return np.nanmean(arr, axis=1)
    if AGG_METHOD == "max":    return np.nanmax(arr, axis=1)
    if AGG_METHOD == "flatten":return arr.reshape(arr.shape[0], -1)
    raise ValueError

def align_by_ids(emb1, ids1, emb2, ids2):
    idx_map = {id_: i for i, id_ in enumerate(ids2)}
    idxs = [idx_map[id_] for id_ in ids1]
    return emb2[idxs]

# ▪ Split 별 TS/ST 로드 및 정렬
X_ts_tr, y_tr, ids_ts_tr = load_split_aligned("train",   "timesformer")
X_st_tr, _,    ids_st_tr = load_split_aligned("train",   "stgcnpp")
X_ts_va, y_va, ids_ts_va = load_split_aligned("valid",   "timesformer")
X_st_va, _,    ids_st_va = load_split_aligned("valid",   "stgcnpp")
X_ts_te, y_te, ids_ts_te = load_split_aligned("test",    "timesformer")
X_st_te, _,    ids_st_te = load_split_aligned("test",    "stgcnpp")

X_st_tr = reduce_stgcn(align_by_ids(X_ts_tr, ids_ts_tr, X_st_tr, ids_st_tr))
X_st_va = reduce_stgcn(align_by_ids(X_ts_va, ids_ts_va, X_st_va, ids_st_va))
X_st_te = reduce_stgcn(align_by_ids(X_ts_te, ids_ts_te, X_st_te, ids_st_te))

X_tr_raw = np.concatenate([X_ts_tr, X_st_tr], axis=1).astype(np.float32)
X_va_raw = np.concatenate([X_ts_va, X_st_va], axis=1).astype(np.float32)
X_te_raw = np.concatenate([X_ts_te, X_st_te], axis=1).astype(np.float32)

# ▪ 정규화 (StandardScaler)
scaler = StandardScaler().fit(np.vstack([X_tr_raw, X_va_raw]))
X_tr_scaled = scaler.transform(X_tr_raw)
X_va_scaled = scaler.transform(X_va_raw)
X_te_scaled = scaler.transform(X_te_raw)

# ▪ 이상치 제거 (train+valid만 적용, test는 그대로 평가)
X_trva = np.vstack([X_tr_scaled, X_va_scaled])
y_trva = np.concatenate([y_tr, y_va], axis=0)
X_trva, y_trva, mask_trva = remove_outliers(X_trva, y_trva, z_thresh=3.0)

# ▪ 추가 정규화 (MinMaxScaler)
X_trva, X_te_scaled = extra_normalize(X_trva, X_te_scaled, X_te_scaled)[:2]  # train+valid, test

# ▪ 0 클래스 데이터 보강 (train+valid에만 적용)
X_trva, y_trva = augment_false_class(X_trva, y_trva, ratio=1.0, noise_std=0.01)

# ▪ 모델 정의
class HeadMLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        return self.net(x)

# ▪ 학습 함수 (클래스 불균형 가중치 적용)
def train_mlp(X, y):
    model = HeadMLP(X.shape[1]).to(DEVICE)
    true_count = np.sum(y == 1)
    false_count = np.sum(y == 0)
    # 아래 줄 삭제: running += loss.item() * yb.size(0)
    weight = torch.tensor([true_count / false_count, 1.0], dtype=torch.float32).to(DEVICE)
    crit  = nn.CrossEntropyLoss(weight=weight, label_smoothing=0.1)
    opt   = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    ds = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).long())
    dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    best_loss, patience = float('inf'), 0
    for epoch in range(1, RETRAIN_EPOCHS+1):
        model.train(); running = 0.0
        for xb, yb in dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad(); out = model(xb)
            loss = crit(out, yb); loss.backward(); opt.step()
            running += loss.item() * yb.size(0)
        avg = running / len(ds)
        if avg < best_loss:
            best_loss, patience = avg, 0
            best_state = model.state_dict()
        else:
            patience += 1
            if patience >= EARLY_STOP: break
    model.load_state_dict(best_state)
    return model

# ▪ 평가 함수 (임계값 조정 추가)
def evaluate(model, X, y, title, threshold=0.5):
    model.eval()
    with torch.no_grad():
        # ⬇️ float() 추가
        probs = torch.softmax(model(torch.from_numpy(X).float().to(DEVICE)), 1)[:,1].cpu().numpy()
    preds = (probs > threshold).astype(int)
    acc = accuracy_score(y, preds)
    fpr, tpr, _ = roc_curve(y, probs)
    auc_val = auc(fpr, tpr)
    print(f"{title} acc={acc:.4f}, auc={auc_val:.4f}, threshold={threshold}")
    print("CM:\n", confusion_matrix(y, preds))
    print(classification_report(y, preds, digits=4))
    return fpr, tpr, auc_val

from sklearn.model_selection import StratifiedKFold, ParameterGrid

# ▪ N-fold 교차검증 + 하이퍼파라미터 탐색 함수
def nfold_hyperopt(X, y, param_grid, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    best_score = -1
    best_params = None
    best_model = None

    for params in ParameterGrid(param_grid):
        fold_scores = []
        for train_idx, val_idx in skf.split(X, y):
            X_tr, X_val = X[train_idx], X[val_idx]
            y_tr, y_val = y[train_idx], y[val_idx]

            # 데이터 증강 및 이상치 제거 (fold 내에서만)
            X_tr_, y_tr_ = augment_false_class(X_tr, y_tr, ratio=params['aug_ratio'], noise_std=params['noise_std'])
            X_tr_, y_tr_, _ = remove_outliers(X_tr_, y_tr_, z_thresh=params['z_thresh'])

            # ⬇️ 데이터가 0개면 fold 스킵
            if len(X_tr_) == 0 or len(y_tr_) == 0:
                print(f"⚠️ Fold skipped: No data after outlier removal (params={params})")
                continue

            # 정규화 (fit은 train, transform은 train/val)
            scaler = StandardScaler().fit(X_tr_)
            X_tr_ = scaler.transform(X_tr_)
            X_val_ = scaler.transform(X_val)

            # MinMax 추가 정규화
            X_tr_, X_val_ = extra_normalize(X_tr_, X_val_, X_val_)[:2]

            # 모델 학습
            global LR, WEIGHT_DECAY, RETRAIN_EPOCHS, EARLY_STOP
            LR, WEIGHT_DECAY, RETRAIN_EPOCHS, EARLY_STOP = params['lr'], params['weight_decay'], params['epochs'], params['early_stop']
            model = train_mlp(X_tr_, y_tr_)

            # 검증 점수(AUC)
            _, _, auc_val = evaluate(model, X_val_, y_val, title="[Val]", threshold=0.5)
            fold_scores.append(auc_val)

        # fold_scores가 비어있으면 평균 계산하지 않음
        if len(fold_scores) == 0:
            print(f"⚠️ All folds skipped for params={params}")
            continue

        avg_score = np.mean(fold_scores)
        print(f"Params: {params} | Avg AUC: {avg_score:.4f}")
        if avg_score > best_score:
            best_score = avg_score
            best_params = params

    print(f"\nBest Params: {best_params} | Best CV AUC: {best_score:.4f}")
    return best_params

# ▪ 하이퍼파라미터 탐색 공간 정의
param_grid = {
    'lr':         [1e-3, 1e-4],
    'weight_decay': [1e-4, 1e-5],
    'epochs':     [30, 50],
    'early_stop': [5, 10],
    'aug_ratio':  [0.5, 1.0],
    'noise_std':  [0.01, 0.05],
    'z_thresh':   [2.5, 3.0]
}

# ▪ train+valid 데이터로 최적 하이퍼파라미터 탐색
best_params = nfold_hyperopt(X_trva, y_trva, param_grid, n_splits=5)

# ▪ 최적 하이퍼파라미터로 전체 train+valid 재학습, test 평가
# (이상치 제거, 증강, 정규화 동일하게 적용)
X_train_final, y_train_final = augment_false_class(X_trva, y_trva, ratio=best_params['aug_ratio'], noise_std=best_params['noise_std'])
X_train_final, y_train_final, _ = remove_outliers(X_train_final, y_train_final, z_thresh=best_params['z_thresh'])
scaler_final = StandardScaler().fit(X_train_final)
X_train_final = scaler_final.transform(X_train_final)
X_test_final = scaler_final.transform(X_te_scaled)
X_train_final, X_test_final = extra_normalize(X_train_final, X_test_final, X_test_final)[:2]

LR, WEIGHT_DECAY, RETRAIN_EPOCHS, EARLY_STOP = best_params['lr'], best_params['weight_decay'], best_params['epochs'], best_params['early_stop']
final_model = train_mlp(X_train_final, y_train_final)

# ▪ Test 평가 (threshold=0.5 기준)
evaluate(final_model, X_test_final, y_te, "[Test-Optimized]", threshold=0.5)

[Val] acc=0.9116, auc=0.9644, threshold=0.5
CM:
 [[143  18]
 [ 20 249]]
              precision    recall  f1-score   support

           0     0.8773    0.8882    0.8827       161
           1     0.9326    0.9257    0.9291       269

    accuracy                         0.9116       430
   macro avg     0.9049    0.9069    0.9059       430
weighted avg     0.9119    0.9116    0.9117       430

[Val] acc=0.9349, auc=0.9618, threshold=0.5
CM:
 [[150  11]
 [ 17 252]]
              precision    recall  f1-score   support

           0     0.8982    0.9317    0.9146       161
           1     0.9582    0.9368    0.9474       269

    accuracy                         0.9349       430
   macro avg     0.9282    0.9342    0.9310       430
weighted avg     0.9357    0.9349    0.9351       430

[Val] acc=0.9349, auc=0.9618, threshold=0.5
CM:
 [[150  11]
 [ 17 252]]
              precision    recall  f1-score   support

           0     0.8982    0.9317    0.9146       161
           1     0.95

(array([0.        , 0.00729927, 0.03649635, 0.03649635, 0.06569343,
        0.06569343, 0.11678832, 0.11678832, 0.12408759, 0.12408759,
        0.1459854 , 0.1459854 , 0.16058394, 0.16058394, 0.17518248,
        0.17518248, 0.18248175, 0.18248175, 0.19708029, 0.19708029,
        0.20437956, 0.20437956, 0.21167883, 0.21167883, 0.23357664,
        0.23357664, 0.27737226, 0.27737226, 0.2919708 , 0.2919708 ,
        0.30656934, 0.30656934, 0.32116788, 0.32116788, 0.32846715,
        0.32846715, 0.3649635 , 0.3649635 , 0.40145985, 0.40145985,
        0.40875912, 0.40875912, 0.41605839, 0.41605839, 0.42335766,
        0.42335766, 0.43065693, 0.43065693, 0.44525547, 0.44525547,
        0.46715328, 0.46715328, 0.47445255, 0.47445255, 0.48175182,
        0.48175182, 0.51824818, 0.51824818, 0.53284672, 0.53284672,
        0.54014599, 0.54014599, 0.55474453, 0.55474453, 0.56934307,
        0.56934307, 0.57664234, 0.57664234, 0.59124088, 0.59124088,
        0.60583942, 0.60583942, 0.62043796, 0.62