In [1]:
# 📄 src/preprocessing/anomaly_preprocess.py

import os
import numpy as np
from scipy.io import loadmat

def preprocess_mat_folder(folder_path, selected_channels, save_path, label, augment_times=1):
    """Convert a folder of ``.mat`` recordings into stacked, normalised ``.npy`` arrays.

    Every ``*.mat`` file in ``folder_path`` (visited in lexicographic filename
    order) is loaded, the requested channels are stacked along a new axis 1,
    and each channel is z-score normalised using the mean/std computed over
    all of that file's values for the channel.  The result is written to
    ``save_path`` as ``X_label{label}.npy`` and ``y_label{label}.npy``.

    Args:
        folder_path: Directory containing the ``.mat`` files.
        selected_channels: Variable names to read from each ``.mat`` file.
            Each is expected to be a 2-D array of shape (epochs, time); all
            channels in one file must share the same shape.
        save_path: Output directory; created if it does not exist.
        label: Value assigned to every epoch loaded from this folder.
        augment_times: How many times each file's data is repeated in the
            output.  This is plain duplication (oversampling), not a
            transformation of the signal.

    Raises:
        ValueError: If nothing was accumulated (no ``.mat`` files, every file
            skipped for missing channels, or ``augment_times <= 0``).
    """
    X_all = []
    y_all = []

    for fname in sorted(os.listdir(folder_path)):
        if not fname.endswith(".mat"):
            continue

        mat_data = loadmat(os.path.join(folder_path, fname))

        # A file is only usable when every requested channel is present.
        missing = [ch for ch in selected_channels if ch not in mat_data]
        if missing:
            for ch in missing:
                print(f"⚠️ {ch} not found in {fname}")
            print(f"❌ Skipping {fname} due to missing channels")
            continue

        # (epochs, channels, time); np.stack always returns a fresh array, so
        # the in-place arithmetic below never touches loadmat's buffers.
        data = np.stack([mat_data[ch] for ch in selected_channels], axis=1)

        # Integer-typed recordings must be promoted first: writing z-scores
        # back into an integer array would silently truncate them.
        if not np.issubdtype(data.dtype, np.floating):
            data = data.astype(np.float64)

        # Z-score normalise each channel over all epochs and time samples.
        mean = data.mean(axis=(0, 2), keepdims=True)
        std = data.std(axis=(0, 2), keepdims=True) + 1e-6  # guard against flat channels
        data -= mean
        data /= std

        labels = np.full((data.shape[0],), label)

        # Oversample by repetition (used mainly for the healthy class).
        # np.concatenate copies on its own, so sharing references is enough.
        X_all.extend([data] * augment_times)
        y_all.extend([labels] * augment_times)

        print(f"✅ {fname} → {data.shape} x {augment_times} 회 누적")

    if not X_all:
        raise ValueError(
            f"No data accumulated from {folder_path!r}: no usable .mat files "
            f"or augment_times <= 0"
        )

    X_all = np.concatenate(X_all, axis=0)
    y_all = np.concatenate(y_all, axis=0)

    os.makedirs(save_path, exist_ok=True)
    np.save(os.path.join(save_path, f'X_label{label}.npy'), X_all)
    np.save(os.path.join(save_path, f'y_label{label}.npy'), y_all)
    print(f"📁 Saved to {save_path} | X: {X_all.shape}, y: {y_all.shape}")

if __name__ == "__main__":
    # Variable names read from every .mat recording.
    selected_channels = [
        'F3_A2', 'C3_A2', 'O1_A2', 'F4_A1', 'C4_A1',
        'O2_A1', 'LOC_A2', 'ROC_A1', 'X1', 'X2',
    ]
    processed_dir = "../../Database/processed"

    # (raw folder, class label, repetition count).
    # The healthy class is repeated 10x; the unhealthy class is used as-is.
    jobs = (
        ("../../Database/raw/Healthy_Subjects", 0, 10),
        ("../../Database/raw/Unhealthy_Subjects", 1, 1),
    )
    for raw_dir, class_label, repeats in jobs:
        preprocess_mat_folder(
            raw_dir,
            selected_channels,
            processed_dir,
            label=class_label,
            augment_times=repeats,
        )


✅ subject1.mat → (924, 10, 6000) x 10 회 누적
✅ subject10.mat → (766, 10, 6000) x 10 회 누적
✅ subject2.mat → (911, 10, 6000) x 10 회 누적
✅ subject3.mat → (794, 10, 6000) x 10 회 누적
✅ subject4.mat → (764, 10, 6000) x 10 회 누적
✅ subject5.mat → (914, 10, 6000) x 10 회 누적
✅ subject6.mat → (823, 10, 6000) x 10 회 누적
✅ subject7.mat → (784, 10, 6000) x 10 회 누적
✅ subject8.mat → (970, 10, 6000) x 10 회 누적
✅ subject9.mat → (939, 10, 6000) x 10 회 누적
📁 Saved to ../../Database/processed | X: (85890, 10, 6000), y: (85890,)
✅ subject1.mat → (850, 10, 6000) x 1 회 누적
✅ subject10.mat → (812, 10, 6000) x 1 회 누적
✅ subject100.mat → (831, 10, 6000) x 1 회 누적
✅ subject11.mat → (967, 10, 6000) x 1 회 누적
✅ subject12.mat → (820, 10, 6000) x 1 회 누적
✅ subject13.mat → (852, 10, 6000) x 1 회 누적
✅ subject14.mat → (876, 10, 6000) x 1 회 누적
✅ subject15.mat → (756, 10, 6000) x 1 회 누적
✅ subject16.mat → (853, 10, 6000) x 1 회 누적
✅ subject17.mat → (821, 10, 6000) x 1 회 누적
✅ subject18.mat → (969, 10, 6000) x 1 회 누적
✅ subject19.mat → (798, 