### 環境建置

In [2]:
# TEST necessary for when working with external scripts
%load_ext autoreload
%autoreload 2

In [3]:
import os
import pandas as pd
import numpy as np
import pickle

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import LeaveOneGroupOut, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report
from scipy.signal import butter, lfilter
from scipy import stats
from sklearn.preprocessing import MinMaxScaler


# 定義濾波器函數
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    db = np.sum(y**2)
    return db

def signal_frequency_band_energies(sampled_signal, frequency_bands, sampling_frequency, order=5):
    energies = []
    for bands in frequency_bands:
        energies.append(butter_bandpass_filter(sampled_signal, bands[0], bands[1], sampling_frequency, order))
    return energies

def extract_windows(data, window_size):
    windows = []
    for i in range(0, len(data) - window_size, window_size):  # 每次跳過window_size的長度
        window = data[i:i + window_size]
        
        if len(window) == window_size:  # 確保每個窗口的大小是固定的
            windows.append(window)
    return np.array(windows)

### 資料讀取

In [77]:
BASE_PATH = "../WESAD/"

all_processed_windows = []

for i in range(2, 18):
    if i == 12: 
        continue

    subject_path = os.path.join(BASE_PATH, f'S{i}')
    pickle_file = os.path.join(subject_path, f'S{i}.pkl')

    with open(pickle_file, 'rb') as f:
        data = pickle.load(f, encoding='bytes')

        labels = data[b'label']
        signals = data[b'signal'][b'chest']

        # 只選擇 label == 1 或 label == 2 的數據
        mask = (labels == 1) | (labels == 2)
        labels = labels[mask]
        signals = {k: v[mask] for k, v in signals.items()}

        acc_signal = signals[b'ACC'].flatten()
        ecg_signal = signals[b'ECG'].flatten()
        emg_signal = signals[b'EMG'].flatten()
        eda_signal = signals[b'EDA'].flatten()
        resp_signal = signals[b'Resp'].flatten()
        temp_signal = signals[b'Temp'].flatten()
        
        # 確保所有信號長度一致
        min_length = min(len(acc_signal), len(ecg_signal), len(emg_signal), len(eda_signal), len(resp_signal), len(temp_signal), len(labels))
        ecg_signal = ecg_signal[:min_length]
        emg_signal = emg_signal[:min_length]
        eda_signal = eda_signal[:min_length]
        resp_signal = resp_signal[:min_length]
        temp_signal = temp_signal[:min_length]
        labels = labels[:min_length]

        window_size = 2100
        
        # 獲取窗口的索引範圍
        windows_indices = [(i, i+window_size) for i in range(0, min_length-window_size, window_size)]
        
        # 對每個窗口處理
        for start_idx, end_idx in windows_indices:
            # 提取該窗口的各信號
            acc_window = acc_signal[start_idx:end_idx]
            ecg_window = ecg_signal[start_idx:end_idx]
            emg_window = emg_signal[start_idx:end_idx]
            eda_window = eda_signal[start_idx:end_idx]
            resp_window = resp_signal[start_idx:end_idx]
            temp_window = temp_signal[start_idx:end_idx]
            
            # 獲取該窗口的主要標籤（可以使用眾數或其他方法）
            window_labels = labels[start_idx:end_idx]
            label_mode = stats.mode(window_labels, keepdims=True)[0][0]
            
            # 計算ECG的頻帶能量
            ecg_bands = signal_frequency_band_energies(ecg_window, 
                                                      [[0.01, 0.04], [0.04, 0.15], [0.15, 0.4], [0.4, 1.0]], 
                                                      700)

            # 計算各信號的統計特徵
            def compute_features(signal_window):
                mean = np.mean(signal_window)
                std = np.std(signal_window)
                return mean, std

            # ACC特徵
            acc_mean, acc_std = compute_features(acc_window)
            # EDA特徵
            eda_mean, eda_std = compute_features(eda_window)
            # EMG特徵
            emg_mean, emg_std = compute_features(emg_window)
            # Resp特徵
            resp_mean, resp_std = compute_features(resp_window)
            # Temp特徵
            temp_mean, temp_std = compute_features(temp_window)
            
            # 將所有特徵和標籤組合成
            window_features = np.concatenate([ecg_bands, 
                                              [acc_mean, acc_std], 
                                              [eda_mean, eda_std], 
                                              [emg_mean, emg_std], 
                                              [resp_mean, resp_std], 
                                              [temp_mean, temp_std], 
                                              [label_mode]])

            
            all_processed_windows.append(window_features)

columns = ['ECG_ULF', 'ECG_LF', 'ECG_HF', 'ECG_UHF', 'ACC_mean', 'ACC_std', 
           'EDA_mean', 'EDA_std', 'EMG_mean', 'EMG_std', 'Resp_mean', 'Resp_std', 
           'Temp_mean', 'Temp_std', 'Label']

processed_data_df = pd.DataFrame(all_processed_windows, columns=columns)

# 正規化所有特徵
scaler = MinMaxScaler()

# 提取特徵列
features = processed_data_df.iloc[:, :-1].values  # 所有特徵列，排除標籤和受試者ID
features_normalized = scaler.fit_transform(features)

# 創建一個新的DataFrame來存儲正規化後的數據
normalized_df = pd.DataFrame(features_normalized, columns=processed_data_df.columns[:-1])

# 將正規化後的數據與標籤和受試者ID合併
normalized_df['Label'] = processed_data_df['Label']

output_file = "../output/lab2_data.csv"
normalized_df.to_csv(output_file, index=False)

print(f"Processed data saved to {output_file}")


正規化完成並保存至 CSV。


In [79]:
df = pd.read_csv("../output/lab2_data.csv")

# 分開特徵和標籤

X = df.drop(columns=['Label'])  # 移除標籤 (y)  
y = df['Label'] 


# 切分訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 模型

決策樹

In [80]:
# 初始化並訓練決策樹
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
model_name = "DecisionTree"

KNN

In [65]:

model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)
model_name = "KNN"


隨機森林

In [67]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
model_name = "RandomForest"



AdaBoost 決策樹

In [69]:

model = AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=50, random_state=42)
model.fit(X_train, y_train)
model_name = "Adaboost Decision Tree"



線性判別分析 (LDA)


In [71]:
model = LinearDiscriminantAnalysis()
model.fit(X_train, y_train)
model_name = "LDA"

### 生成結果


In [81]:
# 預測測試集
print(model_name)
y_pred = model.predict(X_test)

# 計算準確率
accuracy = accuracy_score(y_test, y_pred)
print(f"模型準確率: {accuracy:.4f}")

# 輸出分類報告
print("分類報告：")
print(classification_report(y_test, y_pred, digits=4))


DecisionTree
模型準確率: 0.9880
分類報告：
              precision    recall  f1-score   support

         1.0     0.9898    0.9914    0.9906      1169
         2.0     0.9850    0.9820    0.9835       668

    accuracy                         0.9880      1837
   macro avg     0.9874    0.9867    0.9871      1837
weighted avg     0.9880    0.9880    0.9880      1837



In [75]:
logo = LeaveOneGroupOut()
y_pred = cross_val_predict(model, X, y, cv=logo, groups=df['Subject'])
accuracy = accuracy_score(y, y_pred)
print(f"模型準確率: {accuracy:.4f}")
report = classification_report(y, y_pred, digits=4)
print("分類報告：")
print(report)

模型準確率: 0.6344
分類報告：
              precision    recall  f1-score   support

         1.0     0.7652    0.6174    0.6834      5868
         2.0     0.4953    0.6646    0.5676      3315

    accuracy                         0.6344      9183
   macro avg     0.6302    0.6410    0.6255      9183
weighted avg     0.6677    0.6344    0.6416      9183

