# 題目
- DT + 全類別型條件屬性 + 類別型目標屬性

## 注意事項
- 深度學習建議使用 Nvidia GPU , 激活 Pytorch CUDA 加速功能 , 在 Colab 推薦使用 Nvidia T4 之上的 GPU 加速訓練與推理速度

## Colab 環境介紹
- 整體為 VM (虛擬機)
- OS : Ubuntu 22.04 LTS
- CPU : 虛擬化 CPU 雙核約 2GHz
- RAM : 約 12.7 GB , 頻率不一定
- Disk : 約 112.6 GB , 讀寫速度不一定

## NVIDIA T4 GPU (GPU直通)
- NVIDIA CUDA 核心 : 2,560
- VRAM : 16 GB GDDR6 , 可用約 15.0 GB , 頻寬320+ GB/s
- FP32 : 8.1 TFLOPS
- FP16/FP32 : 65 FP16 TFLOPS
- INT8 : 130 INT8 TOPS
- INT4 : 260 INT4 TOPS
- PCle : Gen3 x16

## 使用資料集
- [Nursery](https://archive.ics.uci.edu/dataset/76/nursery)

## 資料處理 - B1143028 詹朝成

In [1]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [2]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split

# fetch dataset
nursery = fetch_ucirepo(id=76)
# data (as pandas dataframes)
feature , target = nursery.data.features, nursery.data.targets
X = nursery.data.features
y = nursery.data.targets

# 定義每個欄位的數字映射
mappings_X = {
    'parents': {'usual': 0, 'pretentious': 1, 'great_pret': 2},
    'has_nurs': {'proper': 0, 'less_proper': 1, 'improper': 2, 'critical': 3, 'very_crit': 4},
    'form': {'complete': 0, 'completed': 1, 'incomplete': 2, 'foster': 3},
    'children': {'1': 0, '2': 1, '3': 2, 'more': 3},
    'housing': {'convenient': 0, 'less_conv': 1, 'critical': 2},
    'finance': {'convenient': 0, 'inconv': 1},
    'social': {'nonprob': 0, 'slightly_prob': 1, 'problematic': 2},
    'health': {'recommended': 0, 'priority': 1, 'not_recom': 2},
}
mappings_y = {
    'class': {'recommend': 0, 'priority': 1, 'not_recom': 2, 'very_recom': 3, 'spec_prior': 4}
}

X_mapped = X.copy()
y_mapped = y.copy()
# 應用每個欄位的數字映射
for column, mapping in mappings_X.items():
    X_mapped[column] = X_mapped[column].map(mapping)

for column, mapping in mappings_y.items():
    y_mapped[column] = y_mapped[column].map(mapping)

# 將資料轉換為numpy.ndarray
X = X_mapped.to_numpy()  # 將特徵轉換為numpy陣列
y = y_mapped.to_numpy()  # 將目標轉換為numpy陣列
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 深度學習 - 軟決策樹 - B1143015 林宣佑

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
import time
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification # 生成測試用資料
from sklearn.model_selection import train_test_split
from huggingface_hub import HfApi, HfFolder, Repository
from safetensors.torch import save_file
from google.colab import userdata
# 檢測設備是否支援 CUDA 加速
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print("*****CUDA Status*****")
print(f"CUDA Available: {USE_CUDA}")
if USE_CUDA:
    print(f"CUDA Device: {torch.cuda.current_device()}")
    print(f"CUDA Device Name: {torch.cuda.get_device_name(device)}")

# 定義軟決策樹 (深度學習模仿決策樹的方案)
class SoftDecisionTree(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, tau=1.0):
        super(SoftDecisionTree, self).__init__()
        self.tau = tau  # 溫度參數，用於 Gumbel-Softmax
        # 定義網絡層
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, output_dim)
    def gumbel_softmax(self, logits, tau):
        # 生成 Gumbel 噪聲
        gumbel_noise = -torch.log(-torch.log(torch.rand_like(logits) + 1e-10) + 1e-10)
        y = logits + gumbel_noise
        return F.softmax(y / tau, dim=-1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.gumbel_softmax(self.fc2(x), self.tau)
        x = self.gumbel_softmax(self.fc3(x), self.tau)
        x = F.relu(self.fc4(x))
        # 輸出層使用 Softmax 來輸出概率
        x = F.softmax(self.fc5(x), dim=-1)
        return x

# 設定模型各層維度
input_dim = 8    # 輸入
hidden_dim = 150  # 隱藏 , 像神經一樣的概念
output_dim = 5   # 輸出

# 轉換為 tensor 並移動到設備上
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device).squeeze()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device).squeeze()


# 訓練參數設置
batch_size = 128  # 批次大小
epochs = 100     # 訓練次數
lr = 5e-3        # 學習率
criterion = nn.CrossEntropyLoss()  # 損失函數
initial_tau = 5.0
final_tau = 0.1
tau_decay_rate = 0.99 # 每次 epoch 都降低 1%


# 模型建立並移動到 device (CPU 或 GPU)
model = SoftDecisionTree(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, tau=initial_tau).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # 優化器
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)  # 調節學習率工具
print("\n*****Model Status*****")
print(model)  # 顯示模型結構

print("\n*****Training Status*****")
# 訓練模型
log_loss = 0
log_training_time = 0
train_start = time.time()
for epoch in range(1,epochs+1):
    model.train()  # 將模型設置為訓練模式
    model.tau = max(final_tau, initial_tau * (tau_decay_rate ** epoch))
    total_loss = 0 # 紀錄 LOSS
    start_time = time.time()  # 記錄當前epoch的開始時間
    for i in range(0, len(X_train_tensor), batch_size):
        batch_X = X_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的特徵數據
        batch_y = y_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的目標數據
        outputs = model(batch_X)  # 獲取模型的預測結果
        loss = criterion(outputs, batch_y)  # 計算模型的損失
        optimizer.zero_grad()  # 梯度反向傳播和參數更新
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()  # 每個epoch結束後調整學習率(記數)
    log_training_time += time.time() - start_time # 加上單次訓練時間
    log_loss += total_loss # 加上單次 LOSS
    # 每十次輸出訓練狀態 , 10 次的訓練時間總和 , 10 次的LOSS總和
    if epoch % 10 == 0:
        print('| epoch {:3d} | lr {:02.10f} | {:5.2f} ms | loss {:5.7f}'.format(
            epoch, scheduler.get_last_lr()[0], log_training_time * 1000, log_loss))
        log_training_time = 0 # 重新計時
        log_loss = 0 # 重新計算 LOSS

train_times = time.time() - train_start  # 計算整個訓練過程的總執行時間
print(f"Training cost: {train_times:.2f} seconds")  # 打印整個訓練過程的執行時間


# 混淆矩陣和評估指標
print("\n*****Eval Status*****")
model.eval()
test_outputs = model(X_test_tensor)

# 使用 torch.argmax 獲取預測標籤
predicted_labels = torch.argmax(test_outputs, dim=1).detach().cpu().numpy()
actual_labels = y_test_tensor.cpu().numpy()

# 打印混淆矩陣
labels = list(range(output_dim))
conf_matrix = confusion_matrix(actual_labels, predicted_labels, labels=labels)
print("Confusion Matrix:\n", conf_matrix)

# 打印 正確率 , 精確率 , 召回率 , F1-score
print(f"Accuracy: {accuracy_score(actual_labels, predicted_labels) * 100:.2f}%")
print(f"Precision: {precision_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"Recall: {recall_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"F1 Score: {f1_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")

*****CUDA Status*****
CUDA Available: True
CUDA Device: 0
CUDA Device Name: Tesla T4

*****Model Status*****
SoftDecisionTree(
  (fc1): Linear(in_features=8, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=150, bias=True)
  (fc3): Linear(in_features=150, out_features=150, bias=True)
  (fc4): Linear(in_features=150, out_features=150, bias=True)
  (fc5): Linear(in_features=150, out_features=5, bias=True)
)

*****Training Status*****
| epoch  10 | lr 0.0047500000 | 2044.20 ms | loss 1032.9462137
| epoch  20 | lr 0.0045125000 | 2352.58 ms | loss 789.7935590
| epoch  30 | lr 0.0042868750 | 2919.97 ms | loss 768.3578754
| epoch  40 | lr 0.0040725312 | 3346.16 ms | loss 765.3095654
| epoch  50 | lr 0.0038689047 | 1699.85 ms | loss 763.2318457
| epoch  60 | lr 0.0036754595 | 1699.27 ms | loss 761.6527844
| epoch  70 | lr 0.0034916865 | 1696.20 ms | loss 761.6656376
| epoch  80 | lr 0.0033171022 | 1688.76 ms | loss 759.7716464
| epoch  90 | lr 0.0031512470 | 1816.12 m

In [None]:
from huggingface_hub import HfApi, Repository
from safetensors.torch import save_file
import os
import shutil
# 模型保存
USER_NAME = "TsukiOwO"
MODEL_NAME = "soft_decision_tree"
model_save_path = f"{MODEL_NAME}.safetensors"
repo_name = f"{USER_NAME}/{MODEL_NAME}"
api_token = userdata.get('Colab_ALL')

# Hugging Face API
api = HfApi()


# 建構儲存庫
try:
    api.create_repo(repo_id=repo_name, token=api_token, private=False)
    print(f"儲存庫 '{repo_name}' 建立成功。")
except Exception as e:
    print(f"建構儲存庫操作時發生錯誤: {e}")

# 提取模型權重並保存為 safetensors 格式
model_weights = {name: tensor for name, tensor in model.state_dict().items()}  # 提取每個張量
save_file(model_weights, model_save_path)

# 檢查檔案是否成功保存
if os.path.exists(model_save_path):
    print(f"模型成功保存於 {model_save_path}。")
else:
    print(f"模型保存失敗於 {model_save_path}。")

# 設定 Git 用戶名和電子郵件
!git config --global user.email "a0985821880@gmail.com"
!git config --global user.name "TsukiSama9292"

# 上傳模型到 Hugging Face
try:
    repo = Repository(repo_name, clone_from=repo_name, token=api_token)
    print(f"已複製儲存庫: {repo_name}")

    # 拉取最新的更改
    repo.git_pull()

    # 移動檔案到儲存庫目錄
    shutil.move(model_save_path, os.path.join(repo.local_dir, model_save_path))

    # 檢查檔案是否存在
    if os.path.exists(os.path.join(repo.local_dir, model_save_path)):
        repo.git_add(model_save_path)
        repo.git_commit("新增 soft decision tree 模型於 safetensors 格式")
        repo.git_push()
        print(f"模型已上傳至 Hugging Face: {repo_name}")
    else:
        print(f"檔案 '{model_save_path}' 不存在於儲存庫目錄中，無法上傳。")
except Exception as e:
    print(f"儲存庫操作時發生錯誤: {e}")

建構儲存庫操作時發生錯誤: 409 Client Error: Conflict for url: https://huggingface.co/api/repos/create (Request ID: Root=1-67321e26-1a85e44e4863b5b007f17e6c;3f9aeafa-0d8d-4f4c-9c6c-5c814e2267e1)

You already created this model repo
模型成功保存於 soft_decision_tree.safetensors。


For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/TsukiOwO/soft_decision_tree into local empty directory.


已複製儲存庫: TsukiOwO/soft_decision_tree


To https://huggingface.co/TsukiOwO/soft_decision_tree
   de3195f..6cc47d1  main -> main

   de3195f..6cc47d1  main -> main



模型已上傳至 Hugging Face: TsukiOwO/soft_decision_tree


## 深度學習 - 測試模型 - B1143015 林宣佑, B1143028 詹朝成(架構修改)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
import time
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from huggingface_hub import HfApi, HfFolder, Repository
from safetensors.torch import save_file
from google.colab import userdata
# 檢測設備是否支援 CUDA 加速
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print("*****CUDA Status*****")
print(f"CUDA Available: {USE_CUDA}")
if USE_CUDA:
    print(f"CUDA Device: {torch.cuda.current_device()}")
    print(f"CUDA Device Name: {torch.cuda.get_device_name(device)}")

class ReLU(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(ReLU, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return x


# 設定模型各層維度
input_dim = 8    # 輸入
output_dim = 5   # 輸出

# 轉換為 tensor 並移動到設備上
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device).squeeze()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device).squeeze()

# 訓練參數設置
batch_size = 128  # 批次大小
epochs = 100     # 訓練次數
lr = 5e-3        # 學習率
criterion = nn.CrossEntropyLoss()  # 損失函數

# 模型建立並移動到 device (CPU 或 GPU)
model = ReLU(input_dim=input_dim, output_dim=output_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # 優化器
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)  # 調節學習率工具
print("\n*****Model Status*****")
print(model)  # 顯示模型結構

print("\n*****Training Status*****")
# 訓練模型
log_loss = 0
log_training_time = 0
train_start = time.time()
for epoch in range(1,epochs+1):
    model.train()  # 將模型設置為訓練模式
    total_loss = 0 # 紀錄 LOSS
    start_time = time.time()  # 記錄當前epoch的開始時間
    for i in range(0, len(X_train_tensor), batch_size):
        batch_X = X_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的特徵數據
        batch_y = y_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的目標數據
        outputs = model(batch_X)  # 獲取模型的預測結果
        loss = criterion(outputs, batch_y)  # 計算模型的損失
        optimizer.zero_grad()  # 梯度反向傳播和參數更新
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()  # 每個epoch結束後調整學習率(記數)
    log_training_time += time.time() - start_time # 加上單次訓練時間
    log_loss += total_loss # 加上單次 LOSS
    # 每十次輸出訓練狀態 , 10 次的訓練時間總和 , 10 次的LOSS總和
    if epoch % 10 == 0:
        print('| epoch {:3d} | lr {:02.10f} | {:5.2f} ms | loss {:5.7f}'.format(
            epoch, scheduler.get_last_lr()[0], log_training_time * 1000, log_loss))
        log_training_time = 0 # 重新計時
        log_loss = 0 # 重新計算 LOSS

train_times = time.time() - train_start  # 計算整個訓練過程的總執行時間
print(f"Training cost: {train_times:.2f} seconds")  # 打印整個訓練過程的執行時間


# 混淆矩陣和評估指標
print("\n*****Eval Status*****")
model.eval()
test_outputs = model(X_test_tensor)

# 使用 torch.argmax 獲取預測標籤
predicted_labels = torch.argmax(test_outputs, dim=1).detach().cpu().numpy()
actual_labels = y_test_tensor.cpu().numpy()

# 打印混淆矩陣
labels = list(range(output_dim))
conf_matrix = confusion_matrix(actual_labels, predicted_labels, labels=labels)
print("Confusion Matrix:\n", conf_matrix)

# 打印 正確率 , 精確率 , 召回率
print(f"Accuracy: {accuracy_score(actual_labels, predicted_labels) * 100:.2f}%")
print(f"Precision: {precision_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"Recall: {recall_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"F1 Score: {f1_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")

*****CUDA Status*****
CUDA Available: True
CUDA Device: 0
CUDA Device Name: Tesla T4

*****Model Status*****
ReLU(
  (fc1): Linear(in_features=8, out_features=5, bias=True)
)

*****Training Status*****
| epoch  10 | lr 0.0047500000 | 768.83 ms | loss 591.8643637
| epoch  20 | lr 0.0045125000 | 786.69 ms | loss 329.0077802
| epoch  30 | lr 0.0042868750 | 990.10 ms | loss 268.7108512
| epoch  40 | lr 0.0040725312 | 765.89 ms | loss 236.6843954
| epoch  50 | lr 0.0038689047 | 1069.31 ms | loss 217.2248575
| epoch  60 | lr 0.0036754595 | 771.54 ms | loss 204.7096278
| epoch  70 | lr 0.0034916865 | 779.90 ms | loss 196.3564617
| epoch  80 | lr 0.0033171022 | 866.48 ms | loss 190.6130126
| epoch  90 | lr 0.0031512470 | 784.72 ms | loss 186.5673067
| epoch 100 | lr 0.0029936847 | 761.42 ms | loss 183.6618306
Training cost: 8.35 seconds

*****Eval Status*****
Confusion Matrix:
 [[  0   0   0   2   0]
 [  0 791   0   2  80]
 [  0   0 870   0   0]
 [  0  46   0  16   0]
 [  0  99   0   0 686]]
A

## 深度學習 - 自注意力模型 - B1143015 林宣佑, B1143028 詹朝成(架構修改)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
import time
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from huggingface_hub import HfApi, HfFolder, Repository
from safetensors.torch import save_file
from google.colab import userdata
# 檢測設備是否支援 CUDA 加速
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print("*****CUDA Status*****")
print(f"CUDA Available: {USE_CUDA}")
if USE_CUDA:
    print(f"CUDA Device: {torch.cuda.current_device()}")
    print(f"CUDA Device Name: {torch.cuda.get_device_name(device)}")

class SelfAttention(nn.Module):
    def __init__(self, input_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        attention_scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(x.size(-1))  # 計算注意力權重
        attention_probs = torch.softmax(attention_scores, dim=-1)

        return torch.matmul(attention_probs, v)  # 聚焦重要特徵

# 定義結合模型：自注意力 + ReLU
class AttentionWithReLU(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(AttentionWithReLU, self).__init__()
        self.relu1 = nn.Linear(input_dim, hidden_dim)
        self.attention = SelfAttention(hidden_dim)
        self.relu2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.relu1(x))
        x = self.attention(x)  # 自注意力層處理輸入
        x = F.relu(self.relu2(x))
        return x

# 設定模型各層維度
input_dim = 8    # 輸入
hidden_dim = 100  # 隱藏 , 像神經一樣的概念
output_dim = 5   # 輸出

# 轉換為 tensor 並移動到設備上
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device).squeeze()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device).squeeze()

# 訓練參數設置
batch_size = 128  # 批次大小
epochs = 100     # 訓練次數
lr = 5e-3        # 學習率
criterion = nn.CrossEntropyLoss()  # 損失函數

# 模型建立並移動到 device (CPU 或 GPU)
model = AttentionWithReLU(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # 優化器
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)  # 調節學習率工具
print("\n*****Model Status*****")
print(model)  # 顯示模型結構

print("\n*****Training Status*****")
# 訓練模型
log_loss = 0
log_training_time = 0
train_start = time.time()
for epoch in range(1,epochs+1):
    model.train()  # 將模型設置為訓練模式
    total_loss = 0 # 紀錄 LOSS
    start_time = time.time()  # 記錄當前epoch的開始時間
    for i in range(0, len(X_train_tensor), batch_size):
        batch_X = X_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的特徵數據
        batch_y = y_train_tensor[i:i+batch_size]  # 從訓練集中取出一個批次的目標數據
        outputs = model(batch_X)  # 獲取模型的預測結果
        loss = criterion(outputs, batch_y)  # 計算模型的損失
        optimizer.zero_grad()  # 梯度反向傳播和參數更新
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()  # 每個epoch結束後調整學習率(記數)
    log_training_time += time.time() - start_time # 加上單次訓練時間
    log_loss += total_loss # 加上單次 LOSS
    # 每十次輸出訓練狀態 , 10 次的訓練時間總和 , 10 次的LOSS總和
    if epoch % 10 == 0:
        print('| epoch {:3d} | lr {:02.10f} | {:5.2f} ms | loss {:5.7f}'.format(
            epoch, scheduler.get_last_lr()[0], log_training_time * 1000, log_loss))
        log_training_time = 0 # 重新計時
        log_loss = 0 # 重新計算 LOSS

train_times = time.time() - train_start  # 計算整個訓練過程的總執行時間
print(f"Training cost: {train_times:.2f} seconds")  # 打印整個訓練過程的執行時間


# 混淆矩陣和評估指標
print("\n*****Eval Status*****")
model.eval()
test_outputs = model(X_test_tensor)

# 使用 torch.argmax 獲取預測標籤
predicted_labels = torch.argmax(test_outputs, dim=1).detach().cpu().numpy()
actual_labels = y_test_tensor.cpu().numpy()

# 打印混淆矩陣
labels = list(range(output_dim))
conf_matrix = confusion_matrix(actual_labels, predicted_labels, labels=labels)
print("Confusion Matrix:\n", conf_matrix)

# 打印 正確率 , 精確率 , 召回率
print(f"Accuracy: {accuracy_score(actual_labels, predicted_labels) * 100:.2f}%")
print(f"Precision: {precision_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"Recall: {recall_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")
print(f"F1 Score: {f1_score(actual_labels, predicted_labels, average='weighted', zero_division=0) * 100:.2f}%")

*****CUDA Status*****
CUDA Available: True
CUDA Device: 0
CUDA Device Name: Tesla T4

*****Model Status*****
AttentionWithReLU(
  (relu1): Linear(in_features=8, out_features=100, bias=True)
  (attention): SelfAttention(
    (query): Linear(in_features=100, out_features=100, bias=True)
    (key): Linear(in_features=100, out_features=100, bias=True)
    (value): Linear(in_features=100, out_features=100, bias=True)
  )
  (relu2): Linear(in_features=100, out_features=5, bias=True)
)

*****Training Status*****
| epoch  10 | lr 0.0047500000 | 1849.44 ms | loss 187.1027194
| epoch  20 | lr 0.0045125000 | 1508.15 ms | loss 44.3221867
| epoch  30 | lr 0.0042868750 | 1508.10 ms | loss 38.7977894
| epoch  40 | lr 0.0040725312 | 1517.08 ms | loss 14.7306274
| epoch  50 | lr 0.0038689047 | 1492.63 ms | loss 11.1443813
| epoch  60 | lr 0.0036754595 | 1510.44 ms | loss 6.9109536
| epoch  70 | lr 0.0034916865 | 1525.84 ms | loss 0.0401645
| epoch  80 | lr 0.0033171022 | 1911.07 ms | loss 0.0144728
| e