In [5]:
!pip install ptflops
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import accuracy_score,f1_score, confusion_matrix, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import seaborn as sns
from ptflops import get_model_complexity_info
import time
# 1. 数据加载与预处理
# 读取数据
data = pd.read_csv('/kaggle/input/mitbih/ECG_data.csv')

# 提取信号和标签
signals = data['Signal'].apply(lambda x: np.fromstring(x[1:-1], sep=',')).values
labels = data['Label'].values

# 转换信号为NumPy数组
signals = np.array([np.array(signal) for signal in signals])

# 标签编码
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
# 数据分割
X_train, X_temp, y_train, y_temp = train_test_split(
    signals, labels_encoded, test_size=0.3, random_state=42, stratify=labels_encoded)

# 第二步：将临时集中的 2/3 设为验证集（20%），1/3 设为测试集（10%）
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=1/3, random_state=42, stratify=y_temp)

# 检查划分比例
print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

# 2. 转换输入格式
X_train = X_train.reshape(-1, 1, 300)
# 每个样本300个时间步，每个时间步一个特征 (channels=1, seq_len=300)
X_val = X_val.reshape(-1, 1, 300)
X_test = X_test.reshape(-1, 1, 300)


# 转换为Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# 创建DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# 3. 构建 ResNet + LSTM 模型（PyTorch实现）
class ResNetLSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_classes=5):
        super(ResNetLSTM, self).__init__()
        self.conv1 = nn.Conv1d(input_size, 32, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.2)
        
        # ResNet block
        self.resnet_block1 = self._resnet_block(32, 32)
        self.resnet_block2 = self._resnet_block(32, 32)
        self.resnet_block3 = self._resnet_block(32, 32)
        self.resnet_block4 = self._resnet_block(32, 32)
        
        # Global feature learning layer (Global Average Pooling)
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global feature vector
        
        # LSTM layer
        self.lstm = nn.LSTM(32, hidden_size, batch_first=True)
        
        # Fully connected layer for classification
        self.fc = nn.Linear(hidden_size, num_classes)

    def _resnet_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.dropout(x)
        
        # Passing through ResNet blocks
        residual = x
        x = self.resnet_block1(x)
        x = x + residual
        residual = x
        x = self.resnet_block2(x)
        x = x + residual
        residual = x
        x = self.resnet_block3(x)
        x = x + residual
        residual = x
        x = self.resnet_block4(x)
        x = x + residual
        
        # Global feature learning
        x = self.global_avg_pool(x)
        
        # LSTM
        x, (h_n, c_n) = self.lstm(x.view(x.size(0), -1, 32))  # Flatten the tensor before LSTM
        x = x[:, -1, :]  # Only take the output of the last time step
        
        # Fully connected layer for classification
        x = self.fc(x)
        return x

# Instantiate the model
model = ResNetLSTM(input_size=1, hidden_size=64, num_classes=5)

class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2, num_classes=5):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.num_classes = num_classes
        self.epsilon = 1e-6  # 防止除零错误

    def forward(self, inputs, targets):
        inputs = torch.softmax(inputs, dim=1)
        targets = torch.zeros_like(inputs).scatter_(1, targets.view(-1, 1), 1)
        p_t = (inputs * targets).sum(dim=1) + self.epsilon
        loss = -self.alpha * (1 - p_t) ** self.gamma * torch.log(p_t)
        return loss.mean()

# 4. 设置损失函数、优化器和学习率调度器
criterion = FocalLoss(alpha=0.25, gamma=2, num_classes=5)  # 使用 Focal Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)  # 每10轮学习率减半
# 选择设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def compute_test_metrics(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    all_targets = []
    all_predictions = []
    start_time = time.time()  # ✅ 开始计时
    
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            all_targets.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())
            
    end_time = time.time()  # ✅ 结束计时
    elapsed_time = end_time - start_time
    accuracy = 100 * correct / total
    f1 = f1_score(all_targets, all_predictions, average='weighted')
    sensitivity = recall_score(all_targets, all_predictions, average='weighted')
    precision = precision_score(all_targets, all_predictions, average='weighted')

    conf_matrix = confusion_matrix(all_targets, all_predictions)
    tn = conf_matrix.sum() - conf_matrix.sum(axis=0) - conf_matrix.sum(axis=1) + np.diagonal(conf_matrix)
    fp = conf_matrix.sum(axis=0) - np.diagonal(conf_matrix)
    specificity_per_class = tn / (tn + fp + 1e-6)
    samples_per_class = conf_matrix.sum(axis=1)
    total_samples = np.sum(samples_per_class)
    specificity = np.sum((samples_per_class / total_samples) * specificity_per_class)

    return accuracy, f1, sensitivity, precision, specificity, conf_matrix,elapsed_time

def plot_acc_loss(history):
    epochs = range(1, len(history['train_acc']) + 1)

    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy (%)', color='tab:blue')
    ax1.plot(epochs, history['train_acc'], label='Train Accuracy', color='tab:blue', linestyle='-')
    ax1.plot(epochs, history['val_acc'], label='Val Accuracy', color='tab:blue', linestyle='--')
    ax1.tick_params(axis='y', labelcolor='tab:blue')
    ax1.set_ylim(90,100)
    ax1.grid(True)
    
    ax2 = ax1.twinx()
    ax2.set_ylabel('Loss', color='tab:red')
    ax2.plot(epochs, history['train_loss'], label='Train Loss', color='tab:red', linestyle='-')
    ax2.plot(epochs, history['val_loss'], label='Val Loss', color='tab:red', linestyle='--')
    ax2.tick_params(axis='y', labelcolor='tab:red')
    ax2.set_ylim(0,0.03)
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='center right')

    plt.title('Accuracy and Loss over Epochs')
    fig.tight_layout()
    plt.savefig("Resnet+LSTM Accuracy_Loss Dual Axis.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
    plt.show()


def train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, num_epochs):
    history = {
        'train_acc': [], 'val_acc': [],
        'train_loss': [], 'val_loss': []
    }
    best_val_loss = float('inf') 
    best_model_path = 'Resnet+LSTM_best_model.pth'
    for epoch in range(num_epochs):
        # === Training ===
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for data, target in train_loader:
            optimizer.zero_grad()
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total_train += target.size(0)
            correct_train += (predicted == target).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct_train / total_train
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)      
        # === Validation ===
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for data, target in val_loader:
                data = data.to(device)
                target = target.to(device)
                output = model(data)
                loss = criterion(output, target)
                val_loss += loss.item()
                _, predicted = torch.max(output, 1)
                total_val += target.size(0)
                correct_val += (predicted == target).sum().item()

        val_loss /= len(val_loader)
        val_acc = 100 * correct_val / total_val
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"  Train -> Acc: {train_acc:.2f}% | Loss: {train_loss:.4f}")
        print(f"  Val   -> Acc: {val_acc:.2f}% | Loss: {val_loss:.4f}")
       # === Save Best Model (based on validation loss) ===
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_path)
            print("  ✅ Best model saved (state_dict).")
        else:
            print("  No improvement.")

    # === 重新加载最优模型进行测试 ===
    model.load_state_dict(torch.load(best_model_path))
    model.eval()

    # === 测试集评估 ===
    start_time = time.time()
    test_acc, test_f1, test_sen, test_ppv, test_spe, conf_matrix, test_time = compute_test_metrics(model, test_loader)
    test_time = time.time() - start_time

    print("\n=== Final Test Set Performance ===")
    print(f"Test Acc: {test_acc:.2f}% | F1: {test_f1:.4f} | Sensitivity: {test_sen:.4f} | "
          f"PPV: {test_ppv:.4f} | Specificity: {test_spe:.4f}")
    print(f"🕒 Inference Time on Test Set: {test_time:.2f} seconds")

    # === 计算模型参数量和 FLOPs ===
    print("\n=== Model Complexity ===")
    macs, params = get_model_complexity_info(
        model, input_res=(1, 300), as_strings=True,
        print_per_layer_stat=False, verbose=False
    )
    print(f"📊 Params: {params}")
    print(f"📊 FLOPs: {macs}")

    # === 保存模型结构复杂度信息到文件 ===
    with open("Resnet+LSTM_Model_Complexity.txt", "w") as f:
        f.write(f"Params: {params}\nFLOPs: {macs}\nTest Time: {test_time:.2f} sec")

    return model, history

train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, num_epochs=50)
        

Train: 76483, Val: 21852, Test: 10927
Epoch [1/50]
  Train -> Acc: 83.49% | Loss: 0.0642
  Val   -> Acc: 85.76% | Loss: 0.0504
  ✅ Best model saved (state_dict).
Epoch [2/50]
  Train -> Acc: 86.06% | Loss: 0.0479
  Val   -> Acc: 86.84% | Loss: 0.0447
  ✅ Best model saved (state_dict).
Epoch [3/50]
  Train -> Acc: 87.30% | Loss: 0.0435
  Val   -> Acc: 87.60% | Loss: 0.0409
  ✅ Best model saved (state_dict).
Epoch [4/50]
  Train -> Acc: 87.56% | Loss: 0.0406
  Val   -> Acc: 88.07% | Loss: 0.0391
  ✅ Best model saved (state_dict).
Epoch [5/50]
  Train -> Acc: 88.38% | Loss: 0.0380
  Val   -> Acc: 90.78% | Loss: 0.0321
  ✅ Best model saved (state_dict).
Epoch [6/50]
  Train -> Acc: 91.58% | Loss: 0.0300
  Val   -> Acc: 91.94% | Loss: 0.0271
  ✅ Best model saved (state_dict).
Epoch [7/50]
  Train -> Acc: 92.00% | Loss: 0.0275
  Val   -> Acc: 92.59% | Loss: 0.0250
  ✅ Best model saved (state_dict).
Epoch [8/50]
  Train -> Acc: 92.22% | Loss: 0.0262
  Val   -> Acc: 92.45% | Loss: 0.0264
  No 

(ResNetLSTM(
   (conv1): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
   (dropout): Dropout(p=0.2, inplace=False)
   (resnet_block1): Sequential(
     (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (1): ReLU()
     (2): Dropout(p=0.2, inplace=False)
     (3): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (4): ReLU()
     (5): Dropout(p=0.2, inplace=False)
   )
   (resnet_block2): Sequential(
     (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (1): ReLU()
     (2): Dropout(p=0.2, inplace=False)
     (3): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (4): ReLU()
     (5): Dropout(p=0.2, inplace=False)
   )
   (resnet_block3): Sequential(
     (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (1): ReLU()
     (2): Dropout(p=0.2, inplace=False)
     (3): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
     (4): ReLU()
     (5): Dropout(p=0.2, inplace=False)
   )