In [5]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
#
# # 定义模型
# class SimpleNN(nn.Module):
#     def __init__(self):
#         super(SimpleNN, self).__init__()
#         self.fc1 = nn.Linear(1024, 512)  # 第一层全连接层，从 1024 到 512
#         self.fc2 = nn.Linear(512, 128)   # 第二层全连接层，从 512 到 128
#         self.fc3 = nn.Linear(128, 1)     # 第三层全连接层，从 128 到 1
#
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))  # 激活函数为 ReLU
#         x = torch.relu(self.fc2(x))
#
#         x = torch.sigmoid(self.fc3(x))  # 最后一层使用 Sigmoid 输出概率
#         return x
#
# # 实例化模型
# model = SimpleNN()
#
# # 定义损失函数和优化器
# criterion = nn.BCELoss()  # 二元交叉熵损失
# optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam 优化器


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

class Model(nn.Module):
    def __init__(self, init_weights=True):
        super(Model, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(1, 8)),
            nn.ReLU(inplace=True),
        )
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=2)
        self.classifier = nn.Sequential(
            nn.Linear(128, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )


    def forward(self, x):
        x = x.view(-1, 128, 8)
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = x.squeeze(3)
        x = self.encoder_layer(x)
        x = torch.mean(x, dim=1)
        x = self.classifier(x)
        return x

model = Model()

# 定义损失函数和优化器
criterion = nn.BCELoss()  # 二元交叉熵损失
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam 优化器

In [2]:
from torch.utils.data import DataLoader, Dataset
import pandas as pd
class Dataset_CAR(Dataset):
    def __init__(self, flag='train', csv_paths = []):
        assert flag in ['train', 'test'] # flag 必须是train  test 之间的其中一个
        self.flag = flag
        self.__load_data__(csv_paths)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.y)

    def __load_data__(self, csv_paths: list):
        # 读取 排列按照 train feature train label test feature test label
        self.x = torch.tensor(pd.read_csv(csv_paths[0]).values)
        self.y = torch.tensor(pd.read_csv(csv_paths[1], header = None).values) # 因为 label的表头是没有的，所以使用 header  = None
        print("feature shape: {}, label shape: {}".format(self.x.shape, self.y.shape))

In [3]:
csv_path_train = ['after_aug_X.csv', 'after_aug_y.csv']
train_dataset = Dataset_CAR(flag='train', csv_paths=csv_path_train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size = 2048, shuffle=True)

feature shape: torch.Size([8034, 1024]), label shape: torch.Size([8034, 1])


In [4]:
csv_path_test = ["K_test_feature.csv", "K_test_y.csv"]
test_dataset = Dataset_CAR(flag='test', csv_paths=csv_path_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size = 2048, shuffle=True)

feature shape: torch.Size([7556, 1024]), label shape: torch.Size([7556, 1])


In [5]:
from sklearn.metrics import roc_auc_score
from Measurement import compAUC
from Measurement import SN_SP_MCC

In [6]:
num_epochs = 1000  # 训练轮数


for epoch in range(num_epochs):
    model.train()  # 设置模型为训练模式
    for idx, (data, labels) in enumerate(train_dataloader, 0):
        # 前向传播
        data  = data.type(torch.float32)
        outputs = model(data)
        labels = labels.type(torch.float32)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 验证
    model.eval()  # 设置模型为评估模式
    with torch.no_grad():  # 在验证阶段不计算梯度
        correct = 0
        total = 0
        outputs_list = []
        labels_list = []
        pred_list = []
        for idx, (data, labels) in enumerate(test_dataloader, 0):
            data = data.type(torch.float32)
            labels = labels.type(torch.float32)
            outputs = model(data)
            predicted = (outputs > 0.5).float()  # 阈值设为 0.5
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            outputs_list.append(outputs)
            labels_list.append(labels)
            pred_list.append(predicted)

        all_outputs = torch.cat(outputs_list, dim=0)
        all_labels = torch.cat(labels_list, dim=0)
        all_pred = torch.cat(pred_list, dim=0)


    SN_SP_MCC(all_labels, all_pred)
    AUC = compAUC(all_labels, all_outputs)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, '
          f'Accuracy: {100 * correct / total:.2f}%')
    print(AUC)


TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.20227306 0.2008057  0.20062795 ... 0.20076244 0.2002784  0.20213361]
Epoch [1/1000], Loss: 0.5374, Accuracy: 98.45%
0.6239586241602642
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.24735364 0.24700215 0.24769567 ... 0.2495841  0.24978516 0.24864523]
Epoch [2/1000], Loss: 0.5299, Accuracy: 98.45%
0.682485353812145
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.19958884 0.19986969 0.1973235  ... 0.19974037 0.19741616 0.19977176]
Epoch [3/1000], Loss: 0.5578, Accuracy: 98.45%
0.6867542623020509
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 1 0 ... 0 0 0]
[0.2229518  0.22230881 0.22259627 ... 0.223054   0.22008093 0.21994884]
Epoch [4/1000], Loss: 0.5497, Accur

KeyboardInterrupt: 