In [13]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

import random
seed = 11032
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [14]:
class Model(nn.Module):
    def __init__(self, init_weights=True):
        super(Model, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(1, 8)),
            nn.ReLU(inplace=True),
        )
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=2)
        self.classifier = nn.Sequential(
            nn.Linear(128, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )


    def forward(self, x):
        x = x.view(-1, 128, 8)
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = x.squeeze(3)
        x = self.encoder_layer(x)
        x = torch.mean(x, dim=1)
        x = self.classifier(x)
        return x

In [15]:
class argparse():
    pass

args = argparse()
args.epochs, args.learning_rate, args.train_batch_size, args.test_batch_size = [1000, 0.001, 2048, 2048]
args.device, = [torch.device("cuda:0" if torch.cuda.is_available() else "cpu")]

In [16]:
class Dataset_CAR(Dataset):
    def __init__(self, flag='train', csv_paths = []):
        assert flag in ['train', 'test'] # flag 必须是train  test 之间的其中一个
        self.flag = flag
        self.__load_data__(csv_paths)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.y)

    def __load_data__(self, csv_paths: list):
        # 读取 排列按照 train feature train label test feature test label
        self.x = torch.tensor(pd.read_csv(csv_paths[0]).values)
        self.y = torch.tensor(pd.read_csv(csv_paths[1], header = None).values) # 因为 label的表头是没有的，所以使用 header  = None
        print("feature shape: {}, label shape: {}".format(self.x.shape, self.y.shape))

In [17]:
csv_path_train = ['K_onlySMOTE_x.csv', 'K_onlySMOTE_y.csv']
train_dataset = Dataset_CAR(flag='train', csv_paths=csv_path_train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size = args.train_batch_size, shuffle=True)

feature shape: torch.Size([33175, 1024]), label shape: torch.Size([33175, 1])


In [18]:
csv_path_test = ["K_test_feature.csv", "K_test_y.csv"]
test_dataset = Dataset_CAR(flag='test', csv_paths=csv_path_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size = args.test_batch_size, shuffle=True)

feature shape: torch.Size([7556, 1024]), label shape: torch.Size([7556, 1])


In [19]:
model = Model().to(args.device)
criterion = nn.BCELoss()  # 二元交叉熵损失
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)  # Adam 优化器

train_loss = []
test_loss = []
train_epochs_loss = []
test_epochs_loss = []

In [10]:
# 开始训练
# 需要重新进行修改
# for epoch in range(args.epochs):
#     model.train()
#     train_epoch_loss = []
#     for idx, (data_x, data_y) in enumerate(train_dataloader, 0):
#         data_x = data_x.to(torch.float32).to(args.device)
#         data_y = data_y.to(torch.float32).to(args.device)
#         outputs = model(data_x)
#         print(outputs) # output 输出的内容为 nan
#         loss = criterion(data_y, outputs)
#         loss.backward()
#         optimizer.step()
#
#         train_epoch_loss.append(loss.item())
#         train_loss.append(loss.item())
#
#         if idx %(len(train_dataloader)//2)==0:
#             print("epoch={}/{},{}/{}of train, loss={}".format(epoch, args.epochs, idx, len(train_dataloader),loss.item()))
#     train_epochs_loss.append(np.average(train_epoch_loss))
#
#     #=====================valid============================
#
#     model.eval()
#     test_epoch_loss = []
#     for idx, (data_x, data_y) in enumerate(test_dataloader, 0):
#         data_x = data_x.to(torch.float32).to(args.device)
#         data_y = data_y.to(torch.float32).to(args.device)
#
#         outputs = model(data_x)
#
#         loss = criterion(data_y, outputs)
#         test_epoch_loss.append(loss.item())
#         test_loss.append(loss.item())
#
#     test_epochs_loss.append(np.average(test_epoch_loss))

tensor([[0.4778],
        [0.4723],
        [0.4875],
        ...,
        [0.4720],
        [0.4798],
        [0.4735]], grad_fn=<SigmoidBackward0>)
epoch=0/100,0/33of train, loss=48.61852264404297
tensor([[nan],
        [nan],
        [nan],
        ...,
        [nan],
        [nan],
        [nan]], grad_fn=<SigmoidBackward0>)
tensor([[nan],
        [nan],
        [nan],
        ...,
        [nan],
        [nan],
        [nan]], grad_fn=<SigmoidBackward0>)
tensor([[nan],
        [nan],
        [nan],
        ...,
        [nan],
        [nan],
        [nan]], grad_fn=<SigmoidBackward0>)


KeyboardInterrupt: 

In [20]:
from Measurement import compAUC
from Measurement import SN_SP_MCC

for epoch in range(args.epochs):
    model.train()  # 设置模型为训练模式
    for idx, (data, labels) in enumerate(train_dataloader, 0):
        # 前向传播
        data  = data.type(torch.float32).to(args.device)
        outputs = model(data).to(args.device)
        labels = labels.type(torch.float32).to(args.device)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 验证
    model.eval()  # 设置模型为评估模式
    with torch.no_grad():  # 在验证阶段不计算梯度
        correct = 0
        total = 0
        outputs_list = []
        labels_list = []
        pred_list = []
        for idx, (data, labels) in enumerate(test_dataloader, 0):
            data = data.type(torch.float32).to(args.device)
            labels = labels.type(torch.float32).to(args.device)
            outputs = model(data)
            predicted = (outputs > 0.5).float()  # 阈值设为 0.5
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            outputs_list.append(outputs)
            labels_list.append(labels)
            pred_list.append(predicted)

        all_outputs = torch.cat(outputs_list, dim=0)
        all_labels = torch.cat(labels_list, dim=0)
        all_pred = torch.cat(pred_list, dim=0)


    SN_SP_MCC(all_labels, all_pred)
    AUC = compAUC(all_labels, all_outputs)
    print(f'Epoch [{epoch+1}/{args.epochs}], Loss: {loss.item():.4f}, '
          f'Accuracy: {100 * correct / total:.2f}%')
    print(AUC)

TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.1851887  0.18460917 0.18422025 ... 0.18475303 0.18436344 0.18424201]
Epoch [1/1000], Loss: 0.4426, Accuracy: 98.45%
0.5558117704911629
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.17504834 0.1753831  0.17524515 ... 0.17521288 0.17505042 0.1751945 ]
Epoch [2/1000], Loss: 0.4925, Accuracy: 98.45%
0.5829251703025059
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 0 0 0]
[0.17923395 0.17918786 0.17913723 ... 0.1793927  0.17918591 0.17956166]
Epoch [3/1000], Loss: 0.4388, Accuracy: 98.45%
0.5876645721382917
TP: 0, FP: 0, Sensitivity (SN): 0.0, Specificity (SP): 1.0, Matthews Correlation Coefficient (MCC): 0
[0 0 0 ... 1 0 0]
[0.19724391 0.19654565 0.19650452 ... 0.19568844 0.19675286 0.19625108]
Epoch [4/1000], Loss: 0.4990, Accu

KeyboardInterrupt: 