In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder, DatasetFolder
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from glob import glob
import gc
import sys
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

if torch.cuda.is_available():
    device = torch.device("cuda")  # using first available GPU
    print("GPU is available.")
else:
    device = torch.device("cpu")
    print("GPU is not available. Switching to CPU.")

GPU is available.


In [2]:
# show images and labels
def show_images(images, labels):
    plt.figure(figsize=(12, 6))
    for i in range(len(images)):
        plt.subplot(4, 8, i + 1)
        plt.imshow(images[i].permute(1, 2, 0))
        plt.title(labels[labels[i]])
        plt.axis('off')
    plt.show()

In [3]:
# 定义数据变换
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [4]:
batch_size = 30
# 加载图像数据集
trainDataset = ImageFolder(root='/kaggle/input/7015-dataset/train/train', transform=transform)
trainLoader = DataLoader(trainDataset, batch_size=batch_size, shuffle=False)
testDataset = ImageFolder(root='/kaggle/input/7015-dataset/test/test', transform=transform)
testLoader = DataLoader(testDataset, batch_size=batch_size, shuffle=False)
labels = trainDataset.classes

In [5]:
train_samples = torch.tensor([]).to(device)
train_labels = torch.tensor([]).to(device)
test_samples = torch.tensor([]).to(device)
test_labels = torch.tensor([]).to(device)

In [6]:
for images, labels in tqdm(trainLoader):
    images = images.to(device)
    train_samples = torch.cat([train_samples, images.unsqueeze(0)], dim=0)
    label = list(map(lambda x: x.split('_')[1], trainDataset.class_to_idx.keys()))[labels[0]]
    if label == 'arm':
        label = 0
    else:
        label = 1
    train_labels = torch.cat([train_labels, torch.tensor(label).unsqueeze(0).to(device)], dim=0)

for images, labels in tqdm(testLoader):
    images = images.to(device)
    test_samples = torch.cat([test_samples, images.unsqueeze(0)], dim=0)
    label = list(map(lambda x: x.split('_')[1], testDataset.class_to_idx.keys()))[labels[0]]
    if label == 'arm':
        label = 0
    else:
        label = 1
    test_labels = torch.cat([test_labels, torch.tensor(label).unsqueeze(0).to(device)], dim=0)


100%|██████████| 436/436 [03:15<00:00,  2.23it/s]
100%|██████████| 109/109 [00:43<00:00,  2.53it/s]


In [7]:
class myDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.labels[index]


1. **保留resnet的fc层，除此以外冻结 1000-d向量，与lstm模型连接进行权重更新**
2. **删除fc层，仅做特征提取cnn，不冻结，resnet部分进行fine-tuning，与lstm模型连接进行权重更新**
3. **与光流图（optical-flow）另外一个resnet提取，提取出的特征做向量拼接**

In [8]:
import torch
import torch.nn as nn
import torchvision.models as models

class CNNLSTM(nn.Module):
    def __init__(self, num_classes, lstm_hidden_size, lstm_num_layers, lstm_input_size):
        super(CNNLSTM, self).__init__()

        # 加载预训练的ResNet模型
        self.resnet = models.resnet18(pretrained=True)
        # 移除ResNet的最后一个全连接层，用于特征提取
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        
#         for name, param in self.resnet.named_parameters():
#             if "fc" not in name:  # 不冻结全连接层的参数
#                 param.requires_grad = False

        # LSTM模型
        self.lstm = nn.LSTM(input_size=lstm_input_size, hidden_size=lstm_hidden_size,
                            num_layers=lstm_num_layers, batch_first=True)

        # 全连接层，用于分类
        self.fc = nn.Linear(lstm_hidden_size, num_classes)

    def forward(self, x):
        # x shape: (batch_size, num_frames, C, H, W)
        batch_size, num_frames, C, H, W = x.size()

        # 将每一帧通过CNN进行特征提取
        cnn_features = torch.zeros(batch_size, num_frames, lstm_input_size).to(x.device)
        for i in range(num_frames):
            frame_features = self.resnet(x[:, i, :, :, :])
            cnn_features[:, i, :] = frame_features.squeeze(-1).squeeze(-1)

        # 将特征输入LSTM模型
        lstm_out, _ = self.lstm(cnn_features)

        # 取LSTM的最后一个时间步的输出
        lstm_last_output = lstm_out[:, -1, :]

        # 全连接层
        output = self.fc(lstm_last_output)

        return output


In [9]:
lstm_batch_size = 8
trainDataset = myDataset(train_samples, train_labels)
trainLoader = DataLoader(trainDataset, batch_size=lstm_batch_size, shuffle=True)
testDataset = myDataset(test_samples, test_labels)
testLoader = DataLoader(testDataset, batch_size=lstm_batch_size, shuffle=False)

In [10]:
for inputs, labels in trainLoader:
    print(inputs.shape)
    break

torch.Size([8, 30, 3, 224, 224])


In [11]:
# 创建模型实例
num_classes = 2 
lstm_hidden_size = 4
lstm_num_layers = 3
lstm_input_size = 512
model = CNNLSTM(num_classes, lstm_hidden_size, lstm_num_layers, lstm_input_size)
model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-3)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 178MB/s]


In [12]:
import random
import os
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score


def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# 使用这个函数来设置一个全局种子
seed_everything(42)

# 初始化最佳性能指标
least_eval_loss = 100

for epoch in range(30):
    model.train()
    train_correct = 0
    train_predict = []
    train_label = []
    train_predict_prob = []
    for inputs, labels in trainLoader:
        optimizer.zero_grad()
        # 将数据输入模型
        train_outputs = model(inputs.to(device))
        _, train_predicted = torch.max(train_outputs, 1)
        train_correct += sum(torch.argmax(train_outputs, dim=1) == labels)
        # 计算损失
        train_loss = criterion(train_outputs, labels.to(torch.long))
        train_predict.extend(list(train_predicted.cpu().numpy()))
        train_label.extend(list(labels.cpu().numpy()))
        train_predict_prob.extend(train_outputs[:,1].cpu().detach().numpy())

        # 反向传播和优化
        train_loss.backward()
        optimizer.step()

    model.eval()
    test_correct = 0
    test_predict = []
    test_label_list = []
    test_predict_prob = []
    with torch.no_grad():
        for test_inputs, test_labels in testLoader:
            test_outputs = model(test_inputs.to(device))
            _, test_predicted = torch.max(test_outputs, 1)
            test_loss = criterion(test_outputs, test_labels.to(torch.long))
            test_correct += sum(test_predicted == test_labels)
            test_predict.extend(list(test_predicted.cpu().numpy()))
            test_label_list.extend(list(test_labels.cpu().numpy()))
            test_predict_prob.extend(test_outputs[:,1].cpu().numpy())

    # save best model
    if test_loss < least_eval_loss:
        least_eval_loss = test_loss
        torch.save(model.state_dict(), 
                   f'{epoch}_best_model_f1_{f1_score(test_label_list, test_predict):.3f}_auc_{roc_auc_score(train_label, train_predict_prob):.3f}.pth')


    print(
    f"epoch_{epoch}:\n",
    "training roc_auc_score {:.5f},".format(roc_auc_score(train_label, train_predict_prob)),
    "training f1_score {:.5f},".format(f1_score(train_label, train_predict)),
    "training acc {:.5f},".format(train_correct / len(trainLoader.dataset.data)),
    "training loss {:.5f}".format(train_loss),
    "\n",
    "testing roc_auc_score {:.5f},".format(roc_auc_score(test_label_list, test_predict_prob)),
    "testing f1_score {:.5f},".format(f1_score(test_label_list, test_predict)),
    "testing acc {:.5f},".format(test_correct / len(testLoader.dataset.data)),
    "testing loss {:.5f}".format(test_loss),
    "\n----------------------------------"
    )
    
         




epoch_0:
 training roc_auc_score 0.48226, training f1_score 0.20921, training acc 0.56651, training loss 0.62331 
 testing roc_auc_score 0.55500, testing f1_score 0.00000, testing acc 0.91743, testing loss 0.52481 
----------------------------------
epoch_1:
 training roc_auc_score 0.57890, training f1_score 0.00000, training acc 0.61697, training loss 0.69267 
 testing roc_auc_score 0.35444, testing f1_score 0.00000, testing acc 0.91743, testing loss 0.39140 
----------------------------------
epoch_2:
 training roc_auc_score 0.69216, training f1_score 0.34906, training acc 0.68349, training loss 0.68681 
 testing roc_auc_score 0.67833, testing f1_score 0.36364, testing acc 0.93578, testing loss 0.30779 
----------------------------------
epoch_3:
 training roc_auc_score 0.75766, training f1_score 0.71127, training acc 0.81193, training loss 0.24931 
 testing roc_auc_score 0.87500, testing f1_score 0.46154, testing acc 0.93578, testing loss 0.23808 
----------------------------------
