In [None]:
# Install the Kaggle CLI and register the API token expected at /content/kaggle.json.
!pip install kaggle
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json
# Set the CLI's `path` option to /content, then download the dataset archive.
# NOTE(review): the unzip cell expects the archive under
# /content/datasets/wwwdiviner/7015-dataset/ -- confirm that is where the CLI puts it.
!kaggle config set -n path -v /content
!kaggle datasets download -d wwwdiviner/7015-dataset

In [None]:
import zipfile

def unzip_file(zip_file_path, extract_folder):
    """Extract every member of a zip archive into a folder.

    Args:
        zip_file_path: Path of the zip archive to read.
        extract_folder: Directory that receives the extracted members.
    """
    archive = zipfile.ZipFile(zip_file_path, 'r')
    try:
        archive.extractall(extract_folder)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()

# Location of the downloaded dataset archive (the download cell sets the
# Kaggle CLI `path` option to /content).
zip_file_path = '/content/datasets/wwwdiviner/7015-dataset/7015-dataset.zip'
# Extraction target; the ImageFolder cells read
# /content/woa7015/train/train and /content/woa7015/test/test from here.
extract_folder = '/content/woa7015'

unzip_file(zip_file_path, extract_folder)

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder, DatasetFolder
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from glob import glob
import gc
import sys
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torchsummary

# Choose the compute device once: CUDA when a GPU is visible, otherwise CPU.
_gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if _gpu_available else "cpu")  # "cuda" = first available GPU
print("GPU is available." if _gpu_available else "GPU is not available. Switching to CPU.")

In [None]:
# show images and labels
def show_images(images, labels, class_names=None):
    """Plot a batch of images in a grid, one subplot per image, titled by label.

    Args:
        images: Indexable batch of image tensors, each laid out (C, H, W);
            every image is permuted to (H, W, C) before being drawn.
        labels: Indexable batch of labels aligned with ``images``. Elements may
            be Python ints or 0-dim tensors.
        class_names: Optional sequence mapping a label index to a display
            name. When omitted, the label value itself is used as the title.

    The grid is 8 columns wide and at least 4 rows tall (the original fixed
    layout); extra rows are added when more than 32 images are passed.
    """
    n_images = len(images)
    n_cols = 8
    n_rows = max(4, -(-n_images // n_cols))  # ceil division, never below 4 rows

    plt.figure(figsize=(12, 6))
    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(images[i].permute(1, 2, 0))

        # Convert 0-dim tensors to plain Python values so they can index a list.
        label = labels[i]
        if hasattr(label, "item"):
            label = label.item()

        # The original indexed `labels` with itself (labels[labels[i]]), which
        # shows an unrelated label value instead of the class name.
        plt.title(class_names[label] if class_names is not None else str(label))
        plt.axis('off')
    plt.show()

In [None]:
# Preprocessing applied to every frame: resize to 224x224, convert to a
# tensor, then normalize each channel.
# NOTE(review): these per-channel statistics look like the commonly used
# ImageNet values -- confirm that is the intent.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

_resize = transforms.Resize((224, 224))
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD)

transform = transforms.Compose([_resize, _to_tensor, _normalize])

In [None]:
# The optical-flow cell treats every batch as the consecutive frames of one
# clip, so batch_size is effectively "frames per clip".
# NOTE(review): presumably every clip contributes exactly 30 frames -- confirm.
batch_size = 30
# Load the image datasets
trainDataset = ImageFolder(root='/content/woa7015/train/train', transform=transform)
# shuffle=False keeps frames in dataset order, which the frame-to-frame
# optical-flow computation depends on.
trainLoader = DataLoader(trainDataset, batch_size=batch_size, shuffle=False)
testDataset = ImageFolder(root='/content/woa7015/test/test', transform=transform)
testLoader = DataLoader(testDataset, batch_size=batch_size, shuffle=False)
# Class-name list of the training set. Later cells reuse the name `labels`
# as a loop variable, which overwrites this value.
labels = trainDataset.classes

In [None]:
# Start each sample/label accumulator as its own empty tensor on `device`;
# the optical-flow cell concatenates onto these.
train_samples, train_labels, test_samples, test_labels = (
    torch.tensor([]).to(device) for _ in range(4)
)

In [None]:
import cv2
import numpy as np
import torch

def calculate_optical_flow(video_frames):
    """
    Calculate dense Farneback optical flow between each pair of consecutive frames.

    Args:
        video_frames (torch.Tensor): Tensor of shape (num_frames, C, H, W).
            Frames are converted to grayscale assuming RGB channel order
            (the order produced by the ImageFolder/ToTensor pipeline in this
            notebook). The tensor may live on any device; it is copied to host
            memory for OpenCV.

    Returns:
        torch.Tensor: Optical flow fields of shape (num_frames-1, 2, H, W),
        placed on the same device as ``video_frames``. When fewer than two
        frames are given, an empty float32 tensor of shape (0, 2, H, W) is
        returned instead of raising.
    """
    num_frames, C, H, W = video_frames.shape
    out_device = video_frames.device

    # No frame pair -> no flow; torch.stack([]) would raise otherwise.
    if num_frames < 2:
        return torch.empty((0, 2, H, W), dtype=torch.float32, device=out_device)

    # OpenCV needs host-side numpy arrays in (H, W, C) layout; .numpy() fails
    # on CUDA tensors, hence the explicit detach/cpu.
    frames_hwc = video_frames.detach().cpu().permute(0, 2, 3, 1).contiguous().numpy()

    # Convert every frame to grayscale exactly once (each interior frame takes
    # part in two pairs).
    grays = [cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) for frame in frames_hwc]

    optical_flows = []
    for gray1, gray2 in zip(grays[:-1], grays[1:]):
        # Positional args after the initial flow (None): pyr_scale=0.5,
        # levels=3, winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0
        flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        # (H, W, 2) -> (2, H, W)
        optical_flows.append(torch.from_numpy(flow).permute(2, 0, 1))

    return torch.stack(optical_flows).to(out_device)



In [None]:
# Maximum number of clips (batches) converted to optical flow per split.
MAX_TRAIN_VIDEOS = 200
# The original shared one counter across both loops and stopped the test loop
# at 310 in total, i.e. 110 test clips after 200 training clips.
MAX_TEST_VIDEOS = 110


def build_flow_dataset(loader, dataset, max_videos):
    """Convert the first ``max_videos`` batches of ``loader`` into optical-flow clips.

    Each batch is treated as the consecutive frames of one clip, and the clip
    takes the class of its first frame. The binary target is 0 when the second
    '_'-separated field of the class name is 'arm', otherwise 1.

    Args:
        loader: DataLoader yielding (frames, frame_labels) batches in frame order.
        dataset: Dataset exposing ``class_to_idx`` (class name -> index).
        max_videos: Stop after this many clips have been collected.

    Returns:
        (samples, labels) on ``device``: samples has one entry per clip holding
        that clip's stacked flow fields, labels is a long tensor of 0/1 targets.
        Both are empty tensors when no clip was collected.
    """
    # Second '_'-separated field of each class name, in class-index order.
    part_names = [class_name.split('_')[1] for class_name in dataset.class_to_idx.keys()]
    frames_per_clip = loader.batch_size

    flow_clips = []
    clip_targets = []
    for images, frame_labels in tqdm(loader):
        if images.shape[0] != frames_per_clip:
            # A short (final) batch yields a different number of flow fields
            # and cannot be stacked with the full-length clips.
            continue

        # OpenCV runs on the host, so the frames are kept on the CPU here.
        flow_clips.append(calculate_optical_flow(images.cpu()).cpu())
        clip_targets.append(0 if part_names[int(frame_labels[0])] == 'arm' else 1)

        if len(flow_clips) >= max_videos:
            break

    if not flow_clips:
        return torch.tensor([]).to(device), torch.tensor([], dtype=torch.long).to(device)

    # Stack once at the end instead of re-concatenating on every iteration.
    samples = torch.stack(flow_clips).to(device)
    targets = torch.tensor(clip_targets, dtype=torch.long, device=device)
    return samples, targets


train_samples, train_labels = build_flow_dataset(trainLoader, trainDataset, MAX_TRAIN_VIDEOS)
test_samples, test_labels = build_flow_dataset(testLoader, testDataset, MAX_TEST_VIDEOS)

In [None]:
class myDataset(Dataset):
    """Map-style dataset pairing pre-built samples with their labels by position."""

    def __init__(self, data, labels):
        # Both containers are stored as given and must support len()/indexing.
        self.labels = labels
        self.data = data

    def __len__(self):
        """Number of samples, taken from the data container."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, index):
        """Return the (sample, label) pair stored at ``index``."""
        sample = self.data[index]
        target = self.labels[index]
        return sample, target


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class CNNLSTM(nn.Module):
    """Per-frame ResNet-18 feature extractor followed by an LSTM and a linear classifier.

    Args:
        num_classes: Number of output classes.
        lstm_hidden_size: Hidden size of the LSTM.
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_input_size: Feature size fed to the LSTM. It must equal the
            output width of ``self.resnet``; the ResNet's own ``fc`` layer is
            kept, so its output is used as the per-frame feature.
    """

    def __init__(self, num_classes, lstm_hidden_size, lstm_num_layers, lstm_input_size):
        super(CNNLSTM, self).__init__()
        self.lstm_input_size = lstm_input_size

        # Load the pretrained ResNet model.
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True` -- confirm against the installed version.
        self.resnet = models.resnet18(pretrained=True)

        # The final fully connected layer is kept (an earlier variant that
        # stripped it is no longer used). Freeze everything except parameters
        # whose name contains "fc".
        for name, param in self.resnet.named_parameters():
            if "fc" not in name:
                param.requires_grad = False

        # LSTM over the sequence of per-frame features.
        self.lstm = nn.LSTM(input_size=lstm_input_size, hidden_size=lstm_hidden_size,
                            num_layers=lstm_num_layers, batch_first=True)

        # Fully connected classification head.
        self.fc = nn.Linear(lstm_hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: Tensor of shape (batch_size, num_frames, C, H, W) with C <= 3.
                Inputs with fewer than 3 channels (e.g. 2-channel optical flow)
                are zero-padded to 3 channels so they fit the ResNet stem.

        Returns:
            Tensor of shape (batch_size, num_classes) with unnormalized scores.

        Raises:
            ValueError: If ``x`` is not 5-D, has no frames, or has more than 3 channels.
        """
        if x.dim() != 5:
            raise ValueError(
                f"expected input of shape (batch, frames, C, H, W), got {tuple(x.shape)}")

        # Read the real shape from the input; the original hard-coded
        # (lstm_input_size, 30, 3, 224, 224) and relied on a global variable.
        batch_size, num_frames, C, H, W = x.shape
        if num_frames == 0:
            raise ValueError("input clip contains no frames")
        if C > 3:
            raise ValueError(f"expected at most 3 channels per frame, got {C}")
        if C < 3:
            padding = x.new_zeros((batch_size, num_frames, 3 - C, H, W))
            x = torch.cat([x, padding], dim=2)

        # Run the CNN on each frame; frames are processed one time step at a
        # time, as in the original loop.
        per_frame = [self.resnet(x[:, t]).flatten(1) for t in range(num_frames)]
        cnn_features = torch.stack(per_frame, dim=1)  # (batch, frames, features)

        # Feed the feature sequence to the LSTM.
        lstm_out, _ = self.lstm(cnn_features)

        # Use the LSTM output at the last time step.
        lstm_last_output = lstm_out[:, -1, :]

        # Classification head.
        output = self.fc(lstm_last_output)

        return output


In [None]:
# Wrap the precomputed sample/label tensors in Dataset/DataLoader objects
# for training the CNN-LSTM.
# NOTE: this rebinds trainDataset/trainLoader/testDataset/testLoader, which
# previously referred to the ImageFolder frame loaders. Re-running the
# optical-flow cell after this one would iterate these loaders instead.
lstm_batch_size = 8
trainDataset = myDataset(train_samples, train_labels)
trainLoader = DataLoader(trainDataset, batch_size=lstm_batch_size, shuffle=True)
testDataset = myDataset(test_samples, test_labels)
testLoader = DataLoader(testDataset, batch_size=lstm_batch_size, shuffle=False)

In [None]:
# Sanity check: fetch one batch from the training loader and show the shape
# of its inputs (nothing is printed when the loader is empty).
_first_batch = next(iter(trainLoader), None)
if _first_batch is not None:
    inputs, labels = _first_batch
    print(inputs.shape)

In [None]:
# Create the model instance
num_classes = 2
lstm_hidden_size = 4
lstm_num_layers = 3
# Feature size handed from the CNN to the LSTM.
# NOTE(review): presumably chosen to match the 1000-wide output of the
# ResNet-18 fc layer that CNNLSTM keeps -- confirm.
lstm_input_size = 1000
model = CNNLSTM(num_classes, lstm_hidden_size, lstm_num_layers, lstm_input_size)
model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

In [None]:
# Install torchstat, which provides the `stat` helper imported in the next cell.
!pip install torchstat

In [None]:
import copy

from torchstat import stat

# `model.forward` expects 5-D clips (batch, frames, C, H, W), so passing the
# whole CNN-LSTM with a single-image input size cannot run.
# Profile the per-frame ResNet backbone instead, which does take one
# (3, 224, 224) image.
# NOTE(review): torchstat appears to build its own CPU input and may not
# handle the LSTM's tuple output -- confirm against its README.
# A deep copy is moved to the CPU so the training model stays untouched on `device`.
backbone_for_stats = copy.deepcopy(model.resnet).cpu().eval()
stat(backbone_for_stats, (3, 224, 224))

In [None]:
import random
import os
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score


def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and switch cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator (default 42). It is also
            written to the PYTHONHASHSEED environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for every library-level generator, in the original order.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    # Deterministic cuDNN kernels; benchmark mode (auto-tuning) is turned off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Use seed_everything to set one global seed.
# NOTE(review): the model and the DataLoaders are created in earlier cells,
# i.e. before this seed is applied, so their initialization is not covered by it.
seed_everything(42)

N_EPOCHS = 300
# Checkpoints are only written for epochs strictly greater than this index.
CHECKPOINT_AFTER_EPOCH = 10

# Best (lowest) mean test loss seen so far; any real loss beats infinity.
least_eval_loss = float('inf')


def _safe_roc_auc(y_true, y_score):
    """Return ROC AUC, or NaN when ``y_true`` has fewer than two classes.

    roc_auc_score raises in that case, which would abort the whole run.
    """
    if len(set(y_true)) < 2:
        return float('nan')
    return roc_auc_score(y_true, y_score)


def run_epoch(loader, training):
    """Run one full pass over ``loader`` and return its aggregate metrics.

    Args:
        loader: DataLoader yielding (inputs, labels) batches.
        training: When True the model is put in train mode and optimized;
            otherwise it is evaluated with gradients disabled.

    Returns:
        dict with 'loss' (sample-weighted mean loss over the pass), 'acc',
        'f1' and 'auc'.
    """
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    y_true, y_pred, y_prob = [], [], []

    with torch.set_grad_enabled(training):
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            # Labels must be on the same device as the outputs and of integer
            # type for CrossEntropyLoss.
            batch_labels = batch_labels.to(device=device, dtype=torch.long)

            if training:
                optimizer.zero_grad()

            # Forward pass and loss.
            logits = model(batch_inputs)
            loss = criterion(logits, batch_labels)

            # Backward pass and optimization step.
            if training:
                loss.backward()
                optimizer.step()

            predicted = logits.argmax(dim=1)
            n_batch = batch_labels.size(0)
            # Weight by batch size so a short last batch does not skew the mean.
            loss_sum += loss.item() * n_batch
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += n_batch

            y_true.extend(batch_labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())
            # Class-1 probability: the raw class-1 logit alone does not rank
            # samples the same way when there are two output logits.
            y_prob.extend(torch.softmax(logits.detach(), dim=1)[:, 1].cpu().tolist())

    denom = max(n_seen, 1)
    return {
        'loss': loss_sum / denom,
        'acc': n_correct / denom,
        'f1': f1_score(y_true, y_pred),
        'auc': _safe_roc_auc(y_true, y_prob),
    }


for epoch in range(N_EPOCHS):
    train_metrics = run_epoch(trainLoader, training=True)
    test_metrics = run_epoch(testLoader, training=False)

    # save best model (by mean test loss over the whole test set)
    if test_metrics['loss'] < least_eval_loss and epoch > CHECKPOINT_AFTER_EPOCH:
        least_eval_loss = test_metrics['loss']
        # Both metrics in the file name come from the test split.
        torch.save(model.state_dict(),
                   f"{epoch}_best_model_f1_{test_metrics['f1']:.3f}_auc_{test_metrics['auc']:.3f}.pth")

    print(
        f"epoch_{epoch}:\n",
        "training roc_auc_score {:.5f},".format(train_metrics['auc']),
        "training f1_score {:.5f},".format(train_metrics['f1']),
        "training acc {:.5f},".format(train_metrics['acc']),
        "training loss {:.5f}".format(train_metrics['loss']),
        "\n",
        "testing roc_auc_score {:.5f},".format(test_metrics['auc']),
        "testing f1_score {:.5f},".format(test_metrics['f1']),
        "testing acc {:.5f},".format(test_metrics['acc']),
        "testing loss {:.5f}".format(test_metrics['loss']),
        "\n----------------------------------"
    )


