In [None]:
# Mount Google Drive at /content/drive; the dataset and model paths used in the
# later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


MessageError: Error: credential propagation was unsuccessful

# **dataset**

In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from sklearn.metrics import classification_report, roc_curve, auc
import numpy as np


class ASD_Isolated(Dataset):
    """Image-sequence dataset read from ``data_path/<class>/<instance>/*.jpg``.

    Every instance directory is one sample. Its ``.jpg`` frames are loaded in
    sorted filename order, converted to RGB, passed through ``transform`` and
    stacked into a single tensor.

    Args:
        data_path: Directory that directly contains one sub-directory per class.
        num_classes: Expected number of class directories (checked on construction).
        transform: Optional callable applied to each RGB ``PIL.Image`` frame. It
            has to return equally shaped tensors, otherwise stacking fails.

    Attributes:
        classes: Sorted class directory names; a label is an index into this list.
        data_info: ``(instance_directory, class_name)`` tuples, one per sample.
    """

    def __init__(self, data_path, num_classes=2, transform=None):
        super(ASD_Isolated, self).__init__()
        self.data_path = data_path
        self.num_classes = num_classes
        self.transform = transform
        # Only directories can be classes. Plain files that happen to sit next to
        # the class folders must not be counted or descended into.
        self.classes = sorted(
            entry for entry in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, entry))
        )
        assert len(self.classes) == num_classes, f"类别数量({len(self.classes)})与num_classes参数({num_classes})不匹配"
        # Constant-time label lookup for __getitem__.
        self._class_to_index = {name: index for index, name in enumerate(self.classes)}
        self.data_info = self._get_data_info()

    def _get_data_info(self):
        """Return ``(instance_directory, class_name)`` for every instance directory.

        The number of frames inside an instance directory is deliberately not
        checked here; non-directory entries inside a class folder are skipped.
        """
        data_info = []
        for category in self.classes:
            category_path = os.path.join(self.data_path, category)
            for instance_folder in sorted(os.listdir(category_path)):
                instance_path = os.path.join(category_path, instance_folder)
                if os.path.isdir(instance_path):
                    data_info.append((instance_path, category))
        return data_info

    def read_images(self, folder_path):
        """Load all ``.jpg`` frames of one instance directory as a single tensor.

        Frames are stacked along a new leading dimension and the first two
        dimensions are then swapped, so ``(C, H, W)`` frames produce a
        ``(C, T, H, W)`` tensor.

        Raises:
            RuntimeError: If ``folder_path`` contains no ``.jpg`` file.
        """
        image_files = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.endswith('.jpg')
        )
        if not image_files:
            # torch.stack would fail on an empty list anyway; say which folder is at fault.
            raise RuntimeError(f"No .jpg frames found in {folder_path!r}")

        images = []
        for path in image_files:
            # convert() returns an independent in-memory copy, so the file handle
            # can be released immediately instead of lingering until garbage collection.
            with Image.open(path) as frame:
                images.append(frame.convert('RGB'))

        if self.transform is not None:
            images = [self.transform(image) for image in images]
        images = torch.stack(images, dim=0)
        images = images.permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W) for the 3D CNN
        return images

    def __len__(self):
        """Number of instance directories found under ``data_path``."""
        return len(self.data_info)

    def __getitem__(self, idx):
        """Return ``{'data': frames_tensor, 'label': class_index_tensor}`` for sample ``idx``."""
        folder_path, label = self.data_info[idx]
        images = self.read_images(folder_path)
        label_tensor = torch.tensor(self._class_to_index[label], dtype=torch.long)
        return {'data': images, 'label': label_tensor}

# Smoke test: build the dataset and inspect its first sample.
if __name__ == '__main__':
    transform = transforms.Compose([
        transforms.Resize([128, 128]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ])

    # Replace this with the actual location of your dataset.
    dataset_path = "/content/drive/MyDrive/output_frames/test"

    dataset = ASD_Isolated(
        data_path=dataset_path,
        num_classes=2,
        transform=transform,
    )
    print(f"Dataset size: {len(dataset)}")
    if len(dataset) == 0:
        print("Dataset is empty!")
    else:
        # Only index the dataset when there is at least one sample.
        sample = dataset[0]
        print(f"Sample image shape: {sample['data'].shape}, Label: {sample['label']}")


# **train**

In [None]:
import torch
from sklearn.metrics import accuracy_score

def train_epoch(model, criterion, optimizer, dataloader, device, epoch, logger, log_interval, writer):
    """Run one training pass over ``dataloader`` and report mean loss and accuracy.

    Args:
        model: Network to train. If it returns a list or tuple, the first
            element is used as the class scores.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        dataloader: Iterable of dicts with ``'data'`` and ``'label'`` tensors.
        device: Device the batch tensors are moved to.
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
        logger: Accepted for interface compatibility; not used here.
        log_interval: Accepted for interface compatibility; not used here.
        writer: Accepted for interface compatibility; not used here.

    Returns:
        ``(training_loss, training_acc)``: the mean of the per-batch losses and
        the fraction of correctly classified samples over the whole epoch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.train()
    losses = []
    label_batches = []
    pred_batches = []

    for data in dataloader:
        inputs = data['data'].to(device)
        # reshape(-1) rather than squeeze(): squeeze() turns a batch of one into a
        # 0-dim tensor, which the loss rejects and which cannot be iterated.
        labels = data['label'].to(device).reshape(-1)

        optimizer.zero_grad()
        outputs = model(inputs)
        # Models with auxiliary outputs return a sequence; scores come first.
        if isinstance(outputs, (list, tuple)):
            outputs = outputs[0]

        loss = criterion(outputs, labels)
        losses.append(loss.item())

        prediction = torch.max(outputs, 1)[1]
        # Keep only detached CPU copies so the epoch history holds no graph or GPU memory.
        label_batches.append(labels.detach().cpu())
        pred_batches.append(prediction.detach().cpu())

        loss.backward()
        optimizer.step()

    if not losses:
        raise ValueError("train_epoch received a dataloader that yields no batches")

    training_loss = sum(losses)/len(losses)
    all_label = torch.cat(label_batches)
    all_pred = torch.cat(pred_batches)
    # Exact fraction of matches, computed without a tensor -> numpy round trip.
    training_acc = (all_pred == all_label).sum().item() / all_label.numel()

    print('Loss', {'train': training_loss}, epoch+1)
    print('Accuracy', {'train': training_acc}, epoch+1)
    print("第 {} 轮平均训练损失: {:.6f} | 准确率: {:.2f}%".format(epoch+1, training_loss, training_acc*100))

    return training_loss, training_acc


# **tool**

In [None]:
import torch
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import torchvision.utils as utils
import cv2
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix


def get_label_and_pred(model, dataloader, device):
    """Collect ground-truth labels and arg-max predictions over ``dataloader``.

    The model is switched to evaluation mode while predicting and its previous
    training/eval mode is restored afterwards.

    Args:
        model: Network to evaluate. If it returns a list or tuple, the first
            element is used as the class scores.
        dataloader: Iterable of dicts with ``'data'`` and ``'label'`` tensors.
        device: Device the batch tensors are moved to.

    Returns:
        ``(all_label, all_pred)`` as 1-D numpy arrays of equal length.
    """
    label_batches = []
    pred_batches = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Iterate the loader that was passed in, not a notebook-level global.
            for data in dataloader:
                inputs = data['data'].to(device)
                # reshape(-1) keeps a batch of one as a 1-element vector.
                labels = data['label'].to(device).reshape(-1)
                outputs = model(inputs)
                if isinstance(outputs, (list, tuple)):
                    outputs = outputs[0]
                prediction = torch.max(outputs, 1)[1]
                label_batches.append(labels.cpu())
                pred_batches.append(prediction.cpu())
    finally:
        model.train(was_training)

    if not label_batches:
        empty = torch.empty(0, dtype=torch.long)
        return empty.numpy(), empty.clone().numpy()

    all_label = torch.cat(label_batches).numpy()
    all_pred = torch.cat(pred_batches).numpy()
    return all_label, all_pred


def plot_confusion_matrix(model, dataloader, device, save_path='confmat.png', normalize=True, label_to_word=None):
    """Plot, save and rank the confusion matrix of ``model`` on ``dataloader``.

    The matrix image is written to ``save_path``, the matrix values to
    ``matrix.csv``, and the (up to) ten classes with the lowest diagonal value
    are printed.

    Args:
        model: Network to evaluate.
        dataloader: Loader passed on to ``get_label_and_pred``.
        device: Device used for inference.
        save_path: Output path of the figure.
        normalize: If true, divide every row by its number of true samples.
        label_to_word: Optional callable mapping a class index to a display
            name. When omitted, ``dataloader.dataset.label_to_word`` is used if
            it exists; otherwise the plain class index is printed.
    """
    all_label, all_pred = get_label_and_pred(model, dataloader, device)
    confmat = confusion_matrix(all_label, all_pred)
    num_classes = confmat.shape[0]

    if normalize:
        row_sums = confmat.sum(axis=1, keepdims=True)
        # Rows without any true sample stay NaN, but without a divide-by-zero warning.
        confmat = np.divide(
            confmat.astype('float'),
            row_sums,
            out=np.full(confmat.shape, np.nan),
            where=row_sums != 0,
        )

    plt.figure(figsize=(20,20))
    plt.imshow(confmat, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    # One tick per class that is actually present in the matrix.
    ticks = np.arange(num_classes)
    plt.xticks(ticks, fontsize=8)
    plt.yticks(ticks, fontsize=8)
    plt.grid(True)
    plt.title('Confusion matrix', fontsize=20)
    plt.xlabel('Predicted label', fontsize=20)
    plt.ylabel('True label', fontsize=20)
    plt.savefig(save_path)

    if label_to_word is None:
        label_to_word = getattr(getattr(dataloader, 'dataset', None), 'label_to_word', None)

    # Ranking: lowest diagonal entries first, capped at the number of classes.
    sorted_index = np.diag(confmat).argsort()
    for class_index in sorted_index[:10]:
        class_index = int(class_index)
        name = label_to_word(class_index) if label_to_word is not None else class_index
        print(name, confmat[class_index][class_index])

    np.savetxt('matrix.csv', confmat, delimiter=',')


def visualize_attn(I, c, sample_size=128):
    """Overlay a grid of attention heat maps on an image grid.

    Args:
        I: Image tensor laid out as ``(channels, height, width)``; it is
            permuted to height-width-channel order for blending.
        c: 4-D tensor of attention scores ``(N, C, H, W)``; a softmax is taken
            over the flattened ``H*W`` positions of every map.
        sample_size: Spatial size the attention maps are upsampled towards
            (maps are only enlarged, never shrunk). Defaults to 128.

    Returns:
        Float tensor ``(3, height, width)`` holding ``0.6 * image + 0.4 * heat map``.
        NOTE(review): the blend only stays within [0, 1] when ``I`` itself is in
        [0, 1] (e.g. produced by ``make_grid(..., normalize=True)``) — confirm at the caller.
    """
    img = I.permute((1,2,0)).cpu().numpy()

    N, C, H, W = c.size()
    a = F.softmax(c.reshape(N, C, -1), dim=2).view(N, C, H, W)
    up_factor = sample_size / H
    if up_factor > 1:
        a = F.interpolate(a, scale_factor=up_factor, mode='bilinear', align_corners=False)
    attn = utils.make_grid(a, nrow=4, normalize=True, scale_each=True)
    attn = attn.permute((1,2,0)).mul(255).byte().cpu().numpy()
    attn = cv2.applyColorMap(attn, cv2.COLORMAP_JET)
    attn = cv2.cvtColor(attn, cv2.COLOR_BGR2RGB)
    # The colour map is uint8 in [0, 255]; bring it back to [0, 1] so it is on the
    # same scale as the image before the two are blended.
    attn = np.float32(attn) / 255

    vis = 0.6 * img + 0.4 * attn
    return torch.from_numpy(vis).permute(2,0,1)


def plot_attention_map(model, dataloader, device):
    """Write attention visualisations for the first batch to TensorBoard.

    Up to 16 samples of the first batch are used. The first frame of each
    sample is shown as ``origin`` and the four attention outputs of the model
    as ``attn1`` .. ``attn4``. Nothing is written when ``dataloader`` is empty.

    Args:
        model: Network whose forward pass returns five values; the last four
            are treated as 5-D attention score tensors.
        dataloader: Iterable of dicts with a 5-D ``'data'`` tensor.
        device: Device used for inference.
    """
    writer = SummaryWriter("runs/attention_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()))

    model.eval()
    try:
        with torch.no_grad():
            for data in dataloader:
                inputs = data['data'].to(device)
                images = inputs[0:16,:,:,:,:]
                I = utils.make_grid(images[:,:,0,:,:], nrow=4, normalize=True, scale_each=True)
                writer.add_image('origin', I)
                _, c1, c2, c3, c4 = model(images)
                for level, scores in enumerate((c1, c2, c3, c4), start=1):
                    # Visualise the first temporal slice of every attention map.
                    writer.add_image('attn{}'.format(level), visualize_attn(I, scores[:,:,0,:,:]))
                # Only the first batch is visualised.
                break
    finally:
        # Flush and release the event file even if inference fails.
        writer.close()


"""
Calculate Word Error Rate
Word Error Rate = (Substitutions + Insertions + Deletions) / Number of Words Spoken
Reference:
https://holianh.github.io/portfolio/Cach-tinh-WER/
https://github.com/imalic3/python-word-error-rate
"""
def wer(r, h):
    """Word error rate of hypothesis ``h`` against reference ``r``, in percent.

    Computes the Levenshtein distance (substitutions, insertions, deletions)
    between the two sequences and divides it by the reference length.

    Args:
        r: Reference sequence; items are compared with ``==``.
        h: Hypothesis sequence.

    Returns:
        ``100 * edit_distance / len(r)`` as a float.

    Raises:
        ZeroDivisionError: If ``r`` is empty.
    """
    n_ref, n_hyp = len(r), len(h)
    # int64 instead of uint8: an 8-bit table overflows once a distance exceeds 255.
    d = np.zeros((n_ref + 1, n_hyp + 1), dtype=np.int64)
    # Against an empty sequence the distance is just the other sequence's length.
    d[0, :] = np.arange(n_hyp + 1)
    d[:, 0] = np.arange(n_ref + 1)

    for i in range(1, n_ref + 1):
        for j in range(1, n_hyp + 1):
            if r[i-1] == h[j-1]:
                d[i, j] = d[i-1, j-1]
            else:
                # cheapest of substitution, insertion, deletion
                d[i, j] = min(d[i-1, j-1], d[i, j-1], d[i-1, j]) + 1

    return float(d[n_ref, n_hyp]) / n_ref * 100


if __name__ == '__main__':
    # Worked example: reference of four tokens against a five-token hypothesis.
    r, h = [1,2,3,4], [1,1,3,5,6]
    print(wer(r, h))

# **validation**

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

def val_epoch(model, criterion, dataloader, device, epoch, logger, writer):
    """Evaluate ``model`` on ``dataloader`` and print/return binary-classification metrics.

    Args:
        model: Network to evaluate. If it returns a list or tuple, the first
            element is used as the class scores.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        dataloader: Iterable of dicts with ``'data'`` and ``'label'`` tensors.
        device: Device the batch tensors are moved to.
        epoch: Epoch number, printed as given.
        logger: Accepted for interface compatibility; not used here.
        writer: Accepted for interface compatibility; not used here.

    Returns:
        ``(validation_loss, validation_acc, recall, precision, f1, auc_roc,
        all_labels, all_probs)``. ``all_labels`` and ``all_probs`` are CPU
        tensors; ``all_probs`` holds the raw model outputs, not softmax
        probabilities. ``auc_roc`` is ``nan`` when the labels contain a single class.

    Raises:
        ValueError: If no batch could be evaluated.
    """
    model.eval()
    losses = []
    label_batches = []
    pred_batches = []
    output_batches = []

    with torch.no_grad():
        for batch_idx, data in enumerate(dataloader):
            inputs = data['data'].to(device)
            # Flatten labels exactly like the training loop does, so (N, 1) and
            # single-sample batches are handled consistently.
            labels = data['label'].to(device).reshape(-1)

            outputs = model(inputs)
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]

            if outputs.size(0) != labels.size(0):
                print(f'Error: Mismatch between output size {outputs.size(0)} and labels size {labels.size(0)} at batch index {batch_idx}.')
                continue

            loss = criterion(outputs, labels)
            losses.append(loss.item())

            _, predicted = torch.max(outputs, 1)
            # Move results to the CPU right away so the epoch history does not
            # accumulate on the accelerator.
            label_batches.append(labels.cpu())
            pred_batches.append(predicted.cpu())
            output_batches.append(outputs.cpu())

    if not losses:
        raise ValueError("val_epoch could not evaluate any batch from the dataloader")

    validation_loss = sum(losses) / len(losses)
    all_labels = torch.cat(label_batches)
    all_preds = torch.cat(pred_batches)
    all_probs = torch.cat(output_batches)

    labels_np = all_labels.numpy()
    preds_np = all_preds.numpy()

    validation_acc = accuracy_score(labels_np, preds_np)

    # Recall, precision and F1 with scikit-learn's default (binary) averaging.
    recall = recall_score(labels_np, preds_np)
    precision = precision_score(labels_np, preds_np)
    f1 = f1_score(labels_np, preds_np)

    # AUC-ROC from the softmax score of class 1 (binary classification).
    probs = torch.nn.functional.softmax(all_probs, dim=1)[:, 1]
    if torch.unique(all_labels).numel() < 2:
        # roc_auc_score raises when only one class is present; report nan
        # instead of losing the whole epoch's metrics.
        print('Warning: only one class present in validation labels; AUC-ROC is undefined.')
        auc_roc = float('nan')
    else:
        auc_roc = roc_auc_score(labels_np, probs.numpy())

    print('Loss/val', validation_loss, epoch)
    print('Accuracy/val', validation_acc, epoch)
    print(f'Validation - Epoch: {epoch}, Loss: {validation_loss:.4f}, Accuracy: {validation_acc:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}, F1: {f1:.4f}, AUC-ROC: {auc_roc:.4f}')

    return validation_loss, validation_acc, recall, precision, f1, auc_roc, all_labels, all_probs


# conv3d

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.hub import load_state_dict_from_url
import torchvision
from functools import partial
from collections import OrderedDict
import math

import os,inspect,sys
# Resolve the directory of the file that defines the current frame and put it at
# the front of sys.path, so modules located next to it can be imported.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0,currentdir)
# NOTE(review): the import below is disabled, yet ResNet(attention=True) further
# down references both names; enable it or define them before using attention.
#from Attention import ProjectorBlock3D, LinearAttentionBlock3D

"""
Implementation of 3D CNN.
"""
class CNN3D(nn.Module):
    """Plain 3D CNN: three Conv3d/BatchNorm3d/ReLU stages followed by a 3-layer MLP.

    The size of the first linear layer is derived from ``sample_duration`` and
    ``sample_size``, so inputs must be ``(batch, 3, sample_duration,
    sample_size, sample_size)``.

    Args:
        sample_size: Height and width of the input frames.
        sample_duration: Number of frames per input clip.
        drop_p: Dropout probability applied before the last linear layer.
        hidden1: Width of the first hidden linear layer.
        hidden2: Width of the second hidden linear layer.
        num_classes: Number of output scores.
    """

    def __init__(self, sample_size=128, sample_duration=16, drop_p=0.0, hidden1=512, hidden2=256, num_classes=100):
        super(CNN3D, self).__init__()
        self.sample_size = sample_size
        self.sample_duration = sample_duration
        self.num_classes = num_classes

        # Per-stage hyper-parameters, each tuple ordered (depth, height, width).
        self.ch1, self.ch2, self.ch3 = 32, 48, 48
        self.k1, self.k2, self.k3 = (3, 7, 7), (3, 7, 7), (3, 5, 5)
        self.s1, self.s2, self.s3 = (2, 2, 2), (2, 2, 2), (2, 2, 2)
        self.p1, self.p2, self.p3 = (0, 0, 0), (0, 0, 0), (0, 0, 0)
        self.d1, self.d2, self.d3 = (1, 1, 1), (1, 1, 1), (1, 1, 1)
        self.hidden1, self.hidden2 = hidden1, hidden2
        self.drop_p = drop_p
        self.pool_k, self.pool_s, self.pool_p, self.pool_d = (1, 2, 2), (1, 2, 2), (0, 0, 0), (1, 1, 1)

        # Track the feature-map shape through the three convolutions
        # (pooling is not applied in forward, so it is not accounted for).
        stage_params = (
            (self.k1, self.s1, self.p1, self.d1),
            (self.k2, self.s2, self.p2, self.d2),
            (self.k3, self.s3, self.p3, self.d3),
        )
        shape = (self.sample_duration, self.sample_size, self.sample_size)
        stage_shapes = []
        for kernel, stride, padding, dilation in stage_params:
            shape = self.compute_output_shape(shape[0], shape[1], shape[2], kernel, stride, padding, dilation)
            stage_shapes.append(shape)
        self.conv1_output_shape, self.conv2_output_shape, self.conv3_output_shape = stage_shapes

        # Layers; the first convolution expects 3 (RGB) input channels.
        self.conv1 = nn.Conv3d(in_channels=3, out_channels=self.ch1, kernel_size=self.k1,
                               stride=self.s1, padding=self.p1, dilation=self.d1)
        self.bn1 = nn.BatchNorm3d(self.ch1)
        self.conv2 = nn.Conv3d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2,
                               stride=self.s2, padding=self.p2, dilation=self.d2)
        self.bn2 = nn.BatchNorm3d(self.ch2)
        self.conv3 = nn.Conv3d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3,
                               stride=self.s3, padding=self.p3, dilation=self.d3)
        self.bn3 = nn.BatchNorm3d(self.ch3)
        self.relu = nn.ReLU(inplace=True)
        self.drop = nn.Dropout3d(p=self.drop_p)
        self.pool = nn.MaxPool3d(kernel_size=self.pool_k)
        depth, height, width = self.conv3_output_shape
        self.fc1 = nn.Linear(self.ch3 * depth * height * width, self.hidden1)
        self.fc2 = nn.Linear(self.hidden1, self.hidden2)
        self.fc3 = nn.Linear(self.hidden2, self.num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for a clip batch ``x``."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in stages:
            x = self.relu(bn(conv(x)))

        # Flatten everything but the batch dimension for the MLP head.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        return self.fc3(x)

    def compute_output_shape(self, D_in, H_in, W_in, k, s, p, d):
        """Output ``(depth, height, width)`` of a convolution with the given
        kernel ``k``, stride ``s``, padding ``p`` and dilation ``d`` (3-tuples)."""
        return tuple(
            np.floor((size + 2*p[axis] - d[axis]*(k[axis] - 1) - 1)/s[axis] + 1).astype(int)
            for axis, size in enumerate((D_in, H_in, W_in))
        )


"""
Implementation of 3D Resnet
Reference: Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?
"""
class BasicBlock(nn.Module):
    """Residual block made of two 3x3x3 convolutions with batch normalisation.

    Args:
        inplanes: Number of input feature maps.
        planes: Number of output feature maps.
        stride: Stride of the first convolution.
        downsample: Optional callable applied to the input to produce the
            shortcut when its shape has to change.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.downsample = downsample
        self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(planes, planes, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm3d(planes)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Identity shortcut unless a downsample callable was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1x1 reduce, 3x3x3 convolve, 1x1x1 expand.

    The block outputs ``planes * 4`` feature maps.

    Args:
        inplanes: Number of input feature maps.
        planes: Number of feature maps inside the bottleneck.
        stride: Stride of the 3x3x3 convolution.
        downsample: Optional callable applied to the input to produce the
            shortcut when its shape has to change.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.stride = stride
        self.downsample = downsample
        # 1x1x1 convolutions need no padding.
        self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = nn.Conv3d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(planes * 4)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the three conv/bn stages, add the shortcut and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Identity shortcut unless a downsample callable was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)


def downsample_basic_block(x, planes, stride):
    """Parameter-free shortcut: subsample ``x`` and zero-pad its channels to ``planes``.

    Args:
        x: Feature tensor of shape ``(batch, channels, t, h, w)``.
        planes: Number of channels the result must have (``>= channels``).
        stride: Subsampling stride applied to the ``t``, ``h`` and ``w`` axes.

    Returns:
        Tensor with ``planes`` channels whose first ``channels`` maps are the
        subsampled input and whose remaining maps are zero. It has the dtype
        and device of ``x`` and stays connected to the autograd graph, so
        gradients flow through the shortcut.
    """
    # A kernel of size 1 with stride > 1 simply picks every stride-th position.
    out = F.avg_pool3d(x, kernel_size=1, stride=stride)
    # new_zeros inherits dtype and device from `out`, which replaces the manual
    # check for torch.cuda.FloatTensor (that check missed every other dtype/device).
    zero_pads = out.new_zeros(
        (out.size(0), planes - out.size(1), out.size(2), out.size(3), out.size(4)))
    # Concatenate the tensor itself rather than `.data`, which silently cut the
    # shortcut out of back-propagation.
    return torch.cat([out, zero_pads], dim=1)


class ResNet(nn.Module):
    """3D ResNet backbone with an optional attention head.

    Args:
        block: Residual block class providing an ``expansion`` attribute and the
            constructor signature ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        shortcut_type: ``'A'`` uses the parameter-free ``downsample_basic_block``
            shortcut; any other value uses a 1x1x1 convolution plus batch norm.
        sample_size: Input frame size, used to size the final average pool.
        sample_duration: Input clip length, used to size the final average pool.
        attention: If true, build the attention/projector blocks. This needs
            ``LinearAttentionBlock3D`` and ``ProjectorBlock3D`` to be in scope.
        num_classes: Number of output scores.
    """

    def __init__(self, block, layers, shortcut_type, sample_size, sample_duration, attention=False, num_classes=500):
        super(ResNet, self).__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.inplanes = 64
        self.conv1 = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

        # Four residual stages; layers[i] is the number of blocks in stage i.
        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type, stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], shortcut_type, stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], shortcut_type, stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], shortcut_type, stride=2)

        # Kernel of the closing average pool.
        last_duration = int(math.ceil(sample_duration / 16))
        last_size = int(math.ceil(sample_size / 32))
        self.avgpool = nn.AvgPool3d((last_duration, last_size, last_size), stride=1)

        top_channels = 512 * block.expansion
        self.attention = attention
        if not self.attention:
            self.fc = nn.Linear(top_channels, num_classes)
        else:
            self.attn1 = LinearAttentionBlock3D(in_channels=top_channels, normalize_attn=True)
            self.attn2 = LinearAttentionBlock3D(in_channels=top_channels, normalize_attn=True)
            self.attn3 = LinearAttentionBlock3D(in_channels=top_channels, normalize_attn=True)
            self.attn4 = LinearAttentionBlock3D(in_channels=top_channels, normalize_attn=True)
            self.projector1 = ProjectorBlock3D(in_channels=64*block.expansion, out_channels=top_channels)
            self.projector2 = ProjectorBlock3D(in_channels=128*block.expansion, out_channels=top_channels)
            self.projector3 = ProjectorBlock3D(in_channels=256*block.expansion, out_channels=top_channels)
            self.fc = nn.Linear(top_channels * 4, num_classes)

        # Weight initialisation: Kaiming-normal convolutions, unit-scale batch norms.
        for module in self.modules():
            if isinstance(module, nn.Conv3d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
            elif isinstance(module, nn.BatchNorm3d):
                nn.init.constant_(module.weight, 1)
                nn.init.zeros_(module.bias)

    def _make_layer(self, block, planes, blocks, shortcut_type, stride):
        """Build one stage of ``blocks`` residual blocks with ``planes`` base channels."""
        out_planes = planes * block.expansion
        downsample = None
        # A shortcut transform is needed whenever resolution or channel count changes.
        if stride != 1 or self.inplanes != out_planes:
            if shortcut_type == 'A':
                downsample = partial(downsample_basic_block, planes=out_planes, stride=stride)
            else:
                downsample = nn.Sequential(
                    nn.Conv3d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm3d(out_planes),
                )

        # Only the first block of a stage strides / uses the shortcut transform.
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``[scores, c1, c2, c3, c4]``; the ``c*`` entries are ``None``
        when attention is disabled."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        l1 = self.layer1(x)
        l2 = self.layer2(l1)
        l3 = self.layer3(l2)
        l4 = self.layer4(l3)

        g = self.avgpool(l4)
        if not self.attention:
            c1 = c2 = c3 = c4 = None
            # Flatten everything but the batch dimension.
            g = g.view(g.size(0), -1)
            x = self.fc(g)
        else:
            c1, g1 = self.attn1(self.projector1(l1), g)
            c2, g2 = self.attn2(self.projector2(l2), g)
            c3, g3 = self.attn3(self.projector3(l3), g)
            c4, g4 = self.attn4(l4, g)
            g = torch.cat((g1,g2,g3,g4), dim=1)
            x = self.fc(g)

        return [x, c1, c2, c3, c4]

    def load_my_state_dict(self, state_dict):
        """Copy every entry of ``state_dict`` into this model except the final
        ``fc`` layer's weight and bias."""
        own_state = self.state_dict()
        skipped = ('fc.weight', 'fc.bias')
        for name, param in state_dict.items():
            if name in skipped:
                continue
            own_state[name].copy_(param.data)


import torchvision.models as models

# ...[省略其他import和类定义]...

def resnet18(pretrained=False, progress=True, **kwargs):
    """Return ``torchvision.models.resnet18`` built with the given arguments.

    ``pretrained``, ``progress`` and any extra keyword arguments are forwarded unchanged.
    """
    return models.resnet18(pretrained=pretrained, progress=progress, **kwargs)

def resnet34(pretrained=False, progress=True, **kwargs):
    """Return ``torchvision.models.resnet34`` built with the given arguments.

    ``pretrained``, ``progress`` and any extra keyword arguments are forwarded unchanged.
    """
    return models.resnet34(pretrained=pretrained, progress=progress, **kwargs)

def resnet101(pretrained=False, progress=True, **kwargs):
    """Return ``torchvision.models.resnet101`` built with the given arguments.

    ``pretrained``, ``progress`` and any extra keyword arguments are forwarded unchanged.
    """
    return models.resnet101(pretrained=pretrained, progress=progress, **kwargs)

def resnet50(pretrained=False, progress=True, **kwargs):
    """Return ``torchvision.models.resnet50`` built with the given arguments.

    ``pretrained``, ``progress`` and any extra keyword arguments are forwarded unchanged.
    """
    return models.resnet50(pretrained=pretrained, progress=progress, **kwargs)


def resnet152(pretrained=False, progress=True, **kwargs):
    """Return ``torchvision.models.resnet152`` built with the given arguments.

    ``pretrained``, ``progress`` and any extra keyword arguments are forwarded unchanged.
    """
    return models.resnet152(pretrained=pretrained, progress=progress, **kwargs)

def resnet200(pretrained=False, progress=True, **kwargs):
    """Construct a ResNet-200 from torchvision's generic ``ResNet`` class.

    ``torchvision.models`` ships no ``resnet200`` factory, so the model is
    assembled from bottleneck blocks with the ResNet-200 stage depths
    ``[3, 24, 36, 3]``.

    Args:
        pretrained: Must be false; no pretrained ResNet-200 weights are available.
        progress: Accepted for signature parity with the other factories; unused.
        **kwargs: Forwarded to ``torchvision.models.ResNet`` (e.g. ``num_classes``).

    Raises:
        ValueError: If ``pretrained`` is true.
    """
    if pretrained:
        raise ValueError("No pretrained weights are available for ResNet-200")
    return models.ResNet(models.resnet.Bottleneck, [3, 24, 36, 3], **kwargs)


# ...[省略其他函数和类定义]...

# 使用时可以直接调用
#cnn3d = resnet50(pretrained=True, progress=True, num_classes=num_classes)


"""
3D CNN Models from torchvision.models
Reference: https://pytorch.org/docs/stable/torchvision/models.html#video-classification
"""
class r3d_18(nn.Module):
    """torchvision's video ``r3d_18`` backbone with a new classification layer.

    Args:
        pretrained: Forwarded to ``torchvision.models.video.r3d_18``.
        num_classes: Output size of the replacement linear layer ``fc1``.
    """

    def __init__(self, pretrained=True, num_classes=500):
        super(r3d_18, self).__init__()
        self.pretrained = pretrained
        self.num_classes = num_classes
        backbone = torchvision.models.video.r3d_18(pretrained=self.pretrained)
        # Keep every child module except the last one (the original fc layer).
        feature_layers = list(backbone.children())[:-1]
        self.r3d_18 = nn.Sequential(*feature_layers)
        self.fc1 = nn.Linear(backbone.fc.in_features, self.num_classes)

    def forward(self, x):
        """Run the backbone, flatten per sample and apply ``fc1``."""
        features = self.r3d_18(x).flatten(1)
        return self.fc1(features)


class mc3_18(nn.Module):
    """torchvision's video ``mc3_18`` backbone with a new classification layer.

    Args:
        pretrained: Forwarded to ``torchvision.models.video.mc3_18``.
        num_classes: Output size of the replacement linear layer ``fc1``.
    """

    def __init__(self, pretrained=True, num_classes=500):
        super(mc3_18, self).__init__()
        self.pretrained = pretrained
        self.num_classes = num_classes
        backbone = torchvision.models.video.mc3_18(pretrained=self.pretrained)
        # Keep every child module except the last one (the original fc layer).
        feature_layers = list(backbone.children())[:-1]
        self.mc3_18 = nn.Sequential(*feature_layers)
        self.fc1 = nn.Linear(backbone.fc.in_features, self.num_classes)

    def forward(self, x):
        """Run the backbone, flatten per sample and apply ``fc1``."""
        features = self.mc3_18(x).flatten(1)
        return self.fc1(features)


class r2plus1d_18(nn.Module):
    """torchvision's video ``r2plus1d_18`` backbone with a new classification layer.

    Args:
        pretrained: Forwarded to ``torchvision.models.video.r2plus1d_18``.
        num_classes: Output size of the replacement linear layer ``fc1``.
    """

    def __init__(self, pretrained=True, num_classes=500):
        super(r2plus1d_18, self).__init__()
        self.pretrained = pretrained
        self.num_classes = num_classes
        backbone = torchvision.models.video.r2plus1d_18(pretrained=self.pretrained)
        # Keep every child module except the last one (the original fc layer).
        feature_layers = list(backbone.children())[:-1]
        self.r2plus1d_18 = nn.Sequential(*feature_layers)
        self.fc1 = nn.Linear(backbone.fc.in_features, self.num_classes)

    def forward(self, x):
        """Run the backbone, flatten per sample and apply ``fc1``."""
        features = self.r2plus1d_18(x).flatten(1)
        return self.fc1(features)

# 调整数据维度以匹配模型的输入要求
#data = dataset[0]['data'].permute(1, 0, 2, 3, 4)  # 将维度重排为 [num_channels, 1, num_frames, height, width]

# 选择你希望使用的帧数
#desired_num_frames = 30  # 选择你希望使用的帧数
#data = data[:, :, :desired_num_frames, :, :]  # 切片选择帧数
#data = data.unsqueeze(0)  # 添加批次维度，变成 [1, num_channels, num_frames, height, width]

# 将处理后的数据传递给模型进行推理
#output = cnn3d(data)

# 在这里可以处理模型的输出，如计算损失或进行后续的操作


# Test
if __name__ == '__main__':
    import sys
    sys.path.append("..")
    import torchvision.transforms as transforms

    sample_size = 128
    sample_duration = 16
    num_classes = 2
    transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), transforms.ToTensor()])
    # The dataset root must be a split directory whose children are the class
    # folders. The parent "output_frames" only holds the train/test splits (see
    # the paths used by the other cells), so those would be read as the two classes.
    dataset = ASD_Isolated(data_path="/content/drive/MyDrive/output_frames/test",
        num_classes=num_classes, transform=transform)
    #cnn3d = CNN3D(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes)
    #cnn3d = resnet50(pretrained=True, progress=True, sample_size=sample_size, sample_duration=sample_duration, attention=True, num_classes=num_classes)
    cnn3d = r3d_18(pretrained=True, num_classes=num_classes)
    # cnn3d = mc3_18(pretrained=True, num_classes=num_classes)
    # cnn3d = r2plus1d_18(pretrained=True, num_classes=num_classes)
    # print(dataset[0]['images'].shape)



    #print(cnn3d(dataset[0]['data'].unsqueeze(0)))

    # Test for loading pretrained models
    # state_dict = torch.load('resnet-18-kinetics.pth')
    # for name, param in state_dict.items():
    #     print(name)
    # # print(state_dict['arch'])
    # # print(state_dict['optimizer'])
    # # print(state_dict['epoch'])

# **ASD_CNN3D**

In [None]:
import os
from datetime import datetime
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import torch.nn.functional as F

# Assume the following modules are correctly imported:
# from dataset import ASD_Isol
# from train import train_epoch
# from validation import val_epoch
# from Conv3D import CNN3D

# Setting paths
train_data_path = "/content/drive/MyDrive/output_frames/train"
val_data_path = "/content/drive/MyDrive/output_frames/test"
model_path = "/content/drive/MyDrive/cnn3d_models"

# Take the timestamp once so the log file and the TensorBoard run directory
# always share the same suffix; two separate datetime.now() calls can land on
# different seconds and make the two artefacts hard to pair up.
run_started_at = datetime.now()
log_path = "cnn3d_log_{:%Y-%m-%d_%H-%M-%S}.log".format(run_started_at)
sum_path = "cnn3d_runs_{:%Y-%m-%d_%H-%M-%S}".format(run_started_at)

# Logging setup: messages go both to the log file and to the cell output.
# NOTE(review): logging.basicConfig is a no-op when the root logger already has
# handlers (some notebook runtimes pre-configure it) — confirm the file is
# actually being written in this environment.
logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()])
logger = logging.getLogger('CNN3D')
logger.info('Logging setup complete...')

# TensorBoard writer; event files are written under sum_path.
writer = SummaryWriter(sum_path)

# Device configuration: use the GPU when one is visible, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 2
epochs = 5
batch_size = 48
learning_rate = 1e-3
log_interval = 20  # forwarded to train_epoch
sample_size = 128  # frames are resized to sample_size x sample_size
sample_duration = 30  # Video sample of 30 frames

# Per-frame preprocessing: resize, convert to tensor, then normalise each of
# the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize([sample_size, sample_size]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Instantiating the dataset class for train and test sets.
# num_classes is passed explicitly so the dataset's class-count assertion is
# checked against the same value the model is built with, instead of silently
# relying on the constructor default.
train_dataset = ASD_Isolated(data_path=train_data_path, num_classes=num_classes, transform=transform)
val_dataset = ASD_Isolated(data_path=val_data_path, num_classes=num_classes, transform=transform)

# DataLoaders: only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Create CNN3D model and move it to the selected device.
model = CNN3D(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes).to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch metric histories; each list receives one entry per epoch.
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []
test_recalls, test_precisions, test_f1s, test_auc_scores = [], [], [], []

# Labels and model outputs of the most recent validation pass; they stay None
# until the first epoch has been validated.
last_labels, last_probs = None, None

# Training starts
print("Training Starts".center(60, '#'))
try:
    for epoch in range(epochs):
        # Training phase (train_epoch is defined elsewhere in the notebook;
        # it is expected to return a loss and an accuracy for the epoch).
        train_loss, train_accuracy = train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer)

        # Validation phase (val_epoch is defined elsewhere; the unpacking
        # below fixes the number and order of the values it must return).
        test_loss, test_accuracy, test_recall, test_precision, test_f1, test_auc_roc, labels, probs = val_epoch(model, criterion, val_loader, device, epoch, logger, writer)

        # Append metrics to lists
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
        test_recalls.append(test_recall)
        test_precisions.append(test_precision)
        test_f1s.append(test_f1)
        test_auc_scores.append(test_auc_roc)

        # Update last labels and probabilities
        last_labels, last_probs = labels, probs
finally:
    # Push buffered TensorBoard events to disk even if the loop is interrupted
    # (e.g. by KeyboardInterrupt), so the curves logged so far are not lost.
    writer.flush()

print("Training Complete".center(60, '#'))

# Figure 1: loss and accuracy per epoch, train vs. test, one panel each.
# Each entry is (panel title, y-axis label, [(series, legend label), ...]).
# NOTE(review): the accuracy axis is labelled '%' while the recorded output
# logs accuracy as a fraction — confirm what train_epoch/val_epoch return.
curve_panels = [
    ('Training and Testing Loss per Epoch', 'Loss',
     [(train_losses, 'Train Loss'), (test_losses, 'Test Loss')]),
    ('Training and Testing Accuracy per Epoch', 'Accuracy (%)',
     [(train_accuracies, 'Train Accuracy'), (test_accuracies, 'Test Accuracy')]),
]

plt.figure(figsize=(12, 5))
for panel_idx, (panel_title, y_label, curves) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_idx)
    for series, legend_label in curves:
        plt.plot(series, label=legend_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Figure 2: recall, precision and F1 on the test split, one panel each.
# Each entry is (series, legend label, y-axis label); the panel title is the
# legend label followed by ' per Epoch'.
metric_panels = [
    (test_recalls, 'Test Recall', 'Recall'),
    (test_precisions, 'Test Precision', 'Precision'),
    (test_f1s, 'Test F1 Score', 'F1 Score'),
]

plt.figure(figsize=(12, 5))
for panel_idx, (series, legend_label, y_label) in enumerate(metric_panels, start=1):
    plt.subplot(1, 3, panel_idx)
    plt.plot(series, label=legend_label)
    plt.title(f'{legend_label} per Epoch')
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# AUC-ROC Curve for the last epoch
if last_labels is None or last_probs is None:
    # Nothing was validated (e.g. epochs == 0 or training was interrupted
    # before the first validation pass); fail with a clear message instead of
    # an opaque error from softmax.
    raise RuntimeError("No validation outputs available: run at least one epoch before plotting the ROC curve.")

# Column 1 of the softmax is used as the positive-class score (assumes binary
# classification). .detach().cpu() makes the NumPy conversion work for CUDA
# tensors and for tensors that still require grad; it is a no-op otherwise.
# NOTE(review): if val_epoch already returns probabilities rather than logits,
# the extra softmax is monotonic for two classes and leaves the ROC unchanged.
probs = F.softmax(last_probs, dim=1)[:, 1].detach().cpu().numpy()
fpr, tpr, _ = roc_curve(last_labels.detach().cpu().numpy(), probs)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC Curve (area = %0.2f)' % roc_auc)
# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()


######################Training Starts#######################




Loss {'train': 0.6457189880311489} 1
Accuracy {'train': 0.7388535031847133} 1
第 1 轮平均训练损失: 0.645719 | 准确率: 73.89%




Loss/val 0.7488630060106516 0
Accuracy/val 0.8589743589743589 0
Validation - Epoch: 0, Loss: 0.7489, Accuracy: 0.8590, Recall: 0.2143, Precision: 1.0000, F1: 0.3529, AUC-ROC: 0.8672




Loss {'train': 0.06159824933856726} 2
Accuracy {'train': 0.9872611464968153} 2
第 2 轮平均训练损失: 0.061598 | 准确率: 98.73%




Loss/val 0.3366762474179268 1
Accuracy/val 0.9230769230769231 1
Validation - Epoch: 1, Loss: 0.3367, Accuracy: 0.9231, Recall: 0.7857, Precision: 0.7857, F1: 0.7857, AUC-ROC: 0.9475




Loss {'train': 0.008570991491433233} 3
Accuracy {'train': 0.9978768577494692} 3
第 3 轮平均训练损失: 0.008571 | 准确率: 99.79%




Loss/val 1.9930184236145578 2
Accuracy/val 0.8333333333333334 2
Validation - Epoch: 2, Loss: 1.9930, Accuracy: 0.8333, Recall: 0.0714, Precision: 1.0000, F1: 0.1333, AUC-ROC: 0.8672




Loss {'train': 0.0005665759887051536} 4
Accuracy {'train': 1.0} 4
第 4 轮平均训练损失: 0.000567 | 准确率: 100.00%




Loss/val 4.071958021721002 3
Accuracy/val 0.8205128205128205 3
Validation - Epoch: 3, Loss: 4.0720, Accuracy: 0.8205, Recall: 0.0000, Precision: 0.0000, F1: 0.0000, AUC-ROC: 0.7098


  _warn_prf(average, modifier, msg_start, len(result))


KeyboardInterrupt: 

In [None]:
import torch
from fvcore.nn import FlopCountAnalysis, parameter_count
# Create a dummy input tensor for FLOPs calculation.
# Layout is (batch, channels, frames, height, width); adjust the sizes if the
# model expects different input dimensions.
dummy_input = torch.randn(1, 3, sample_duration, sample_size, sample_size).to(device)

# Calculate FLOPs
flops = FlopCountAnalysis(model, dummy_input)
# The f-prefix is required; without it the literal text "{flops.total()}"
# is printed instead of the count.
print(f"Total FLOPs: {flops.total()}")

# Calculate Parameters
# parameter_count returns a defaultdict keyed by parameter/module name, with
# the whole-model total stored under the empty-string key. Looking up 'total'
# would just yield the defaultdict's 0.
params = parameter_count(model)
print(f"Total Parameters: {params['']}")