In [15]:
import os
import sys
import pickle
from skimage import io
import matplotlib.pyplot as plt
import numpy 
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import _LRScheduler
from torch.autograd import Variable
import torchvision
import torchvision.transforms as transforms
import argparse
import glob
import cv2
import torch.optim as optim
import matplotlib
matplotlib.use('Agg')

## Loading Data (CIFAR100)

In [16]:
class CIFAR100Train(Dataset):
    """CIFAR100 training dataset, derived from torch.utils.data.Dataset.

    Reads the pickled ``train`` file found in ``path`` and serves samples as
    ``(label, image)`` pairs, where ``image`` is a 32x32x3 array assembled
    from the three consecutive 1024-value colour planes of a data row.
    """

    def __init__(self, path, transform=None):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at archives from a trusted source.
        train_file = os.path.join(path, 'train')
        with open(train_file, 'rb') as handle:
            self.data = pickle.load(handle, encoding='bytes')
        # Optional callable applied to every image in __getitem__.
        self.transform = transform

    def __len__(self):
        # The archive was loaded with encoding='bytes', so keys are bytes.
        return len(self.data[b'fine_labels'])

    def __getitem__(self, index):
        pixels = self.data[b'data']
        # One slice per colour plane, in the order they are stacked.
        plane_slices = (slice(None, 1024), slice(1024, 2048), slice(2048, None))
        planes = [pixels[index, span].reshape(32, 32) for span in plane_slices]
        image = numpy.dstack(planes)

        if self.transform:
            image = self.transform(image)
        label = self.data[b'fine_labels'][index]
        return label, image

class CIFAR100Test(Dataset):
    """CIFAR100 test dataset, derived from torch.utils.data.Dataset.

    Reads the pickled ``test`` file found in ``path`` and serves samples as
    ``(label, image)`` pairs, where ``image`` is a 32x32x3 array assembled
    from the three consecutive 1024-value colour planes of a data row.
    """

    def __init__(self, path, transform=None):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at archives from a trusted source.
        test_file = os.path.join(path, 'test')
        with open(test_file, 'rb') as handle:
            self.data = pickle.load(handle, encoding='bytes')
        # Optional callable applied to every image in __getitem__.
        self.transform = transform

    def __len__(self):
        # Length is taken from the pixel table (bytes key, see encoding above).
        return len(self.data[b'data'])

    def __getitem__(self, index):
        pixels = self.data[b'data']
        # One slice per colour plane, in the order they are stacked.
        plane_slices = (slice(None, 1024), slice(1024, 2048), slice(2048, None))
        planes = [pixels[index, span].reshape(32, 32) for span in plane_slices]
        image = numpy.dstack(planes)

        if self.transform:
            image = self.transform(image)
        label = self.data[b'fine_labels'][index]
        return label, image

## Attention Modules (CAM,SAM,CBAM,ZAM)

In [17]:
class ChannelAttention(nn.Module):
    """Channel attention gate used by CBAM.

    Global average-pooled and max-pooled descriptors of the input are passed
    through the same two-layer 1x1-convolution bottleneck, summed, and
    squashed with a sigmoid.

    Args:
        in_planes: number of channels of the input feature map.
        ratio: reduction ratio of the bottleneck; the hidden layer has
            ``in_planes // ratio`` channels.

    Returns (forward): the sigmoid gate with spatial size 1x1, one value per
    channel, meant to be multiplied with the input by the caller.
    """

    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # Honour the caller-supplied reduction ratio (it used to be ignored
        # in favour of a hard-coded 16, which is still the default).
        hidden_planes = in_planes // ratio
        self.fc1   = nn.Conv2d(in_planes, hidden_planes, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2   = nn.Conv2d(hidden_planes, in_planes, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Both pooled descriptors share fc1/fc2.
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    """Spatial attention gate used by CBAM.

    The channel-wise mean and channel-wise max of the input are concatenated
    into a 2-channel map, convolved down to a single channel and squashed
    with a sigmoid.

    Args:
        kernel_size: size of the convolution kernel; only 3 and 7 are
            accepted (padding is chosen so the spatial size is preserved).
    """

    def __init__(self, kernel_size=3):
        super().__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        # "Same" padding for the two supported kernel sizes.
        padding = {3: 1, 7: 3}[kernel_size]

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        descriptor = torch.cat([channel_mean, channel_max], dim=1)
        return self.sigmoid(self.conv1(descriptor))

class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate, then spatial gate.

    Args:
        in_planes: number of channels of the feature map being refined.
    """

    def __init__(self, in_planes):
        super().__init__()
        self.ca = ChannelAttention(in_planes)
        self.sa = SpatialAttention()

    def forward(self, x):
        # Re-weight channels first ...
        channel_refined = x * self.ca(x)
        # ... then re-weight spatial positions of the channel-refined map.
        return channel_refined * self.sa(channel_refined)

## ZAM ##    
class ZeroChannelAttention(nn.Module):
    """Parameter-free channel gate: sigmoid(global avg pool + global max pool)."""

    def __init__(self):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Both pools reduce the spatial dimensions to 1x1.
        pooled_sum = self.avg_pool(x) + self.max_pool(x)
        return self.sigmoid(pooled_sum)

class ZeroSpatialAttention(nn.Module):
    """Parameter-free spatial gate: sigmoid(channel mean + channel max)."""

    def __init__(self):
        super().__init__()

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Reduce over the channel axis (dim=1), keeping it as a singleton.
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        return self.sigmoid(channel_mean + channel_max)
    
class ZAM(nn.Module):
    """Zero-parameter attention module: channel gate followed by spatial gate.

    Args:
        use_skip_connection: when true, each gated map is added back onto the
            map it was computed from (``t + t * gate``) instead of replacing
            it (``t * gate``).
    """

    def __init__(self, use_skip_connection = False):
        super().__init__()

        self.ca = ZeroChannelAttention()
        self.sa = ZeroSpatialAttention()
        self.use_skip_connection = use_skip_connection

    def forward(self, x):
        # Stage 1: channel gate on the input.
        channel_gated = x * self.ca(x)
        if self.use_skip_connection:
            out = x + channel_gated
        else:
            out = channel_gated

        # Stage 2: spatial gate on the result of stage 1.
        spatial_gated = out * self.sa(out)
        if self.use_skip_connection:
            return out + spatial_gated
        return spatial_gated

## Models (ResNet,MobileNet)

In [18]:
class BasicBlock(nn.Module):
    """Two-convolution residual block with CBAM applied before the skip add.

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by both 3x3 convolutions.
        stride: stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # A 1x1 projection is only required when the residual branch changes
        # the resolution or the channel count; otherwise pass x through as is.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

        self.cbam = CBAM(planes)

    def forward(self, x):
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = self.bn2(out)
        # Attention refines the residual branch before it meets the shortcut.
        out = self.cbam(out)
        out += self.shortcut(x)
        return F.relu(out)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block with CBAM applied before the skip add.

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels inside the bottleneck; the block outputs
            ``expansion * planes`` channels.
        stride: stride of the 3x3 convolution (and of the projection
            shortcut, when one is needed).
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # A 1x1 projection is only required when the residual branch changes
        # the resolution or the channel count; otherwise pass x through as is.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

        self.cbam = CBAM(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.conv3(out)
        out = self.bn3(out)
        # Attention refines the residual branch before it meets the shortcut.
        out = self.cbam(out)
        out += self.shortcut(x)
        return F.relu(out)


class ResNetCBAM(nn.Module):
    """ResNet backbone (3x3 stem, four stages, linear head) built from ``block``.

    Args:
        block: residual block class; it is called as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` class attribute.
        num_blocks: sequence with the number of blocks for each of the four
            stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        super().__init__()
        # Running channel count; _make_layer advances it after every block.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1."""
        per_block_strides = [stride, *([1] * (num_blocks - 1))]
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        # Fixed 4x4 average pool, then flatten everything but the batch axis.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        return self.linear(out)


def ResNetCBAM18():
    """Build the 18-layer variant: BasicBlock with two blocks in each stage."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNetCBAM(BasicBlock, blocks_per_stage)

class MobileNetCBAM(nn.Module):
    """MobileNet-style stack of depthwise-separable blocks, each with a CBAM.

    Args:
        classes: size of the classifier output.
    """

    def __init__(self, classes = 100):
        super().__init__()

        def standard_conv(inp, oup, stride):
            # Plain 3x3 conv -> BN -> ReLU (used for the stem only).
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def depthwise_separable(inp, oup, stride):
            # Depthwise 3x3 (groups=inp) followed by a pointwise 1x1; CBAM
            # sits between the last batch norm and the final ReLU.
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                CBAM(oup),
                nn.ReLU(inplace=True),
            )

        # (in_channels, out_channels, stride) of every depthwise block, in order.
        block_specs = [
            (32, 64, 1),
            (64, 128, 1),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 512, 2),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 1024, 2),
            (1024, 1024, 1),
        ]

        layers = [standard_conv(3, 32, 1)]
        for inp, oup, stride in block_specs:
            layers.append(depthwise_separable(inp, oup, stride))
        layers.append(nn.AvgPool2d(4))

        self.model = nn.Sequential(*layers)
        self.fc = nn.Linear(1024, classes)

    def forward(self, x):
        features = self.model(x)
        # Collapse to rows of 1024 features for the classifier.
        features = features.view(-1, 1024)
        return self.fc(features)

## Utils

In [19]:
def get_network():
    """Return the network to train, placed on the GPU when one is available.

    Returns:
        a ``ResNetCBAM18`` instance on CUDA if ``torch.cuda.is_available()``,
        otherwise on the CPU (previously a CPU-only host raised here).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Swap the constructor below to train a different architecture,
    # e.g. MobileNetCBAM().
    net = ResNetCBAM18().to(device)
    return net


def get_training_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """Build the augmented CIFAR100 training dataloader.

    The dataset is torchvision's CIFAR100 (train split), stored under
    ``./data`` and downloaded on demand.

    Args:
        mean: per-channel mean passed to ``transforms.Normalize``
        std: per-channel std passed to ``transforms.Normalize``
        batch_size: dataloader batch size
        num_workers: number of dataloader worker processes
        shuffle: whether to shuffle
    Returns: torch DataLoader over the training split
    """
    # Augmentation first (crop, flip, rotation), then tensor conversion and
    # normalisation.
    augmentation_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    training_set = torchvision.datasets.CIFAR100(
        root='./data',
        train=True,
        download=True,
        transform=transforms.Compose(augmentation_steps),
    )
    return DataLoader(
        training_set,
        shuffle=shuffle,
        num_workers=num_workers,
        batch_size=batch_size,
    )

def get_test_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """Build the CIFAR100 test dataloader (no augmentation).

    The dataset is torchvision's CIFAR100 (test split), stored under
    ``./data`` and downloaded on demand.

    Args:
        mean: per-channel mean passed to ``transforms.Normalize``
        std: per-channel std passed to ``transforms.Normalize``
        batch_size: dataloader batch size
        num_workers: number of dataloader worker processes
        shuffle: whether to shuffle
    Returns: torch DataLoader over the test split
    """
    # Evaluation only converts to a tensor and normalises.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    test_set = torchvision.datasets.CIFAR100(
        root='./data',
        train=False,
        download=True,
        transform=preprocessing,
    )
    return DataLoader(
        test_set,
        shuffle=shuffle,
        num_workers=num_workers,
        batch_size=batch_size,
    )

def compute_mean_std(CIFAR100_dataset):
    """Compute the per-channel mean and std of a CIFAR100 dataset.

    Args:
        CIFAR100_dataset: indexable dataset whose items are
            ``(label, image)`` pairs with ``image`` indexable as
            ``image[:, :, channel]`` for channels 0, 1 and 2
            (e.g. CIFAR100Train / CIFAR100Test without a tensor transform).

    Returns:
        a tuple ``(mean, std)``; each is a 3-tuple with one value per
        channel, computed over every pixel of every image.

    Raises:
        ValueError: from ``numpy.dstack`` when the dataset is empty.
    """
    # Read every sample exactly once. Indexing the dataset separately for
    # each channel tripled the decoding/transform cost and, with a random
    # transform, would mix differently augmented images across channels.
    images = [CIFAR100_dataset[i][1] for i in range(len(CIFAR100_dataset))]

    means, stds = [], []
    for channel in range(3):
        # Stack one colour plane of all images, reduce it, then let it go
        # before building the next one to keep peak memory down.
        plane = numpy.dstack([image[:, :, channel] for image in images])
        means.append(numpy.mean(plane))
        stds.append(numpy.std(plane))

    return tuple(means), tuple(stds)

class WarmUpLR(_LRScheduler):
    """Linear learning-rate warm-up scheduler.

    Args:
        optimizer: optimizer whose learning rates are scheduled (e.g. SGD)
        total_iters: number of scheduler steps the warm-up phase spans
        last_epoch: forwarded unchanged to ``_LRScheduler``
    """

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Must be set before the base constructor runs, because that
        # constructor already performs an initial step that calls get_lr().
        self.total_iters = total_iters
        super(WarmUpLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Scale every base LR to ``base_lr * m / total_iters`` after m steps.

        The small epsilon in the denominator guards against total_iters == 0.
        """
        steps_done = self.last_epoch
        denominator = self.total_iters + 1e-8
        return [base_lr * steps_done / denominator for base_lr in self.base_lrs]

In [20]:
from datetime import datetime
# Per-channel normalisation statistics for CIFAR-10 (presumably in R, G, B
# order); not referenced by the code visible in this notebook.
CIFAR10_TRAIN_MEAN = (0.49139968, 0.48215827 ,0.44653124)
CIFAR10_TRAIN_STD = (0.24703233, 0.24348505, 0.26158768)

# Per-channel normalisation statistics for CIFAR-100; these are the mean/std
# handed to both the training and the test dataloader.
CIFAR100_TRAIN_MEAN = (0.5088964127604166, 0.48739301317401956, 0.44194221124387256)
CIFAR100_TRAIN_STD = (0.2682515741720801, 0.2573637364478126, 0.2770957707973042)

# Root directory under which weight files are saved
CHECKPOINT_PATH = 'checkpoint'

# Epoch budget of the training loop, and the epochs at which the
# MultiStepLR scheduler decays the learning rate
EPOCH = 25
MILESTONES = [6, 12, 16]

# Initial learning rate (disabled: the optimizer is built with a literal lr)
#INIT_LR = 0.1

# Timestamp taken when this cell runs (not referenced by the visible code)
TIME_NOW = datetime.now().isoformat()

# TensorBoard log directory (not referenced by the visible code)
LOG_DIR = 'runs'

# A 'regular' checkpoint is written whenever the epoch is a multiple of this
SAVE_EPOCH = 10

## Training the model

In [21]:
def train(epoch):
    """Run one training epoch over ``CIFAR100_training_loader``.

    Relies on the notebook-level globals ``net``, ``optimizer``,
    ``loss_function``, ``warmup_scheduler`` and ``CIFAR100_training_loader``.
    One progress line (running sample count, loss, current LR) is printed
    per batch.

    Args:
        epoch: 1-based epoch number; during epoch 1 the warm-up scheduler is
            stepped once per batch.
    """
    net.train()
    # Move batches to wherever the model lives instead of assuming CUDA.
    device = next(net.parameters()).device
    total_samples = len(CIFAR100_training_loader.dataset)
    trained_samples = 0

    for images, labels in CIFAR100_training_loader:
        if epoch <= 1:
            # Stepped before the optimizer so that the very first batch
            # already trains with a non-zero warm-up learning rate.
            warmup_scheduler.step()

        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running count of samples seen, correct for any batch size
        # (the counter used to assume batches of exactly 128).
        trained_samples += len(images)
        print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            loss.item(),
            optimizer.param_groups[0]['lr'],
            epoch=epoch,
            trained_samples=trained_samples,
            total_samples=total_samples
        ))

def eval_training(epoch):
    """Evaluate ``net`` on ``CIFAR100_test_loader`` and print loss/accuracy.

    Relies on the notebook-level globals ``net``, ``loss_function`` and
    ``CIFAR100_test_loader``.

    Args:
        epoch: current epoch number (kept for interface symmetry with
            ``train``; not used in the computation).

    Returns:
        0-dim float tensor holding the top-1 accuracy over the whole test set.
    """
    net.eval()
    # Move batches to wherever the model lives instead of assuming CUDA.
    device = next(net.parameters()).device
    total_samples = len(CIFAR100_test_loader.dataset)

    test_loss = 0.0  # sum of per-sample losses
    # Tensor accumulator so the result is a tensor even for an empty loader.
    correct = torch.zeros((), device=device)

    # No autograd graph is needed for evaluation; this saves memory and time.
    with torch.no_grad():
        for images, labels in CIFAR100_test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = net(images)
            loss = loss_function(outputs, labels)
            # loss_function is assumed to return the batch MEAN (the default
            # reduction of nn.CrossEntropyLoss); weight it by the batch size
            # so dividing by the dataset size yields a true per-sample average.
            test_loss += loss.item() * labels.size(0)
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum()

    accuracy = correct / total_samples
    print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
        test_loss / total_samples,
        accuracy
    ))
    print()

    return accuracy

if __name__ == '__main__':
    # Training driver: builds the model, loaders, optimizer and schedulers,
    # then alternates train()/eval_training() while saving checkpoints.
    # The names assigned here are module globals read by train() and
    # eval_training().

    # Used for the checkpoint sub-directory and the checkpoint file names.
    NET_NAME = 'resnetcbam18'
    BATCH_SIZE = 128

    net = get_network()

    # data preprocessing:
    CIFAR100_training_loader = get_training_dataloader(
        CIFAR100_TRAIN_MEAN,
        CIFAR100_TRAIN_STD,
        num_workers=2,
        batch_size=BATCH_SIZE,
        shuffle=True
    )

    CIFAR100_test_loader = get_test_dataloader(
        CIFAR100_TRAIN_MEAN,
        CIFAR100_TRAIN_STD,
        num_workers=2,
        batch_size=BATCH_SIZE,
        shuffle=True
    )

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    # learning rate decay: multiply the LR by gamma at every milestone epoch
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=0.15)
    iter_per_epoch = len(CIFAR100_training_loader)
    # warm-up spans exactly the first epoch (see train())
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * 1)

    # create checkpoint folder to save model
    checkpoint_dir = os.path.join(CHECKPOINT_PATH, NET_NAME)
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, '{net}-{epoch}-{type}.pth')

    best_acc = 0.0
    # EPOCH is the total number of training epochs, so the upper bound is
    # inclusive (range(1, EPOCH) stopped one epoch short).
    for epoch in range(1, EPOCH + 1):
        if epoch > 1:
            # NOTE(review): passing the epoch to step() is deprecated in
            # recent PyTorch releases; kept so the LR drops at the start of
            # each milestone epoch exactly as before.
            train_scheduler.step(epoch)

        train(epoch)
        acc = eval_training(epoch)

        # only start tracking the best model once the second LR milestone
        # has been passed
        if epoch > MILESTONES[1] and best_acc < acc:
            torch.save(net.state_dict(), checkpoint_path.format(net=NET_NAME, epoch=epoch, type='best'))
            best_acc = acc
            # a 'best' checkpoint replaces the regular one for this epoch
            continue

        if not epoch % SAVE_EPOCH:
            torch.save(net.state_dict(), checkpoint_path.format(net=NET_NAME, epoch=epoch, type='regular'))
    print()
    print("best_acc: ", best_acc)
    

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:03<00:00, 48070033.32it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified




Training Epoch: 1 [128/50000]	Loss: 4.6507	LR: 0.000256
Training Epoch: 1 [256/50000]	Loss: 4.6324	LR: 0.000512
Training Epoch: 1 [384/50000]	Loss: 4.6493	LR: 0.000767
Training Epoch: 1 [512/50000]	Loss: 4.6173	LR: 0.001023
Training Epoch: 1 [640/50000]	Loss: 4.6510	LR: 0.001279
Training Epoch: 1 [768/50000]	Loss: 4.6976	LR: 0.001535
Training Epoch: 1 [896/50000]	Loss: 4.6573	LR: 0.001790
Training Epoch: 1 [1024/50000]	Loss: 4.7152	LR: 0.002046
Training Epoch: 1 [1152/50000]	Loss: 4.5731	LR: 0.002302
Training Epoch: 1 [1280/50000]	Loss: 4.6527	LR: 0.002558
Training Epoch: 1 [1408/50000]	Loss: 4.6436	LR: 0.002813
Training Epoch: 1 [1536/50000]	Loss: 4.6094	LR: 0.003069
Training Epoch: 1 [1664/50000]	Loss: 4.6279	LR: 0.003325
Training Epoch: 1 [1792/50000]	Loss: 4.6317	LR: 0.003581
Training Epoch: 1 [1920/50000]	Loss: 4.6196	LR: 0.003836
Training Epoch: 1 [2048/50000]	Loss: 4.6183	LR: 0.004092
Training Epoch: 1 [2176/50000]	Loss: 4.6311	LR: 0.004348
Training Epoch: 1 [2304/50000]	Loss: 4



Training Epoch: 2 [128/50000]	Loss: 3.5765	LR: 0.100000
Training Epoch: 2 [256/50000]	Loss: 3.5915	LR: 0.100000
Training Epoch: 2 [384/50000]	Loss: 3.4508	LR: 0.100000
Training Epoch: 2 [512/50000]	Loss: 3.6061	LR: 0.100000
Training Epoch: 2 [640/50000]	Loss: 3.3129	LR: 0.100000
Training Epoch: 2 [768/50000]	Loss: 3.4682	LR: 0.100000
Training Epoch: 2 [896/50000]	Loss: 3.3650	LR: 0.100000
Training Epoch: 2 [1024/50000]	Loss: 3.4019	LR: 0.100000
Training Epoch: 2 [1152/50000]	Loss: 3.2115	LR: 0.100000
Training Epoch: 2 [1280/50000]	Loss: 3.7266	LR: 0.100000
Training Epoch: 2 [1408/50000]	Loss: 3.4627	LR: 0.100000
Training Epoch: 2 [1536/50000]	Loss: 3.5186	LR: 0.100000
Training Epoch: 2 [1664/50000]	Loss: 3.5937	LR: 0.100000
Training Epoch: 2 [1792/50000]	Loss: 3.5174	LR: 0.100000
Training Epoch: 2 [1920/50000]	Loss: 3.4119	LR: 0.100000
Training Epoch: 2 [2048/50000]	Loss: 3.4305	LR: 0.100000
Training Epoch: 2 [2176/50000]	Loss: 3.4926	LR: 0.100000
Training Epoch: 2 [2304/50000]	Loss: 3

## Testing the Model


In [22]:
# Path of the final weights file; the evaluation cell below loads it back.
weights_file="./resnetcbam18.pth"
# Save only the state_dict of the network as it stands after training.
torch.save(net.state_dict(), weights_file)

In [23]:
if __name__ == '__main__':
    # Evaluation driver: rebuilds the network, restores the saved weights and
    # reports top-1 / top-5 error on the CIFAR100 test split.
    net = get_network()
    CIFAR100_test_loader = get_test_dataloader(
        CIFAR100_TRAIN_MEAN,
        CIFAR100_TRAIN_STD,
        num_workers=2,
        batch_size=128,
        shuffle=True
    )

    # Load the checkpoint onto whatever device the freshly built network is
    # on, so weights saved on a GPU can also be evaluated on a CPU-only host.
    device = next(net.parameters()).device
    net.load_state_dict(torch.load(weights_file, map_location=device), strict=True)
    print(net)
    net.eval()

    correct_1 = 0.0
    correct_5 = 0.0

    # No autograd graph is needed for evaluation; this saves memory and time.
    with torch.no_grad():
        for n_iter, (image, label) in enumerate(CIFAR100_test_loader):
            print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(CIFAR100_test_loader)))
            image = image.to(device)
            label = label.to(device)
            output = net(image)
            # indices of the five highest-scoring classes, best first
            _, pred = output.topk(5, 1, largest=True, sorted=True)

            # compare every one of the five candidates against the true label
            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()

            #compute top 5
            correct_5 += correct[:, :5].sum()

            #compute top1
            correct_1 += correct[:, :1].sum()

    print()
    print("Top 1 err: ", 1 - correct_1 / len(CIFAR100_test_loader.dataset))
    print("Top 5 err: ", 1 - correct_5 / len(CIFAR100_test_loader.dataset))
    print("Parameter numbers: {}".format(sum(p.numel() for p in net.parameters())))

Files already downloaded and verified
ResNetCBAM(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
      (cbam): CBAM(
        (ca): ChannelAttention(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (max_pool): AdaptiveMaxPool2d(output_size=1)
          (fc1): Conv2d(64, 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (relu1): ReLU()
          (fc2): Conv2d(4, 64, kernel_size=(1, 1), stride=(