# BAM + VGG/ALexNet/ResNet on Traffic Sign Classification

In [1]:
import imp
import sys
from PIL import Image
import os
import torchvision.models as models
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from collections import OrderedDict
# from model.attention.BAM import BAMBlock
import numpy as np
from torch.nn import init
from tensorflow import keras
sys.path.append('Documents/MLproj/G3/model/attention/BAM.py')

In [2]:
def produceImage(file_in, width, height, file_out):
    image = Image.open(file_in)
    resized_image = image.resize((width, height), Image.ANTIALIAS)
    resized_image.save(file_out)

## VGG-16

In [3]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.backbone = models.vgg16(pretrained=True)
        self.classifier = nn.Sequential(OrderedDict([
                ('fc1',   nn.Sequential(
                                        nn.Linear(512 * 7 * 7, 512),
                                        nn.ReLU())),
                ('fc2',   nn.Sequential(nn.Dropout(0.5),
                                        nn.Linear(512, 43)
                                        ))]))
    def forward(self,x):
        backbone_feat = self.backbone.features(x)
        backbone_feat = backbone_feat.view(-1, 512 * 7 * 7)
        score = self.classifier(backbone_feat)
        return backbone_feat, score

In [4]:
def VGG_16():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/'  
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = VGG16()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('vgg')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            feat,outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            for cur_pred, cur_label in zip(pred, labels):
                confusion_matrix[cur_pred, cur_label] += 1
                
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                feat,outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [5]:
VGG_16()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.089: 100%|██████████| 2451/2451 [15:21<00:00,  2.66it/s]
100%|██████████| 790/790 [01:49<00:00,  7.19it/s]


[epoch 1] train_loss: 0.227  val_accuracy: 0.965


train epoch[2/10] loss:0.000: 100%|██████████| 2451/2451 [15:23<00:00,  2.66it/s]
100%|██████████| 790/790 [01:42<00:00,  7.71it/s]


[epoch 2] train_loss: 0.056  val_accuracy: 0.962


train epoch[3/10] loss:0.000: 100%|██████████| 2451/2451 [15:10<00:00,  2.69it/s]
100%|██████████| 790/790 [01:42<00:00,  7.70it/s]


[epoch 3] train_loss: 0.042  val_accuracy: 0.967


train epoch[4/10] loss:0.000: 100%|██████████| 2451/2451 [15:07<00:00,  2.70it/s]
100%|██████████| 790/790 [01:42<00:00,  7.72it/s]


[epoch 4] train_loss: 0.029  val_accuracy: 0.973


train epoch[5/10] loss:0.000: 100%|██████████| 2451/2451 [15:13<00:00,  2.68it/s]
100%|██████████| 790/790 [01:43<00:00,  7.61it/s]


[epoch 5] train_loss: 0.027  val_accuracy: 0.968


train epoch[6/10] loss:0.000: 100%|██████████| 2451/2451 [15:14<00:00,  2.68it/s]
100%|██████████| 790/790 [01:43<00:00,  7.63it/s]


[epoch 6] train_loss: 0.023  val_accuracy: 0.973


train epoch[7/10] loss:0.000: 100%|██████████| 2451/2451 [15:16<00:00,  2.67it/s]
100%|██████████| 790/790 [01:42<00:00,  7.69it/s]


[epoch 7] train_loss: 0.022  val_accuracy: 0.963


train epoch[8/10] loss:0.000: 100%|██████████| 2451/2451 [15:20<00:00,  2.66it/s]
100%|██████████| 790/790 [01:45<00:00,  7.52it/s]


[epoch 8] train_loss: 0.015  val_accuracy: 0.975


train epoch[9/10] loss:0.000: 100%|██████████| 2451/2451 [15:24<00:00,  2.65it/s]
100%|██████████| 790/790 [01:45<00:00,  7.52it/s]


[epoch 9] train_loss: 0.018  val_accuracy: 0.978


train epoch[10/10] loss:0.000: 100%|██████████| 2451/2451 [15:31<00:00,  2.63it/s]
100%|██████████| 790/790 [01:45<00:00,  7.46it/s]

[epoch 10] train_loss: 0.015  val_accuracy: 0.969
Finished Training





## Bottleneck Attention Module

In [6]:
class Flatten(nn.Module):
    def forward(self,x):
        return x.view(x.shape[0],-1)

class ChannelAttention(nn.Module):
    def __init__(self,channel,reduction=16,num_layers=3):
        super().__init__()
        self.avgpool=nn.AdaptiveAvgPool2d(1)
        gate_channels=[channel]
        gate_channels+=[channel//reduction]*num_layers
        gate_channels+=[channel]


        self.ca=nn.Sequential()
        self.ca.add_module('flatten',Flatten())
        for i in range(len(gate_channels)-2):
            self.ca.add_module('fc%d'%i,nn.Linear(gate_channels[i],gate_channels[i+1]))
            self.ca.add_module('bn%d'%i,nn.BatchNorm1d(gate_channels[i+1]))
            self.ca.add_module('relu%d'%i,nn.ReLU())
        self.ca.add_module('last_fc',nn.Linear(gate_channels[-2],gate_channels[-1]))
        

    def forward(self, x) :
        res=self.avgpool(x)
        res=self.ca(res)
        res=res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
        return res

class SpatialAttention(nn.Module):
    def __init__(self,channel,reduction=16,num_layers=3,dia_val=2):
        super().__init__()
        self.sa=nn.Sequential()
        self.sa.add_module('conv_reduce1',nn.Conv2d(kernel_size=1,in_channels=channel,out_channels=channel//reduction))
        self.sa.add_module('bn_reduce1',nn.BatchNorm2d(channel//reduction))
        self.sa.add_module('relu_reduce1',nn.ReLU())
        for i in range(num_layers):
            self.sa.add_module('conv_%d'%i,nn.Conv2d(kernel_size=3,in_channels=channel//reduction,out_channels=channel//reduction,padding=1,dilation=dia_val))
            self.sa.add_module('bn_%d'%i,nn.BatchNorm2d(channel//reduction))
            self.sa.add_module('relu_%d'%i,nn.ReLU())
        self.sa.add_module('last_conv',nn.Conv2d(channel//reduction,1,kernel_size=1))

    def forward(self, x) :
        res=self.sa(x)
        res=res.expand_as(x)
        return res
    
class BAMBlock(nn.Module):

    def __init__(self, channel=512,reduction=16,dia_val=2):
        super().__init__()
        self.ca=ChannelAttention(channel=channel,reduction=reduction)
        self.sa=SpatialAttention(channel=channel,reduction=reduction,dia_val=dia_val)
        self.sigmoid=nn.Sigmoid()


    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        b, c, _, _ = x.size()
        sa_out=self.sa(x)
        ca_out=self.ca(x)
        weight=self.sigmoid(sa_out+ca_out)
        out=(1+weight)*x
        return out

## BAM + VGG-16

In [7]:
class BAM_VGG16(nn.Module):
    def __init__(self):
        super(BAM_VGG16, self).__init__()
        self.backbone = models.vgg16(pretrained=True)
        self.BAM = BAMBlock()
        self.softmax = torch.nn.Softmax(dim=1)
        self.classifier = nn.Sequential(OrderedDict([
                ('fc1',   nn.Sequential(
                                        nn.Linear(512 * 7 * 7, 512),
                                        nn.ReLU())),
                ('fc2',   nn.Sequential(nn.Dropout(0.5),
                                        nn.Linear(512, 43)
                                        ))]))
    def forward(self,x):
        backbone_feat = self.backbone.features(x)
        backbone_feat_BAM = self.BAM(backbone_feat)

        backbone_feat_BAM = backbone_feat_BAM.view(-1, 512 * 7 * 7)
        score = self.classifier(backbone_feat_BAM)
        return backbone_feat_BAM, score

In [8]:
def BAMVGG16():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/' 
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = BAM_VGG16()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('BAM_VGG')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            feat,outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            for cur_pred, cur_label in zip(pred, labels):
                confusion_matrix[cur_pred, cur_label] += 1
            
            # print(f'{outputs}\n')    
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                feat,outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [9]:
BAMVGG16()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.014: 100%|██████████| 2451/2451 [15:51<00:00,  2.58it/s]
100%|██████████| 790/790 [01:47<00:00,  7.38it/s]


[epoch 1] train_loss: 0.226  val_accuracy: 0.954


train epoch[2/10] loss:0.001: 100%|██████████| 2451/2451 [15:44<00:00,  2.59it/s]
100%|██████████| 790/790 [01:47<00:00,  7.37it/s]


[epoch 2] train_loss: 0.059  val_accuracy: 0.962


train epoch[3/10] loss:0.001: 100%|██████████| 2451/2451 [16:05<00:00,  2.54it/s]
100%|██████████| 790/790 [01:40<00:00,  7.87it/s]


[epoch 3] train_loss: 0.036  val_accuracy: 0.964


train epoch[4/10] loss:0.000: 100%|██████████| 2451/2451 [14:20<00:00,  2.85it/s]
100%|██████████| 790/790 [01:33<00:00,  8.49it/s]


[epoch 4] train_loss: 0.030  val_accuracy: 0.971


train epoch[5/10] loss:0.002: 100%|██████████| 2451/2451 [13:03<00:00,  3.13it/s]
100%|██████████| 790/790 [01:24<00:00,  9.31it/s]


[epoch 5] train_loss: 0.026  val_accuracy: 0.962


train epoch[6/10] loss:0.000: 100%|██████████| 2451/2451 [12:40<00:00,  3.22it/s]
100%|██████████| 790/790 [01:28<00:00,  8.91it/s]


[epoch 6] train_loss: 0.020  val_accuracy: 0.972


train epoch[7/10] loss:0.004: 100%|██████████| 2451/2451 [14:15<00:00,  2.87it/s]
100%|██████████| 790/790 [01:35<00:00,  8.24it/s]


[epoch 7] train_loss: 0.021  val_accuracy: 0.965


train epoch[8/10] loss:0.000: 100%|██████████| 2451/2451 [14:31<00:00,  2.81it/s]
100%|██████████| 790/790 [01:40<00:00,  7.87it/s]


[epoch 8] train_loss: 0.017  val_accuracy: 0.963


train epoch[9/10] loss:0.000: 100%|██████████| 2451/2451 [14:50<00:00,  2.75it/s]
100%|██████████| 790/790 [01:41<00:00,  7.80it/s]


[epoch 9] train_loss: 0.014  val_accuracy: 0.967


train epoch[10/10] loss:0.000: 100%|██████████| 2451/2451 [15:23<00:00,  2.65it/s]
100%|██████████| 790/790 [01:49<00:00,  7.21it/s]

[epoch 10] train_loss: 0.017  val_accuracy: 0.968
Finished Training





In [10]:
#https://github.com/asdf2kr/BAM-CBAM-pytorch/tree/master/Models

def conv1x1(in_channels, out_channels, stride=1):
    ''' 1x1 convolution '''
    return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)

def conv3x3(in_channels, out_channels, stride=1, padding=1, dilation=1):
    ''' 3x3 convolution '''
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, dilation=dilation, bias=False)

def conv7x7(in_channels, out_channels, stride=1, padding=3, dilation=1):
    ''' 7x7 convolution '''
    return nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=stride, padding=padding, dilation=dilation, bias=False)

In [11]:
class BAM(nn.Module):
    def __init__(self, in_channel, reduction_ratio, dilation):
        super(BAM, self).__init__()
        self.hid_channel = in_channel // reduction_ratio
        self.dilation = dilation
        self.globalAvgPool = nn.AdaptiveAvgPool2d(1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

        self.fc1 = nn.Linear(in_features=in_channel, out_features=self.hid_channel)
        self.bn1_1d = nn.BatchNorm1d(self.hid_channel)
        self.fc2 = nn.Linear(in_features=self.hid_channel, out_features=in_channel)
        self.bn2_1d = nn.BatchNorm1d(in_channel)

        self.conv1 = conv1x1(in_channel, self.hid_channel)
        self.bn1_2d = nn.BatchNorm2d(self.hid_channel)
        self.conv2 = conv3x3(self.hid_channel, self.hid_channel, stride=1, padding=self.dilation, dilation=self.dilation)
        self.bn2_2d = nn.BatchNorm2d(self.hid_channel)
        self.conv3 = conv3x3(self.hid_channel, self.hid_channel, stride=1, padding=self.dilation, dilation=self.dilation)
        self.bn3_2d = nn.BatchNorm2d(self.hid_channel)
        self.conv4 = conv1x1(self.hid_channel, 1)
        self.bn4_2d = nn.BatchNorm2d(1)

    def forward(self, x):
        # Channel attention
        Mc = self.globalAvgPool(x)
        Mc = Mc.view(Mc.size(0), -1)

        Mc = self.fc1(Mc)
        Mc = self.bn1_1d(Mc)
        Mc = self.relu(Mc)

        Mc = self.fc2(Mc)
        Mc = self.bn2_1d(Mc)
        Mc = self.relu(Mc)

        Mc = Mc.view(Mc.size(0), Mc.size(1), 1, 1)

        # Spatial attention
        Ms = self.conv1(x)
        Ms = self.bn1_2d(Ms)
        Ms = self.relu(Ms)

        Ms = self.conv2(Ms)
        Ms = self.bn2_2d(Ms)
        Ms = self.relu(Ms)

        Ms = self.conv3(Ms)
        Ms = self.bn3_2d(Ms)
        Ms = self.relu(Ms)

        Ms = self.conv4(Ms)
        Ms = self.bn4_2d(Ms)
        Ms = self.relu(Ms)

        Ms = Ms.view(x.size(0), 1, x.size(2), x.size(3))
        Mf = 1 + self.sigmoid(Mc * Ms)
        return x * Mf

https://github.com/asdf2kr/BAM-CBAM-pytorch/tree/master/Models

In [22]:
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, hid_channels, atte='bam', ratio=16, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, hid_channels, stride)
        self.bn1 = nn.BatchNorm2d(hid_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(hid_channels, hid_channels)
        self.bn2 = nn.BatchNorm2d(hid_channels)
        self.downsample = downsample

        if atte == 'cbam':
            self.atte = CBAM(hid_channels, ratio)
        else:
            self.atte = None

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        # CBAM
        if not self.atte is None:
            out = self.atte(out)

        out += residual
        out = self.relu(out)

        return out

class BottleneckBlock(nn.Module): # bottelneck-block, over the 50 layers.
    expansion = 4
    def __init__(self, in_channels, hid_channels, atte='bam', ratio=16, stride=1, downsample=None):
        super(BottleneckBlock, self).__init__()
        self.downsample = downsample
        out_channels = hid_channels * self.expansion
        self.conv1 = conv1x1(in_channels, hid_channels)
        self.bn1 = nn.BatchNorm2d(hid_channels)

        self.conv2 = conv3x3(hid_channels, hid_channels, stride)
        self.bn2 = nn.BatchNorm2d(hid_channels)

        self.conv3 = conv1x1(hid_channels, out_channels)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)

        if atte == 'cbam':
            self.atte = CBAM(out_channels, ratio)
        else:
            self.atte = None

    def forward(self, x):
        residual = x # indentity
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if not self.atte is None:
            out = self.atte(out)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    '''
    *50-layer
        conv1 (output: 112x112)
            7x7, 64, stride 2
        conv2 (output: 56x56)
            3x3 max pool, stride 2
            [ 1x1, 64  ]
            [ 3x3, 64  ] x 3
            [ 1x1, 256 ]
        cov3 (output: 28x28)
            [ 1x1, 128 ]
            [ 3x3, 128 ] x 4
            [ 1x1, 512 ]
        cov4 (output: 14x14)
            [ 1x1, 256 ]
            [ 3x3, 256 ] x 6
            [ 1x1, 1024]
        cov5 (output: 28x28)
            [ 1x1, 512 ]
            [ 3x3, 512 ] x 3
            [ 1x1, 2048]
        _ (output: 1x1)
            average pool, 100-d fc, softmax
        FLOPs 3.8x10^9
    '''
    '''
    *101-layer
        conv1 (output: 112x112)
            7x7, 64, stride 2
        conv2 (output: 56x56)
            3x3 max pool, stride 2
            [ 1x1, 64  ]
            [ 3x3, 64  ] x 3
            [ 1x1, 256 ]
        cov3 (output: 28x28)
            [ 1x1, 128 ]
            [ 3x3, 128 ] x 4
            [ 1x1, 512 ]
        cov4 (output: 14x14)
            [ 1x1, 256 ]
            [ 3x3, 256 ] x 23
            [ 1x1, 1024]
        cov5 (output: 28x28)
            [ 1x1, 512 ]
            [ 3x3, 512 ] x 3
            [ 1x1, 2048]
        _ (output: 1x1)
            average pool, 100-d fc, softmax
        FLOPs 7.6x10^9
    '''
    def __init__(self, block, layers, num_classes=1000, atte='bam', ratio=16, dilation=4):
        super(ResNet, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.layers = layers
        self.in_channels = 64
        self.atte = atte
        self.ratio = ratio
        self.dilation = dilation

        if num_classes == 1000:
            self.conv1 = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            )
        else:
            self.conv1 = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True)
            )

        if self.atte == 'bam':
            self.bam1 = BAM(64*block.expansion, self.ratio, self.dilation)
            self.bam2 = BAM(128*block.expansion, self.ratio, self.dilation)
            self.bam3 = BAM(256*block.expansion, self.ratio, self.dilation)

        self.conv2 = self.get_layers(block, 64, self.layers[0])
        self.conv3 = self.get_layers(block, 128, self.layers[1], stride=2)
        self.conv4 = self.get_layers(block, 256, self.layers[2], stride=2)
        self.conv5 = self.get_layers(block, 512, self.layers[3], stride=2)
        self.avgPool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        torch.nn.init.kaiming_normal_(self.fc.weight)
        for m in self.state_dict():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        torch.nn.init.kaiming_normal_(self.fc.weight)
        for m in self.state_dict():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def get_layers(self, block, hid_channels, n_layers, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != hid_channels * block.expansion:
            downsample = nn.Sequential(
                    conv1x1(self.in_channels, hid_channels * block.expansion, stride),
                    nn.BatchNorm2d(hid_channels * block.expansion),
            )
        layers = []
        layers.append(block(self.in_channels, hid_channels, self.atte, self.ratio, stride, downsample))
        self.in_channels = hid_channels * block.expansion

        for _ in range(1, n_layers):
            layers.append(block(self.in_channels, hid_channels, self.atte, self.ratio))
        return nn.Sequential(*layers)

    def forward(self, x):
        '''
            Example tensor shape based on resnet101
        '''

        x = self.conv1(x)

        x = self.conv2(x)
        if self.atte == 'bam':
            x = self.bam1(x)

        x = self.conv3(x)
        if self.atte == 'bam':
            x = self.bam2(x)

        x = self.conv4(x)
        if self.atte == 'bam':
            x = self.bam3(x)

        x = self.conv5(x)
        x = self.avgPool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def resnet18(**kwargs):
    return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)

def resnet34(**kwargs):
    return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)

def BAMresnet50(**kwargs):
    return ResNet(BottleneckBlock, [3, 4, 6, 3], **kwargs)

def resnet50(**kwargs):
    return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)

def resnet101(**kwargs):
    ''' ResNet-101 Model'''
    return ResNet(BottleneckBlock, [3, 4, 23, 3], **kwargs)

def resnet152(**kwargs):
    return ResNet(BottleneckBlock, [3, 8, 36, 3], **kwargs)

## ResNet-50

In [23]:
def Resnet50():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/' 
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = resnet50()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('Res')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        # confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            #for cur_pred, cur_label in zip(pred, labels):
                # confusion_matrix[cur_pred, cur_label] += 1
                
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [24]:
Resnet50()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.003: 100%|██████████| 2451/2451 [05:32<00:00,  7.38it/s]
100%|██████████| 790/790 [00:35<00:00, 22.01it/s]


[epoch 1] train_loss: 0.754  val_accuracy: 0.897


train epoch[2/10] loss:0.014: 100%|██████████| 2451/2451 [05:58<00:00,  6.84it/s]
100%|██████████| 790/790 [00:36<00:00, 21.74it/s]


[epoch 2] train_loss: 0.119  val_accuracy: 0.931


train epoch[3/10] loss:0.003: 100%|██████████| 2451/2451 [06:02<00:00,  6.76it/s]
100%|██████████| 790/790 [00:36<00:00, 21.80it/s]


[epoch 3] train_loss: 0.071  val_accuracy: 0.917


train epoch[4/10] loss:0.006: 100%|██████████| 2451/2451 [05:43<00:00,  7.14it/s]
100%|██████████| 790/790 [00:34<00:00, 23.05it/s]


[epoch 4] train_loss: 0.052  val_accuracy: 0.956


train epoch[5/10] loss:0.002: 100%|██████████| 2451/2451 [05:36<00:00,  7.28it/s]
100%|██████████| 790/790 [00:33<00:00, 23.39it/s]


[epoch 5] train_loss: 0.036  val_accuracy: 0.955


train epoch[6/10] loss:0.024: 100%|██████████| 2451/2451 [05:37<00:00,  7.25it/s]
100%|██████████| 790/790 [00:33<00:00, 23.29it/s]


[epoch 6] train_loss: 0.033  val_accuracy: 0.952


train epoch[7/10] loss:0.273: 100%|██████████| 2451/2451 [05:36<00:00,  7.27it/s]
100%|██████████| 790/790 [00:33<00:00, 23.51it/s]


[epoch 7] train_loss: 0.024  val_accuracy: 0.955


train epoch[8/10] loss:0.001: 100%|██████████| 2451/2451 [05:38<00:00,  7.25it/s]
100%|██████████| 790/790 [00:34<00:00, 23.22it/s]


[epoch 8] train_loss: 0.021  val_accuracy: 0.952


train epoch[9/10] loss:0.000: 100%|██████████| 2451/2451 [05:36<00:00,  7.29it/s]
100%|██████████| 790/790 [00:33<00:00, 23.39it/s]


[epoch 9] train_loss: 0.019  val_accuracy: 0.969


train epoch[10/10] loss:0.009: 100%|██████████| 2451/2451 [05:35<00:00,  7.30it/s]
100%|██████████| 790/790 [00:33<00:00, 23.40it/s]

[epoch 10] train_loss: 0.017  val_accuracy: 0.968
Finished Training





## BAM + ResNet-50

In [13]:
def BAM_Resnet50():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/' 
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = BAMresnet50()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('BAM_Res')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        # confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            #for cur_pred, cur_label in zip(pred, labels):
                # confusion_matrix[cur_pred, cur_label] += 1
                
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [14]:
BAM_Resnet50()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.260: 100%|██████████| 2451/2451 [12:02<00:00,  3.39it/s]
100%|██████████| 790/790 [01:02<00:00, 12.65it/s]


[epoch 1] train_loss: 1.085  val_accuracy: 0.851


train epoch[2/10] loss:0.007: 100%|██████████| 2451/2451 [11:44<00:00,  3.48it/s]
100%|██████████| 790/790 [01:02<00:00, 12.68it/s]


[epoch 2] train_loss: 0.144  val_accuracy: 0.949


train epoch[3/10] loss:0.001: 100%|██████████| 2451/2451 [11:41<00:00,  3.50it/s]
100%|██████████| 790/790 [01:02<00:00, 12.67it/s]


[epoch 3] train_loss: 0.092  val_accuracy: 0.849


train epoch[4/10] loss:0.545: 100%|██████████| 2451/2451 [11:44<00:00,  3.48it/s]
100%|██████████| 790/790 [01:03<00:00, 12.52it/s]


[epoch 4] train_loss: 0.066  val_accuracy: 0.867


train epoch[5/10] loss:0.001: 100%|██████████| 2451/2451 [11:44<00:00,  3.48it/s]
100%|██████████| 790/790 [01:03<00:00, 12.45it/s]


[epoch 5] train_loss: 0.057  val_accuracy: 0.937


train epoch[6/10] loss:0.000: 100%|██████████| 2451/2451 [11:48<00:00,  3.46it/s]
100%|██████████| 790/790 [01:03<00:00, 12.45it/s]


[epoch 6] train_loss: 0.044  val_accuracy: 0.952


train epoch[7/10] loss:0.009: 100%|██████████| 2451/2451 [11:44<00:00,  3.48it/s]
100%|██████████| 790/790 [01:03<00:00, 12.47it/s]


[epoch 7] train_loss: 0.037  val_accuracy: 0.955


train epoch[8/10] loss:0.006: 100%|██████████| 2451/2451 [11:51<00:00,  3.45it/s]
100%|██████████| 790/790 [01:03<00:00, 12.37it/s]


[epoch 8] train_loss: 0.033  val_accuracy: 0.957


train epoch[9/10] loss:0.002: 100%|██████████| 2451/2451 [11:47<00:00,  3.46it/s]
100%|██████████| 790/790 [01:03<00:00, 12.36it/s]


[epoch 9] train_loss: 0.026  val_accuracy: 0.957


train epoch[10/10] loss:0.003: 100%|██████████| 2451/2451 [11:44<00:00,  3.48it/s]
100%|██████████| 790/790 [01:03<00:00, 12.42it/s]

[epoch 10] train_loss: 0.027  val_accuracy: 0.948
Finished Training





In [15]:
class AlexNet(nn.Module):    
    def __init__(self):
        super(AlexNet, self).__init__()
        self.backbone = models.alexnet(pretrained=True)
        # self.softmax = torch.nn.Softmax(dim=1)
        self.classifier = nn.Sequential(OrderedDict([
                ('fc1',   nn.Sequential(
                                        nn.Linear(256 * 6 * 6, 512),
                                        nn.ReLU())),
                ('fc2',   nn.Sequential(nn.Dropout(0.5),
                                        nn.Linear(512, 43)
                                        ))]))
    def forward(self,x):        
        backbone_feat = self.backbone.features(x)
        backbone_feat = backbone_feat.view(-1, 256 * 6 * 6)
        score = self.classifier(backbone_feat)
        return backbone_feat, score

In [16]:
def Alex_Net():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/'  
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = AlexNet()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('Alex')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            feat,outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            for cur_pred, cur_label in zip(pred, labels):
                confusion_matrix[cur_pred, cur_label] += 1
                
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                feat,outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [17]:
Alex_Net()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.134: 100%|██████████| 2451/2451 [00:51<00:00, 47.56it/s]
100%|██████████| 790/790 [00:10<00:00, 76.28it/s] 


[epoch 1] train_loss: 0.341  val_accuracy: 0.941


train epoch[2/10] loss:0.004: 100%|██████████| 2451/2451 [00:49<00:00, 49.20it/s]
100%|██████████| 790/790 [00:10<00:00, 76.62it/s] 


[epoch 2] train_loss: 0.056  val_accuracy: 0.945


train epoch[3/10] loss:0.001: 100%|██████████| 2451/2451 [00:50<00:00, 48.21it/s]
100%|██████████| 790/790 [00:10<00:00, 76.38it/s] 


[epoch 3] train_loss: 0.037  val_accuracy: 0.950


train epoch[4/10] loss:0.001: 100%|██████████| 2451/2451 [00:50<00:00, 48.39it/s]
100%|██████████| 790/790 [00:10<00:00, 76.22it/s] 


[epoch 4] train_loss: 0.028  val_accuracy: 0.948


train epoch[5/10] loss:0.001: 100%|██████████| 2451/2451 [00:50<00:00, 48.73it/s]
100%|██████████| 790/790 [00:10<00:00, 75.24it/s] 


[epoch 5] train_loss: 0.021  val_accuracy: 0.962


train epoch[6/10] loss:0.002: 100%|██████████| 2451/2451 [00:50<00:00, 48.74it/s]
100%|██████████| 790/790 [00:10<00:00, 76.42it/s] 


[epoch 6] train_loss: 0.018  val_accuracy: 0.959


train epoch[7/10] loss:0.000: 100%|██████████| 2451/2451 [00:50<00:00, 48.90it/s]
100%|██████████| 790/790 [00:10<00:00, 76.59it/s] 


[epoch 7] train_loss: 0.015  val_accuracy: 0.958


train epoch[8/10] loss:0.000: 100%|██████████| 2451/2451 [00:49<00:00, 49.10it/s]
100%|██████████| 790/790 [00:10<00:00, 76.15it/s] 


[epoch 8] train_loss: 0.016  val_accuracy: 0.954


train epoch[9/10] loss:0.000: 100%|██████████| 2451/2451 [00:49<00:00, 49.25it/s]
100%|██████████| 790/790 [00:10<00:00, 75.86it/s] 


[epoch 9] train_loss: 0.014  val_accuracy: 0.957


train epoch[10/10] loss:0.005: 100%|██████████| 2451/2451 [00:49<00:00, 49.45it/s]
100%|██████████| 790/790 [00:10<00:00, 76.55it/s] 

[epoch 10] train_loss: 0.013  val_accuracy: 0.959
Finished Training





## BAM + AlexNet

In [18]:
class BAM_alexnet(nn.Module):    
    def __init__(self):
        super(BAM_alexnet, self).__init__()
        self.backbone = models.alexnet(pretrained=True)
        self.BAM = BAMBlock(channel=256,dia_val=1)
        self.softmax = torch.nn.Softmax(dim=1)
        self.classifier = nn.Sequential(OrderedDict([
                ('fc1',   nn.Sequential(
                                        nn.Linear(256 * 6 * 6, 512),
                                        nn.ReLU())),
                ('fc2',   nn.Sequential(nn.Dropout(0.5),
                                        nn.Linear(512, 43)
                                        ))]))
    def forward(self,x):
        backbone_feat = self.backbone.features(x)
        backbone_feat_BAM = self.BAM(backbone_feat)

        backbone_feat_BAM = backbone_feat_BAM.view(-1, 256 * 6 * 6)
        score = self.classifier(backbone_feat_BAM)
        return backbone_feat_BAM, score

In [19]:
def BAMAlexNet():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if (torch.cuda.is_available()):
        print(f'Current device:{torch.cuda.current_device()}\nName of device:{torch.cuda.get_device_name(0)}\n')
    print("using {} device.".format(device))
    
    data_transform = {
        "train": transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "validation": transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    image_path = './dataset/' 
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    exp_list = train_dataset.class_to_idx

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "Test"),
                                            transform=data_transform["validation"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    
    net = BAM_alexnet()
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  #交叉熵损失函数
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './{}Net.pth'.format('BAM_Alex')
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        
        confusion_matrix = torch.zeros((43,43),dtype=torch.int)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            feat,outputs = net(images.to(device))

            pred = torch.argmax(outputs.clone().detach().cpu(),dim=1)

            for cur_pred, cur_label in zip(pred, labels):
                confusion_matrix[cur_pred, cur_label] += 1
                
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # print(confusion_matrix)

        # validate
        net.eval()
        acc = 0.0 
        with torch.no_grad():  #验证的过程不计算损失梯度
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                feat,outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

In [20]:
BAMAlexNet()

Current device:0
Name of device:NVIDIA TITAN RTX

using cuda:0 device.
Using 8 dataloader workers every process
using 39209 images for training, 12630 images for validation.


train epoch[1/10] loss:0.063: 100%|██████████| 2451/2451 [00:56<00:00, 43.40it/s]
100%|██████████| 790/790 [00:10<00:00, 72.19it/s] 


[epoch 1] train_loss: 0.329  val_accuracy: 0.946


train epoch[2/10] loss:0.001: 100%|██████████| 2451/2451 [00:56<00:00, 43.42it/s]
100%|██████████| 790/790 [00:10<00:00, 72.29it/s] 


[epoch 2] train_loss: 0.061  val_accuracy: 0.953


train epoch[3/10] loss:0.003: 100%|██████████| 2451/2451 [00:56<00:00, 43.47it/s]
100%|██████████| 790/790 [00:10<00:00, 72.58it/s] 


[epoch 3] train_loss: 0.034  val_accuracy: 0.949


train epoch[4/10] loss:0.008: 100%|██████████| 2451/2451 [00:56<00:00, 43.59it/s]
100%|██████████| 790/790 [00:10<00:00, 72.18it/s] 


[epoch 4] train_loss: 0.028  val_accuracy: 0.956


train epoch[5/10] loss:0.000: 100%|██████████| 2451/2451 [00:56<00:00, 43.52it/s]
100%|██████████| 790/790 [00:10<00:00, 72.10it/s] 


[epoch 5] train_loss: 0.022  val_accuracy: 0.949


train epoch[6/10] loss:0.133: 100%|██████████| 2451/2451 [00:56<00:00, 43.56it/s]
100%|██████████| 790/790 [00:10<00:00, 72.47it/s] 


[epoch 6] train_loss: 0.020  val_accuracy: 0.923


train epoch[7/10] loss:0.000: 100%|██████████| 2451/2451 [00:56<00:00, 43.54it/s]
100%|██████████| 790/790 [00:10<00:00, 72.40it/s] 


[epoch 7] train_loss: 0.016  val_accuracy: 0.944


train epoch[8/10] loss:0.023: 100%|██████████| 2451/2451 [00:56<00:00, 43.32it/s]
100%|██████████| 790/790 [00:10<00:00, 72.01it/s] 


[epoch 8] train_loss: 0.016  val_accuracy: 0.953


train epoch[9/10] loss:0.000: 100%|██████████| 2451/2451 [00:56<00:00, 43.58it/s]
100%|██████████| 790/790 [00:10<00:00, 71.88it/s] 


[epoch 9] train_loss: 0.014  val_accuracy: 0.951


train epoch[10/10] loss:0.001: 100%|██████████| 2451/2451 [00:56<00:00, 43.42it/s]
100%|██████████| 790/790 [00:10<00:00, 71.99it/s] 


[epoch 10] train_loss: 0.012  val_accuracy: 0.961
Finished Training
