# Deep Learning Homework 2 Part B

In [1]:
# Environment setup: select the visible GPUs and pick the device used by the notebook.
import torch
import os
# Expose only GPUs 0 and 1 to this process. This assignment comes before the
# CUDA availability queries below so that it is already in place when they run.
os.environ['CUDA_VISIBLE_DEVICES']='0,1'
# NOTE(review): the result of this call is discarded (it is not the cell's last expression).
torch.cuda.is_available()
# Module-level device: CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [3]:
# import data
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

## Note that: here we provide a basic solution for loading data and transforming data.
## You can directly change it if you find something wrong or not good enough.

## the per-channel mean and standard deviation of the ImageNet dataset
## mean_vals = [0.485, 0.456, 0.406]
## std_vals = [0.229, 0.224, 0.225]

def load_data(data_dir = "dataset/",input_size = 224,batch_size = 36,data_aug=False):
    """Build the training and validation DataLoaders.

    Args:
        data_dir: dataset root. Training images are read from
            ``<data_dir>/2-Medium-Scale/train`` and validation images from
            ``<data_dir>/test``; both are loaded with ``ImageFolder``.
        input_size: side length (in pixels) of the square images fed to the model.
        batch_size: batch size used by both loaders.
        data_aug: when truthy, the training set uses the augmenting pipeline
            (random resized crop plus random horizontal/vertical flips);
            otherwise it uses the deterministic resize + center-crop pipeline.

    Returns:
        ``(train_loader, valid_loader)``. The training loader shuffles, the
        validation loader does not.
    """
    # Normalisation constants are the ImageNet per-channel mean / std.
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(input_size),
            # Resize(int) only scales the shorter edge, so a non-square image would
            # keep its aspect ratio and produce a tensor whose size differs from the
            # rest of the batch. Cropping guarantees input_size x input_size, exactly
            # like the 'test' pipeline (and is a no-op for square images).
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]),
        'train-aug': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        'test': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]),
    }
    ## The training dir below is the one for the medium-scale task.
    ## For other tasks, you may need to modify the data dir.
    train_transform = data_transforms['train-aug' if data_aug else 'train']
    image_dataset_train = datasets.ImageFolder(os.path.join(data_dir, '2-Medium-Scale', 'train'), train_transform)
    image_dataset_valid = datasets.ImageFolder(os.path.join(data_dir,'test'), data_transforms['test'])

    train_loader = DataLoader(image_dataset_train, batch_size=batch_size, shuffle=True, num_workers=4)
    valid_loader = DataLoader(image_dataset_valid, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_loader, valid_loader


# load the trained model
def load_model(device,model_name,optimizer_name):
    """Load the best checkpoint saved for a (model name, optimizer name) pair.

    The file is looked up in the current working directory under the name
    ``"<model_name> with <optimizer_name> best_model.pt"`` and its tensors are
    mapped onto ``device``.
    """
    checkpoint_path = model_name + " with " + optimizer_name + " best_model.pt"
    return torch.load(checkpoint_path, map_location=device)


In [4]:
from torchvision import models
import torch.nn as nn



def baselineModel(num_classes):
    """Create a randomly initialised ResNet-18 whose classifier outputs ``num_classes`` logits.

    The returned module also carries a ``name`` attribute, which the training
    code uses to label logs and checkpoint files.
    """
    backbone = models.resnet18(pretrained=False)
    # Swap the final fully connected layer for one with the requested number of outputs.
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    backbone.name = "Medium-Scale Baseline Learning"
    return backbone


In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import os
from torch.utils.tensorboard import SummaryWriter

## Note that: here we provide a basic solution for training and validation.
## You can directly change it if you find something wrong or not good enough.

def train_model(model,train_loader, valid_loader, criterion, optimizer, lr_scheduler = None,num_epochs=20, logdir = 'logs_part_b'):
    """Train ``model`` for ``num_epochs`` epochs, validating after every epoch.

    Per-epoch loss/accuracy are printed, logged to TensorBoard under
    ``<logdir>/<model name> with <optimizer name>``, and the whole model object is
    saved with ``torch.save`` to ``"<model name> with <optimizer name> best_model.pt"``
    whenever the validation accuracy improves.

    Args:
        model: network to train. Its ``name`` attribute labels the run; the class
            name is used when the attribute is missing.
        train_loader: DataLoader yielding ``(inputs, labels)`` training batches.
        valid_loader: DataLoader yielding ``(inputs, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model parameters. Its ``name`` attribute
            labels the run; the class name is used when the attribute is missing.
        lr_scheduler: optional scheduler, stepped once per training epoch.
        num_epochs: number of epochs to run.
        logdir: root directory for the TensorBoard event files.

    Returns:
        ``(train_loss_curves, train_acc_curves, valid_loss_curves, valid_acc_curves)``,
        four lists with one float per epoch.
    """
    # Move batches to the device the model actually lives on instead of relying on
    # a notebook-global `device` that may be missing or out of sync with the model.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device('cpu')

    model_name = getattr(model, 'name', type(model).__name__)
    optimizer_name = getattr(optimizer, 'name', type(optimizer).__name__)
    run_tag = model_name + ' with ' + optimizer_name

    def train(model, train_loader,optimizer,criterion,lr_scheduler):
        """Run one optimisation pass over ``train_loader``; return (mean loss, accuracy)."""
        model.train(True)
        total_loss = 0.0
        total_correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, predictions = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()
            # criterion returns a batch mean, so weight it by the batch size.
            total_loss += loss.item() * inputs.size(0)
            total_correct += torch.sum(predictions == labels).item()
        if lr_scheduler is not None:
            lr_scheduler.step()
        num_samples = len(train_loader.dataset)
        return total_loss / num_samples, total_correct / num_samples

    def valid(model, valid_loader,criterion):
        """Evaluate on ``valid_loader`` without gradients; return (mean loss, accuracy)."""
        model.train(False)
        total_loss = 0.0
        total_correct = 0
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, predictions = torch.max(outputs, 1)
                total_loss += loss.item() * inputs.size(0)
                total_correct += torch.sum(predictions == labels).item()
        num_samples = len(valid_loader.dataset)
        return total_loss / num_samples, total_correct / num_samples

    best_acc = 0.0
    train_loss_curves = []
    train_acc_curves = []
    valid_loss_curves = []
    valid_acc_curves = []
    writer = SummaryWriter(os.path.join(logdir, run_tag))
    try:
        for epoch in range(num_epochs):
            print('epoch:{:d}/{:d}'.format(epoch, num_epochs))
            print('*' * 100)
            train_loss, train_acc = train(model, train_loader,optimizer,criterion,lr_scheduler)
            print("training: {:.4f}, {:.4f}".format(train_loss, train_acc))
            valid_loss, valid_acc = valid(model, valid_loader,criterion)
            print("validation: {:.4f}, {:.4f}".format(valid_loss, valid_acc))
            train_acc_curves.append(train_acc)
            train_loss_curves.append(train_loss)
            valid_acc_curves.append(valid_acc)
            valid_loss_curves.append(valid_loss)
            writer.add_scalars(os.path.join(run_tag,'loss curves'),{'train':train_loss,'valid':valid_loss},epoch)
            writer.add_scalars(os.path.join(run_tag,'acc curves'),{'train':train_acc,'valid':valid_acc},epoch)
            if valid_acc > best_acc:
                best_acc = valid_acc
                # The whole module is saved (not just a state_dict) because load_model
                # reads this file back with a plain torch.load.
                torch.save(model, run_tag + ' best_model.pt')
    finally:
        # Flush and release the event file even if an epoch raises or is interrupted.
        writer.close()
    return train_loss_curves,train_acc_curves,valid_loss_curves,valid_acc_curves


In [6]:
import torch.nn as nn
from torch import Tensor
from typing import Type, Any, Callable, Union, List, Optional
class LKA(nn.Module):
    """Large-kernel attention gate.

    An attention map is produced by a depthwise 5x5 convolution, a depthwise
    dilated 7x7 convolution (dilation 3) and a pointwise 1x1 convolution, and is
    then multiplied element-wise with the input. All convolutions are padded so
    the spatial size and channel count of the input are preserved.
    """

    def __init__(self, dim: int) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(dim, dim, kernel_size=5, padding=2, groups=dim)
        self.conv_spatial = nn.Conv2d(
            dim, dim, kernel_size=7, stride=1, padding=9, dilation=3, groups=dim
        )
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=1)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` gated by its own attention map (same shape as ``x``)."""
        identity = x.clone()
        gate = self.conv2(self.conv_spatial(self.conv1(x)))
        return identity * gate

class Attention(nn.Module):
    """Residual attention block built around an ``LKA`` gate.

    Pipeline: 1x1 projection to ``bottleneck_dim`` -> BatchNorm -> GELU -> LKA ->
    BatchNorm -> 1x1 projection to ``out_dim``; the result is added to the
    (optionally projected) input and passed through GELU.

    Args:
        in_dim: number of input channels.
        bottleneck_dim: number of channels inside the block (LKA width).
        out_dim: number of output channels.
    """

    def __init__(
        self,
        in_dim: int,
        bottleneck_dim: int,
        out_dim: int
    ) -> None:
        super().__init__()
        self.proj_1 = nn.Conv2d(in_dim, bottleneck_dim, 1)
        self.norm_layer_1 = nn.BatchNorm2d(bottleneck_dim)
        self.activation = nn.GELU()
        self.spatial_gating_unit = LKA(bottleneck_dim)
        self.proj_2 = nn.Conv2d(bottleneck_dim, out_dim, 1)
        # forward() applies this norm to the LKA output, i.e. BEFORE proj_2, so it
        # sees bottleneck_dim channels. Sizing it with out_dim only worked when
        # bottleneck_dim == out_dim and raised a channel-mismatch error otherwise.
        self.norm_layer_2 = nn.BatchNorm2d(bottleneck_dim)
        # The skip path needs a 1x1 projection only when the channel count changes.
        self.downsample = None if in_dim==out_dim else nn.Sequential(
            nn.Conv2d(in_dim,out_dim,1),
            nn.BatchNorm2d(out_dim)
        )

    def forward(self,x:Tensor)->Tensor:
        """Apply the block to ``x``; the output has ``out_dim`` channels and the same spatial size."""
        out = self.proj_1(x)
        out = self.norm_layer_1(out)
        out = self.activation(out)
        out = self.spatial_gating_unit(out)
        out = self.norm_layer_2(out)
        out = self.proj_2(out)
        if self.downsample is not None:
            x = self.downsample(x)
        out = out + x
        out = self.activation(out)
        return out


In [24]:
from typing import Type, Any, Callable, Union, List, Optional
import torch.optim as optim
import torch.nn as nn
from torchvision import models
def test_model(model:Optional[nn.Module]=None, optimizer:Optional[optim.Optimizer]=None, stepLRScheduler = False, data_aug=False, step_size=50,
               data_dir = "dataset/", input_size = 224, batch_size = 24, num_epochs = 200, lr = 0.001):
    """Run one full training experiment on the medium-scale dataset.

    Args:
        model: network to train. When ``None`` a fresh, randomly initialised
            ResNet-18 is created for this call (previously a single instance was
            built at definition time and shared by every call).
        optimizer: optimizer already bound to ``model``'s parameters. When ``None``
            SGD with momentum 0.9 and learning rate ``lr`` is created.
        stepLRScheduler: when truthy, decay the learning rate with ``StepLR``
            (``gamma=0.2``) every ``step_size`` epochs.
        data_aug: forwarded to ``load_data`` to enable training-time augmentation.
        step_size: epoch period of the ``StepLR`` schedule.
        data_dir: dataset root passed to ``load_data``.
        input_size: image side length passed to ``load_data``.
        batch_size: batch size passed to ``load_data``.
        num_epochs: number of training epochs.
        lr: learning rate of the default SGD optimizer (ignored when ``optimizer``
            is supplied).

    Returns:
        ``None``. Progress is printed and logged by ``train_model``; nothing is
        returned so that calling this as the last line of a cell does not dump
        the per-epoch curves into the output.
    """
    ## model initialization
    if model is None:
        model = models.resnet18(pretrained=False)
    # train_model labels logs and checkpoints with `.name`; make sure it exists.
    if not hasattr(model, 'name'):
        model.name = type(model).__name__
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    ## data preparation
    train_loader, valid_loader = load_data(data_dir=data_dir,input_size=input_size, batch_size=batch_size,data_aug=data_aug)

    ## optimizer
    if optimizer is None:
        ## SGD optimizer
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
        optimizer.name = 'SGD'
    elif not hasattr(optimizer, 'name'):
        # Caller-supplied optimizers do not carry a `.name` unless one was set by hand.
        optimizer.name = type(optimizer).__name__

    if stepLRScheduler:
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=step_size, gamma=0.2)
    else:
        lr_scheduler = None

    ## loss function
    criterion = nn.CrossEntropyLoss()
    train_model(model,train_loader, valid_loader, criterion, optimizer, lr_scheduler=lr_scheduler ,num_epochs=num_epochs)


In [8]:
# Experiment: baseline ResNet-18, default SGD optimizer, no data augmentation.
num_classes=10
model = baselineModel(num_classes)
# The name labels this run: train_model uses it for the TensorBoard directory
# and for the saved checkpoint file name.
model.name = 'Resnet without data augment'
test_model(model)

epoch:0/200
****************************************************************************************************
training: 1.9628, 0.3100
validation: 1.6346, 0.4250
epoch:1/200
****************************************************************************************************
training: 1.5447, 0.4370
validation: 3.3247, 0.2530
epoch:2/200
****************************************************************************************************
training: 1.3526, 0.5180
validation: 1.4146, 0.4859
epoch:3/200
****************************************************************************************************
training: 1.2474, 0.5810
validation: 1.3347, 0.5220
epoch:4/200
****************************************************************************************************
training: 1.1236, 0.6060
validation: 1.7072, 0.4746
epoch:5/200
****************************************************************************************************
training: 1.0828, 0.6100
validation: 1.1322, 0.6163
epoch:6/20

In [9]:

## Experiment: ResNet-18 with layer1..layer4 replaced by Attention (LKA) blocks,
## default SGD optimizer, no data augmentation.
num_classes = 10
model = baselineModel(num_classes)
# The name labels this run in the TensorBoard logs and the checkpoint file name.
model.name = 'Resnet with 4 LKA layer without data augment'
# Attention arguments are (in_dim, bottleneck_dim, out_dim).
model.layer1 = Attention(64,64,64)
model.layer2 = Attention(64,128,128)
model.layer3 = Attention(128,256,256)
model.layer4 = Attention(256,512,512)
test_model(model)

epoch:0/200
****************************************************************************************************
training: 2.0912, 0.2910
validation: 2.0561, 0.3454
epoch:1/200
****************************************************************************************************
training: 1.7567, 0.4380
validation: 1.7153, 0.4756
epoch:2/200
****************************************************************************************************
training: 1.5486, 0.5080
validation: 1.4738, 0.5574
epoch:3/200
****************************************************************************************************
training: 1.3922, 0.5570
validation: 1.3831, 0.5739
epoch:4/200
****************************************************************************************************
training: 1.3225, 0.5510
validation: 1.7773, 0.3981
epoch:5/200
****************************************************************************************************
training: 1.2287, 0.5840
validation: 1.5108, 0.5315
epoch:6/20

In [10]:
# Experiment: baseline ResNet-18, default SGD optimizer, with data augmentation
# (data_aug=True selects the 'train-aug' transform pipeline in load_data).
num_classes=10
model = baselineModel(num_classes)
# The name labels this run in the TensorBoard logs and the checkpoint file name.
model.name = 'Resnet with data augment'
test_model(model,data_aug=True)

epoch:0/200
****************************************************************************************************
training: 2.0122, 0.2980
validation: 1.8496, 0.3828
epoch:1/200
****************************************************************************************************
training: 1.5930, 0.4390
validation: 1.5342, 0.4580
epoch:2/200
****************************************************************************************************
training: 1.4989, 0.4780
validation: 1.7469, 0.3869
epoch:3/200
****************************************************************************************************
training: 1.3906, 0.5110
validation: 1.6185, 0.4491
epoch:4/200
****************************************************************************************************
training: 1.3502, 0.5200
validation: 1.4404, 0.4948
epoch:5/200
****************************************************************************************************
training: 1.3119, 0.5380
validation: 1.4023, 0.5411
epoch:6/20

In [11]:
## Experiment: ResNet-18 with layer1..layer4 replaced by Attention (LKA) blocks,
## default SGD optimizer, with data augmentation.
num_classes = 10
model = baselineModel(num_classes)
# The name labels this run in the TensorBoard logs and the checkpoint file name.
model.name = 'Resnet with 4 LKA layer with data augment'
# Attention arguments are (in_dim, bottleneck_dim, out_dim).
model.layer1 = Attention(64,64,64)
model.layer2 = Attention(64,128,128)
model.layer3 = Attention(128,256,256)
model.layer4 = Attention(256,512,512)
test_model(model,data_aug=True)

epoch:0/200
****************************************************************************************************
training: 2.1061, 0.2590
validation: 2.0200, 0.3187
epoch:1/200
****************************************************************************************************
training: 1.7831, 0.4380
validation: 1.6420, 0.4943
epoch:2/200
****************************************************************************************************
training: 1.6209, 0.4630
validation: 1.5460, 0.5243
epoch:3/200
****************************************************************************************************
training: 1.4894, 0.5210
validation: 1.4748, 0.5457
epoch:4/200
****************************************************************************************************
training: 1.4402, 0.5150
validation: 1.7253, 0.4489
epoch:5/200
****************************************************************************************************
training: 1.3897, 0.5220
validation: 1.4414, 0.5304
epoch:6/20

In [26]:
import torch.optim as optim
# Experiment: baseline ResNet-18, SGD optimizer with a StepLR schedule
# (learning rate decayed every 60 epochs), no data augmentation.
num_classes = 10
model = baselineModel(num_classes)
# The name labels this run in the TensorBoard logs and the checkpoint file name.
model.name = 'Resnet StepLR'
lr = 0.001
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# train_model reads optimizer.name when building the run label, so set it by hand.
optimizer.name = 'SGD'
test_model(model,optimizer,stepLRScheduler=True,step_size=60)

epoch:0/200
****************************************************************************************************
training: 1.9586, 0.2980
validation: 1.9514, 0.2780
epoch:1/200
****************************************************************************************************
training: 1.4943, 0.4890
validation: 2.4160, 0.2502
epoch:2/200
****************************************************************************************************
training: 1.2993, 0.5390
validation: 1.2112, 0.5893
epoch:3/200
****************************************************************************************************
training: 1.1707, 0.5920
validation: 1.5159, 0.4765
epoch:4/200
****************************************************************************************************
training: 1.1941, 0.5870
validation: 1.1511, 0.6057
epoch:5/200
****************************************************************************************************
training: 1.0214, 0.6250
validation: 1.3957, 0.5254
epoch:6/20

In [27]:

## Experiment: ResNet-18 with layer1..layer4 replaced by Attention (LKA) blocks,
## SGD optimizer with a StepLR schedule (decay every 60 epochs), no data augmentation.
num_classes = 10
model = baselineModel(num_classes)
# The name labels this run in the TensorBoard logs and the checkpoint file name.
model.name = 'Resnet with 4 LKA layer StepLR'
# Attention arguments are (in_dim, bottleneck_dim, out_dim).
model.layer1 = Attention(64,64,64)
model.layer2 = Attention(64,128,128)
model.layer3 = Attention(128,256,256)
model.layer4 = Attention(256,512,512)
lr = 0.001
# The optimizer is created after the layers are swapped so that it tracks the
# parameters of the new Attention blocks.
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# train_model reads optimizer.name when building the run label, so set it by hand.
optimizer.name = 'SGD'
test_model(model,optimizer,stepLRScheduler=True,step_size=60)


epoch:0/200
****************************************************************************************************
training: 2.0835, 0.2940
validation: 2.0667, 0.3152
epoch:1/200
****************************************************************************************************
training: 1.7892, 0.4200
validation: 1.7026, 0.4819
epoch:2/200
****************************************************************************************************
training: 1.5912, 0.4920
validation: 2.4156, 0.5069
epoch:3/200
****************************************************************************************************
training: 1.4480, 0.5140
validation: 1.9354, 0.4117
epoch:4/200
****************************************************************************************************
training: 1.3810, 0.5570
validation: 1.6007, 0.4743
epoch:5/200
****************************************************************************************************
training: 1.2786, 0.5690
validation: 1.7627, 0.4096
epoch:6/20