# Deep Learning Homework 2 Part B

In [1]:
# Restrict the visible GPUs and choose the compute device for the notebook.
import torch
import os
# Expose only GPUs 0 and 1 to this process; set before the first CUDA query below.
os.environ['CUDA_VISIBLE_DEVICES']='0,1'
# NOTE(review): this result is discarded -- it is not the last expression of the cell,
# so nothing is displayed.
torch.cuda.is_available()
# Notebook-level device: CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
%matplotlib inline

In [2]:
# import data
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

## Note that: here we provide a basic solution for loading data and transforming data.
## You can directly change it if you find something wrong or not good enough.

## the mean and standard deviation of the ImageNet dataset
## mean_vals = [0.485, 0.456, 0.406]
## std_vals = [0.229, 0.224, 0.225]

def load_data(data_dir = "dataset/",input_size = 224,batch_size = 36,
              train_subdir = "2-Medium-Scale",valid_subdir = "test",num_workers = 4):
    """Build the training and validation DataLoaders from image folders.

    Training images are read from ``<data_dir>/<train_subdir>/train`` and
    validation images from ``<data_dir>/<valid_subdir>``; both are wrapped in
    ``datasets.ImageFolder``.

    Args:
        data_dir: Root directory of the dataset.
        input_size: Side length of the square crop fed to the network.
        batch_size: Batch size used by both loaders.
        train_subdir: Task folder under ``data_dir`` that contains ``train``.
            Defaults to the medium-scale task; pass another folder name to
            train on a different task without editing this function.
        valid_subdir: Folder under ``data_dir`` holding the validation images.
        num_workers: Number of worker processes used by each DataLoader.

    Returns:
        ``(train_loader, valid_loader)``. Only the training loader shuffles.
    """
    # Channel statistics of ImageNet, shared by both pipelines.
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    data_transforms = {
        # Random crop + horizontal flip as training-time augmentation.
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        # Deterministic resize + centre crop for evaluation.
        'test': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]),
    }

    train_dir = os.path.join(data_dir, train_subdir, 'train')
    valid_dir = os.path.join(data_dir, valid_subdir)
    image_dataset_train = datasets.ImageFolder(train_dir, data_transforms['train'])
    image_dataset_valid = datasets.ImageFolder(valid_dir, data_transforms['test'])

    train_loader = DataLoader(image_dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(image_dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, valid_loader


# load the trained model
def load_model(device,model_name,optimizer_name):
    """Load the best checkpoint stored for a model/optimizer pair.

    The checkpoint is read from the current working directory under the name
    ``"<model_name> with <optimizer_name> best_model.pt"``; ``device`` is
    handed to ``torch.load`` as its ``map_location``.
    """
    checkpoint_path = "".join([model_name, " with ", optimizer_name, " best_model.pt"])
    return torch.load(checkpoint_path, map_location=device)


In [4]:
from torchvision import models
import torch.nn as nn



def baselineModel(num_classes):
    """Return a randomly initialised ResNet-18 with a ``num_classes``-way head.

    The final fully connected layer is replaced by a fresh ``nn.Linear`` and
    the model is tagged with a ``name`` attribute.
    """
    net = models.resnet18(pretrained=False)
    # Swap the classifier head, keeping its input width.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    net.name = "Medium-Scale Baseline Learning"
    return net


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import os
from torch.utils.tensorboard import SummaryWriter

## Note that: here we provide a basic solution for training and validation.
## You can directly change it if you find something wrong or not good enough.

def train_model(model,train_loader, valid_loader, criterion, optimizer, num_epochs=20, logdir = 'logs_part_b'):
    """Train ``model``, validating after every epoch and logging to TensorBoard.

    Whenever the validation accuracy improves on the best value seen so far,
    the whole model is saved to ``"<model name> with <optimizer name>
    best_model.pt"`` in the current working directory.

    Args:
        model: Network to train. Batches are moved to the device of its first
            parameter (CPU if it has no parameters). If it has a ``name``
            attribute it labels the run; otherwise the class name is used.
        train_loader: Loader for the training set; must expose ``.dataset``.
        valid_loader: Loader for the validation set; must expose ``.dataset``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters. Its ``name``
            attribute labels the run, falling back to the class name.
        num_epochs: Number of passes over the training set.
        logdir: Root directory for the TensorBoard event files.

    Returns:
        ``(train_loss_curves, train_acc_curves, valid_loss_curves,
        valid_acc_curves)`` -- four lists with one float per epoch.

    Raises:
        ValueError: If the dataset behind either loader is empty.
    """
    # Follow the model's own device instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model_name = getattr(model, 'name', type(model).__name__)
    optimizer_name = getattr(optimizer, 'name', type(optimizer).__name__)
    run_name = model_name + ' with ' + optimizer_name

    def run_epoch(loader, training):
        """One pass over ``loader``; returns (mean loss, accuracy) as floats."""
        num_samples = len(loader.dataset)
        if num_samples == 0:
            raise ValueError("cannot run an epoch on an empty dataset")
        model.train(training)
        total_loss = 0.0
        total_correct = 0
        # Autograd is switched off for validation so no graph is kept alive.
        with torch.set_grad_enabled(training):
            for inputs, labels in loader:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                if training:
                    optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, predictions = torch.max(outputs, 1)
                if training:
                    loss.backward()
                    optimizer.step()
                # The loss is a batch mean; weight it by the batch size.
                total_loss += loss.item() * inputs.size(0)
                total_correct += (predictions == labels).sum().item()
        return total_loss / num_samples, total_correct / num_samples

    best_acc = 0.0
    train_loss_curves = []
    train_acc_curves = []
    valid_loss_curves = []
    valid_acc_curves = []
    writer = SummaryWriter(os.path.join(logdir, run_name))
    try:
        for epoch in range(num_epochs):
            print('epoch:{:d}/{:d}'.format(epoch, num_epochs))
            print('*' * 100)
            train_loss, train_acc = run_epoch(train_loader, True)
            print("training: {:.4f}, {:.4f}".format(train_loss, train_acc))
            valid_loss, valid_acc = run_epoch(valid_loader, False)
            print("validation: {:.4f}, {:.4f}".format(valid_loss, valid_acc))
            train_acc_curves.append(train_acc)
            train_loss_curves.append(train_loss)
            valid_acc_curves.append(valid_acc)
            valid_loss_curves.append(valid_loss)
            writer.add_scalars(os.path.join(run_name,'loss curves'),{'train':train_loss,'valid':valid_loss},epoch)
            writer.add_scalars(os.path.join(run_name,'acc curves'),{'train':train_acc,'valid':valid_acc},epoch)
            if valid_acc > best_acc:
                best_acc = valid_acc
                # Serialised immediately, so later epochs cannot alter the file.
                torch.save(model, run_name + ' best_model.pt')
    finally:
        # Flush the event file even when training is interrupted by an error.
        writer.close()
    return train_loss_curves,train_acc_curves,valid_loss_curves,valid_acc_curves


In [6]:
import torch.nn as nn
from torch import Tensor
from typing import Type, Any, Callable, Union, List, Optional
class LKA(nn.Module):
    """Large-kernel attention gate built from three stacked convolutions.

    A depth-wise 5x5 convolution, a depth-wise dilated 7x7 convolution
    (dilation 3) and a point-wise 1x1 convolution produce an attention map
    with the same shape as the input; the input is multiplied by that map
    element-wise.

    Args:
        dim: Number of input (and output) channels.
    """

    def __init__(
        self,
        dim:int
        ) -> None :
        super().__init__()
        # Paddings are chosen so every convolution keeps the spatial size.
        self.conv1 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.conv_spatial = nn.Conv2d(dim,dim,7,stride=1,padding=9,groups = dim ,dilation=3)
        self.conv2 = nn.Conv2d(dim,dim,1)

    def forward(self,x:Tensor) -> Tensor:
        """Return ``x`` gated by its attention map (same shape as ``x``)."""
        attn = self.conv1(x)
        attn = self.conv_spatial(attn)
        attn = self.conv2(attn)
        # No op here modifies ``x`` in place, so it is used directly rather
        # than through a copy, which would only add an extra activation.
        return x*attn




class Attention(nn.Module):
    """Residual block wrapping an ``LKA`` gate between two 1x1 projections.

    Data flow: ``proj_1 -> BatchNorm -> GELU -> LKA -> proj_2 -> BatchNorm``,
    then the (optionally projected) input is added and GELU is applied.

    ``norm_layer_2`` is sized for ``out_dim`` channels, so it is applied after
    ``proj_2``. Applying it before ``proj_2`` fails whenever
    ``bottleneck_dim != out_dim``.

    Args:
        in_dim: Channels of the input tensor.
        bottleneck_dim: Channels used inside the block, around the LKA gate.
        out_dim: Channels of the output tensor.
        stride: Stride of ``proj_1`` and of the shortcut projection. The
            default of 1 keeps the spatial size; 2 halves it.
    """

    def __init__(
        self,
        in_dim: int,
        bottleneck_dim: int,
        out_dim: int,
        stride: int = 1
    ) -> None:
        super().__init__()
        self.proj_1 = nn.Conv2d(in_dim, bottleneck_dim, 1, stride=stride)
        self.norm_layer_1 = nn.BatchNorm2d(bottleneck_dim)
        self.activation = nn.GELU()
        self.spatial_gating_unit = LKA(bottleneck_dim)
        self.proj_2 = nn.Conv2d(bottleneck_dim, out_dim, 1)
        self.norm_layer_2 = nn.BatchNorm2d(out_dim)
        # The shortcut needs a projection whenever the main branch changes
        # the channel count or the spatial size.
        needs_projection = in_dim != out_dim or stride != 1
        self.downsample = nn.Sequential(
            nn.Conv2d(in_dim,out_dim,1,stride=stride),
            nn.BatchNorm2d(out_dim)
        ) if needs_projection else None

    def forward(self,x:Tensor)->Tensor:
        """Apply the block to ``x`` and return the activated residual sum."""
        out = self.proj_1(x)
        out = self.norm_layer_1(out)
        out = self.activation(out)
        out = self.spatial_gating_unit(out)
        # Project to out_dim first, then normalise the out_dim channels.
        out = self.proj_2(out)
        out = self.norm_layer_2(out)
        if (self.downsample is not None):
            x=self.downsample(x)
        out = out + x
        out = self.activation(out)
        return out

# class Block(nn.Module):
#     def __init__(
#         self,
#         input_dim: int,
        
#         drop_path: float,
#         act_layer: Optional[Callable[...,nn.Module]]=nn.GELU
#     ) -> None:
#         super().__init__()
#         self.norm1=Bat

In [9]:
def testBaselineModel():
    """Train the 10-class baseline ResNet-18 with Adam and cross-entropy.

    Uses images under ``dataset/`` at 224x224, batch size 16, learning rate
    0.001 and 100 epochs. The curves returned by ``train_model`` are not kept.
    """
    # Build the network and place it on the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = baselineModel(num_classes=10).to(device)

    # Data pipeline.
    train_loader, valid_loader = load_data(data_dir="dataset/", input_size=224, batch_size=16)

    # Optimizer, tagged with a name used for logging and checkpoint naming.
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9,0.999))
    optimizer.name = 'Adam'

    # Loss and training run.
    criterion = nn.CrossEntropyLoss()
    train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=100)


In [13]:
def test_new_model(model):
    """Train a caller-supplied ``model`` with the baseline recipe.

    Same settings as the baseline run: images under ``dataset/`` at 224x224,
    batch size 16, Adam with learning rate 0.001, cross-entropy loss and 100
    epochs. The curves returned by ``train_model`` are not kept.
    """
    # Place the network on the GPU when one is available.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(target_device)

    # Data pipeline.
    train_loader, valid_loader = load_data(data_dir="dataset/", input_size=224, batch_size=16)

    # Optimizer, tagged with a name used for logging and checkpoint naming.
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9,0.999))
    optimizer.name = 'Adam'

    # Loss and training run.
    criterion = nn.CrossEntropyLoss()
    train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=100)


In [14]:
## Build a ResNet-18 whose four residual stages are replaced by LKA attention blocks.
num_classes = 10
model = baselineModel(num_classes)
model.name = 'Resnet with 4 LKA layer'

## (in_dim, bottleneck_dim, out_dim) for layer1 .. layer4
## NOTE(review): the recorded run of this cell ended with a CUDA out-of-memory error.
## Every convolution in these blocks is called with stride 1 here, so they keep the
## spatial size of their input, whereas the ResNet stages they replace presumably
## downsample -- confirm whether that is the cause.
_stage_dims = [(64,64,64), (64,128,128), (128,256,256), (256,512,512)]
for _stage_idx, (_in_dim, _mid_dim, _out_dim) in enumerate(_stage_dims, start=1):
    setattr(model, 'layer{:d}'.format(_stage_idx), Attention(_in_dim, _mid_dim, _out_dim))

test_new_model(model)

epoch:0/100
****************************************************************************************************


RuntimeError: CUDA out of memory. Tried to allocate 26.00 MiB (GPU 0; 10.76 GiB total capacity; 9.46 GiB already allocated; 22.56 MiB free; 9.60 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [9]:
# Launch the baseline ResNet-18 training run; the per-epoch log is printed below.
testBaselineModel()

epoch:0/100
****************************************************************************************************
training: 1.9901, 0.3490
validation: 26.5236, 0.1098
epoch:1/100
****************************************************************************************************
training: 1.4453, 0.4950
validation: 37.9088, 0.1328
epoch:2/100
****************************************************************************************************
training: 1.2622, 0.5540
validation: 3.2240, 0.3854
epoch:3/100
****************************************************************************************************
training: 1.2458, 0.5520
validation: 2.1845, 0.4202
epoch:4/100
****************************************************************************************************
training: 1.1488, 0.5830
validation: 1.5308, 0.5469
epoch:5/100
****************************************************************************************************
training: 1.1067, 0.6160
validation: 1.7032, 0.4663
epoch:6/

In [2]:
# Print the layout of a stock torchvision ResNet-18 for reference.
from torchvision import models
# NOTE(review): this rebinds the notebook-level name `model`, which another cell
# also assigns -- running cells out of order can leave it pointing at either network.
model = models.resnet18()
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  