https://github.com/kuangliu/pytorch-cifar

In [None]:
# Colab only: mount Google Drive inside the VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
import numpy as np
from datetime import datetime

In [None]:
# Root folder. Change this according to your folder structure.
# ROOT_DIR is also the parent of the 'checkpoints' folder created further down.
# NOTE(review): the recorded output of this cell shows a path under
# '/content/gdrive/My Drive/...', so ROOT_DIR presumably has to point inside
# the mounted Drive for the copy below to succeed -- confirm before running.
ROOT_DIR = '/deeplearning/'

# Archive expected to contain the dataset splits.
zipped_data = os.path.join(ROOT_DIR, 'processed_data.zip')

# Local working directory for the dataset; created if missing.
data_dir = '/content/data'
os.makedirs(data_dir, exist_ok=True)
# Shell copy of the archive into data_dir ({...} is interpolated by IPython).
!cp '{zipped_data}' '{data_dir}'

cp: cannot stat '/content/gdrive/My Drive/Class/2020-2nd-Semester/Deep Learning (CSE7512-00) (Sung Ho Bae)/project/processed_data.zip': No such file or directory


In [None]:
# Extract the copied archive into data_dir; -qq keeps unzip quiet.
!unzip -qq '{data_dir}/processed_data.zip' -d '{data_dir}'

unzip:  cannot find or open /content/data/processed_data.zip, /content/data/processed_data.zip.zip or /content/data/processed_data.zip.ZIP.


In [None]:
# Per-split folders inside data_dir; each one is later handed to
# torchvision.datasets.ImageFolder.
# Assumes the archive extracts to train/, validation/ and test/ -- TODO confirm.
train_dir = os.path.join(data_dir, 'train')
valid_dir = os.path.join(data_dir, 'validation')
test_dir = os.path.join(data_dir, 'test')

# Define Model

## LeNet_STL10

In [None]:
import torch
import torch.nn as nn

class LeNet_STL10(nn.Module):
    """LeNet-style CNN with a fixed 10-way output.

    ``fc1`` expects ``16*21*21`` features, which is what two
    (5x5 conv -> 2x2 max-pool) stages produce from a 3x96x96 input.
    """

    def __init__(self):
        super().__init__()
        self.num_classes = 10

        # A single stateless ReLU module is shared by every layer.
        self.relu = nn.ReLU(inplace=True)

        # Feature extractor; spatial size goes 96 -> 92 -> 46 -> 42 -> 21.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d((2, 2), stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)
        self.pool2 = nn.MaxPool2d((2, 2), stride=2)

        # Fully connected head.
        self.fc1 = nn.Linear(16 * 21 * 21, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, self.num_classes)

        # Only the conv layers are re-initialised; Linear layers keep defaults.
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.pool1(self.relu(self.conv1(x)))
        feats = self.pool2(self.relu(self.conv2(feats)))
        flat = torch.flatten(feats, 1)

        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

## DenseNet

In [None]:
class Dense_Block(nn.Module):
    """Five densely connected 3x3 convolutions, each emitting 16 channels.

    The input is batch-normalised once. Every conv after the first receives
    the channel-wise concatenation of all previous conv outputs (the block
    input itself is not concatenated). The block returns the concatenation of
    all five conv outputs, i.e. 80 channels at the input's spatial size.
    """

    def __init__(self, in_channels):
        super().__init__()

        self.activation = nn.ReLU()
        self.bn = nn.BatchNorm2d(num_features=in_channels)

        growth = 16  # channels produced by every conv in the block
        self.conv1 = nn.Conv2d(in_channels, growth, kernel_size=3, stride=1, padding=1)
        # conv2..conv5 consume 16, 32, 48 and 64 channels respectively.
        for idx in range(2, 6):
            setattr(
                self,
                'conv%d' % idx,
                nn.Conv2d(growth * (idx - 1), growth, kernel_size=3, stride=1, padding=1),
            )

    def forward(self, x):
        """Return the activated concatenation of the five conv outputs."""
        outputs = [self.activation(self.conv1(self.bn(x)))]

        for conv in (self.conv2, self.conv3, self.conv4, self.conv5):
            if len(outputs) == 1:
                # conv2 sees conv1's output directly, without a concat.
                stacked = outputs[0]
            else:
                stacked = self.activation(torch.cat(outputs, 1))
            outputs.append(self.activation(conv(stacked)))

        return self.activation(torch.cat(outputs, 1))


class Transition_Layer(nn.Module):
    """1x1 conv -> ReLU -> BatchNorm -> 2x2 average pool.

    Changes the channel count from ``in_channels`` to ``out_channels`` and
    halves both spatial dimensions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.activation = nn.ReLU()
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Apply the transition and return the pooled feature map."""
        projected = self.activation(self.conv(x))
        # Normalisation is applied after the activation in this design.
        return self.avg_pool(self.bn(projected))

class DenseNet(nn.Module):
    """Small DenseNet-style classifier: stem conv + 3 x (dense block, transition).

    Args:
        nr_classes: number of output classes (size of the final Linear layer).

    The classifier expects ``64*12*12`` features, which is what three 2x2
    average pools produce from a 96x96 input (96 -> 48 -> 24 -> 12).
    """

    def __init__(self, nr_classes):
        super(DenseNet, self).__init__()

        # Stem: 7x7 conv with padding 3 keeps the spatial size.
        self.lowconv = nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, padding = 3, bias = False)
        self.activation = nn.ReLU()

        # Make Dense Blocks. Each Dense_Block outputs 80 channels (5 convs x 16).
        self.denseblock1 = self._make_dense_block(Dense_Block, 64)
        self.denseblock2 = self._make_dense_block(Dense_Block, 80)
        self.denseblock3 = self._make_dense_block(Dense_Block, 80)

        # Make transition Layers
        self.transitionLayer1 = self._make_transition_layer(Transition_Layer, in_channels = 80, out_channels = 80)
        self.transitionLayer2 = self._make_transition_layer(Transition_Layer, in_channels = 80, out_channels = 80)
        self.transitionLayer3 = self._make_transition_layer(Transition_Layer, in_channels = 80, out_channels = 64)

        # Classifier
        self.bn = nn.BatchNorm2d(num_features = 64)
        self.pre_classifier = nn.Linear(64*12*12, 192)
        self.classifier = nn.Linear(192, nr_classes)

    def _make_dense_block(self, block, in_channels):
        """Wrap a single ``block(in_channels)`` in an ``nn.Sequential``."""
        layers = []
        layers.append(block(in_channels))
        return nn.Sequential(*layers)

    def _make_transition_layer(self, layer, in_channels, out_channels):
        """Wrap a single ``layer(in_channels, out_channels)`` in an ``nn.Sequential``."""
        modules = []
        modules.append(layer(in_channels, out_channels))
        return nn.Sequential(*modules)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, nr_classes)``."""
        out = self.activation(self.lowconv(x))

        out = self.denseblock1(out)
        out = self.transitionLayer1(out)

        out = self.denseblock2(out)
        out = self.transitionLayer2(out)

        out = self.denseblock3(out)
        out = self.transitionLayer3(out)

        out = self.bn(out)
        # Flatten everything except the batch dimension. The previous
        # `view(-1, 64*12*12)` silently folded extra spatial positions into the
        # batch dimension for inputs larger than 96x96; with flatten a wrong
        # input size surfaces as a shape error in `pre_classifier` instead.
        out = torch.flatten(out, 1)

        # NOTE(review): there is no non-linearity between the two Linear layers.
        out = self.pre_classifier(out)
        out = self.classifier(out)

        return out

## ResNet
https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py

### v1

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ELU; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v1 of this notebook: four stages (64/96/128/160) + one Linear.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    NOTE(review): the head expects 640 features = 160 channels x 2 x 2, which
    corresponds to 64x64 inputs when ``block`` only downsamples via stride;
    96x96 inputs would yield 1440 features -- confirm the intended input size.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 128, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        self.linear = nn.Linear(640 * block.expansion, num_classes)

        # ELU is used here; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)
        return self.linear(feats)


### v2


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ELU; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v2 of this notebook: stages 64/96/128/160 + two-layer head.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    NOTE(review): the head expects 640 features = 160 channels x 2 x 2, which
    corresponds to 64x64 inputs when ``block`` only downsamples via stride;
    96x96 inputs would yield 1440 features -- confirm the intended input size.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 128, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        # Head: 640 -> 64 hidden units -> class scores.
        self.linear1 = nn.Linear(640 * block.expansion, 64)
        self.classifier = nn.Linear(64, num_classes)

        # ELU is used here; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)

        hidden = self.activation(self.linear1(feats))
        return self.classifier(hidden)


### v3

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ELU; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v3 of this notebook: stages 64/96/112/160 + two-layer head.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    The head expects 1440 features = 160 channels x 3 x 3, which is what
    96x96 inputs produce when ``block`` only downsamples via stride.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 112, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        # Head: 1440 -> 128 hidden units -> class scores.
        self.linear1 = nn.Linear(1440 * block.expansion, 128)
        self.classifier = nn.Linear(128, num_classes)

        # ELU is used here; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)

        hidden = self.activation(self.linear1(feats))
        return self.classifier(hidden)


### v4

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ReLU; nn.ELU() is the alternative that was tried.
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v4 of this notebook: stages 64/96/128/160 + single Linear head.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    The head expects 1440 features = 160 channels x 3 x 3, which is what
    96x96 inputs produce when ``block`` only downsamples via stride.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 128, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        self.classifier = nn.Linear(1440 * block.expansion, num_classes)

        # ReLU is used here; nn.ELU() is the alternative that was tried.
        self.activation = nn.ReLU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)

        return self.classifier(feats)


### v5

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ELU; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v5 of this notebook: stages 64/96/124/160 + two-layer head.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    The head expects 1440 features = 160 channels x 3 x 3, which is what
    96x96 inputs produce when ``block`` only downsamples via stride.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 124, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        # Head: 1440 -> 64 hidden units -> class scores.
        self.linear1 = nn.Linear(1440 * block.expansion, 64)
        self.classifier = nn.Linear(64, num_classes)

        # ELU is used here; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)

        # Collapse (C, H, W) into one feature axis; reshape copies if needed.
        n, c, h, w = feats.size(0), feats.size(1), feats.size(2), feats.size(3)
        feats = feats.reshape(n, c * h * w)

        hidden = self.activation(self.linear1(feats))
        return self.classifier(hidden)


### v6

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-conv residual block (3x3 -> BN -> act -> 3x3 -> BN, plus shortcut).

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by the block (times ``expansion``).
        stride: stride of the first conv; values other than 1 downsample.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        # This variant uses ELU; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def forward(self, x):
        """Return ``activation(main_branch(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.activation(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.activation(main)

class ResNet(nn.Module):
    """ResNet v6 of this notebook: stages 64/96/126/160 + two-layer head.

    Args:
        block: residual block class exposing ``expansion`` and the signature
            ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, number of blocks per stage.
        num_classes: size of the output layer.

    The head expects 1440 features = 160 channels x 3 x 3, which is what
    96x96 inputs produce when ``block`` only downsamples via stride.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64  # running channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        stage_widths = (64, 96, 126, 160)
        stage_strides = (1, 2, 2, 2)
        for idx, (width, stride) in enumerate(zip(stage_widths, stage_strides)):
            stage = self._make_layer(block, width, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)

        # Head: 1440 -> 64 hidden units -> class scores.
        self.linear1 = nn.Linear(1440 * block.expansion, 64)
        self.classifier = nn.Linear(64, num_classes)

        # ELU is used here; nn.ReLU() is the alternative that was tried.
        self.activation = nn.ELU()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        feats = self.activation(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)

        # Collapse (C, H, W) into one feature axis; reshape copies if needed.
        n, c, h, w = feats.size(0), feats.size(1), feats.size(2), feats.size(3)
        feats = feats.reshape(n, c * h * w)

        hidden = self.activation(self.linear1(feats))
        return self.classifier(hidden)


# Utils

In [None]:
# Model factory: edit this function to choose the network to train.
# It returns the model instance together with a name string.
# The name is only used for logging, so an empty string is acceptable.

def Model():
    """Build a fresh, untrained network and return ``(model, name)``.

    ``ResNet`` and ``BasicBlock`` resolve to whichever definition cell was
    executed last, since several versions share the same class names.
    """
    # Alternatives used in earlier experiments:
    #   LeNet_STL10(), 'LeNet_STL10'
    #   DenseNet(10), 'DenseNet'
    network = ResNet(BasicBlock, [2, 2, 2, 2])
    return network, 'ResNet18'

In [None]:
# Free-text description of this experiment, written to info.txt by
# save_model_info(). It only serves as a changelog; an empty string is fine.
# NOTE(review): the text mentions "v3" and "patience 50", while a top-to-bottom
# run defines ResNet v6 last and ES_PATIENCE is 30 -- confirm before logging.

# note = 'DenseNet. Convolutional output is 16 channels. Removed contrast=(0.5, 2), saturation=(0.5, 2), hue=(-0.1, 0.1)'
# note = 'Lenet with relu. optimizer changed to adam. increased patience to 25. learning rate decreased.'
note = 'ResNet18.v3. Random affine, perspective, and erasing are included. Patience increased to 50.'

In [None]:
# Check the number of parameters of your model.
# model_architecture and pytorch_total_params are reused by save_model_info().
model, model_architecture = Model()
pytorch_total_params = sum(p.numel() for p in model.parameters())

print(f"Number of parameters: {pytorch_total_params}")

# Print the count next to the budget for a quick visual comparison.
print(int(pytorch_total_params))
print(2000000)

# Hard limit: stop here if the model exceeds 2,000,000 parameters.
assert int(pytorch_total_params) <= 2000000, 'Your model has the number of parameters more than 2 millions..'

In [None]:
class AverageMeter(object):
    r"""Track the latest value and the weighted running average of a metric.

    ``fmt`` is a format-spec suffix (e.g. ``':6.2f'``) applied to both the
    latest value and the average when the meter is converted to a string.
    """

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = self.avg = self.sum = 0

        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Produces e.g. '{name} {val:6.2f} ({avg:6.2f})' and then fills it
        # from the instance attributes.
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))


class ProgressMeter(object):
    """Print a one-line progress report made of a batch counter and meters."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        """Print prefix, '[batch/total]' and every meter, tab-separated."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header, *map(str, self.meters)]))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as '[{:3d}/100]' padded to the total's width."""
        width = len(str(num_batches // 1))
        slot = '{:%dd}' % width
        return '[{}/{}]'.format(slot, slot.format(num_batches))


def accuracy(output, target, topk=(1,)):
    r"""Return top-k accuracies (in percent) for every k in ``topk``.

    ``output`` holds one row of class scores per sample and ``target`` the
    true class index per sample. The result is a list with one single-element
    tensor per requested k.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # A full descending sort is used instead of topk
        # (ref: https://github.com/pytorch/pytorch/issues/22812).
        ranked = output.sort(descending=True)[1]
        # Row r of `top_pred` holds every sample's (r+1)-th best guess.
        top_pred = ranked[:, :maxk].t()
        hits = top_pred.eq(target.view(1, -1).expand_as(top_pred))

        percent = 100.0 / batch_size
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(percent)
            for k in topk
        ]

## Hyperparameters

In [None]:
# Every run gets its own timestamped folder under ROOT_DIR/checkpoints.
current_time = datetime.now().strftime("%Y%m%d%H%M%S") 
SAVEPATH = os.path.join(ROOT_DIR, 'checkpoints', current_time)  # Path to save the models, logs, note.
LOG_DIR = os.path.join(SAVEPATH, 'logs')  # TensorBoard event files
# Only SAVEPATH is created here; LOG_DIR is presumably created by SummaryWriter -- TODO confirm.
os.makedirs(SAVEPATH, exist_ok=True)

PRINTFREQ = 10      # train() prints progress every PRINTFREQ batches

# WEIGHTDECAY and MOMENTUM are only referenced by the commented-out SGD optimizer.
WEIGHTDECAY = 5e-4
MOMENTUM = 0.9
BATCHSIZE = 64
EPOCHS = 5000       # upper bound; early stopping normally ends training sooner
ES_PATIENCE = 30    # Early stopping patience.

## Train Model

In [None]:
import time

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [None]:
def train(train_loader, epoch, model, optimizer, criterion):
    """Run one training epoch on the GPU.

    Prints progress every ``PRINTFREQ`` batches and returns the epoch averages
    ``(top1_accuracy, top5_accuracy, loss)``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        batch_time, data_time, losses, top1, top5,
        prefix="Epoch: [{}]".format(epoch),
    )

    # Enable training behaviour (e.g. BatchNorm uses batch statistics).
    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting for the data loader.
        data_time.update(time.time() - tick)

        images, labels = images.cuda(), labels.cuda()
        n_samples = images.size(0)

        # Forward pass.
        logits = model(images)
        loss = criterion(logits, labels)

        # Metrics are weighted by batch size so the average is per sample.
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        losses.update(loss.item(), n_samples)
        top1.update(acc1[0].item(), n_samples)
        top5.update(acc5[0].item(), n_samples)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Wall-clock time of the whole iteration, including data loading.
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % PRINTFREQ == 0:
            progress.print(step)

    print('Training => Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
    return top1.avg, top5.avg, losses.avg

# To validate the model on validation data.
def validate(validation_loader, model, criterion):
    """Evaluate ``model`` on ``validation_loader`` without updating it.

    The model is switched to eval mode and the forward passes run under
    ``torch.no_grad()``, so no autograd graph is built during validation.

    Returns:
        ``(top1_accuracy, top5_accuracy, loss)`` averaged over all samples.
    """
    val_losses = AverageMeter('Loss', ':.4e')
    val_top1 = AverageMeter('Acc@1', ':6.2f')
    val_top5 = AverageMeter('Acc@5', ':6.2f')

    model.eval()

    # Gradients are never needed here; disabling them saves memory and time.
    with torch.no_grad():
        for input, target in validation_loader:

            input = input.cuda()
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss, accuracy
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            val_losses.update(loss.item(), input.size(0))
            val_top1.update(acc1[0].item(), input.size(0))
            val_top5.update(acc5[0].item(), input.size(0))

    print('Validation => Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=val_top1, top5=val_top5))
    print('           => Loss {val_losses.avg:.4f}'.format(val_losses=val_losses))
    return val_top1.avg, val_top5.avg, val_losses.avg

In [None]:
# Load the TensorBoard notebook extension and point it at this run's log folder.
%load_ext tensorboard
%tensorboard --logdir '{LOG_DIR}'

# A few minutes into training, refresh the TensorBoard panel rendered by this cell to see your progress.

In [None]:
# Fresh model for this training run (model_architecture is the logging name).
model, model_architecture = Model()

LR = 1e-4
# Alternative optimizer kept for reference (uses MOMENTUM / WEIGHTDECAY):
# optimizer = torch.optim.SGD(model.parameters(), 
#                             lr=LR,
#                             momentum=MOMENTUM, 
#                             weight_decay=WEIGHTDECAY,
#                             nesterov=True)

# No learning-rate schedule is active; `scheduler` is only recorded in info.txt.
scheduler = None
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 150], gamma=0.1)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

criterion = torch.nn.CrossEntropyLoss()

# NOTE(review): the model is moved to the GPU after the optimizer was built
# from its parameters; consider moving it first -- confirm against torch.optim docs.
model = model.cuda()
criterion = criterion.cuda()

In [None]:
# Per-channel mean/std used to normalise inputs
# (presumably the ImageNet statistics -- TODO confirm they suit this dataset).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Augmentations for training.
# Crop, flip and colour jitter run before ToTensor; erasing, affine and
# perspective run on the tensor, followed by normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(96, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=(0.5, 2), contrast=(0.5, 2), saturation=(0.5, 2), hue=(-0.1, 0.1)),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.5, scale=(0.1, 0.1), ratio=(1, 1)),
    transforms.RandomAffine(degrees=10, translate=(0.05, 0.05), shear=5),
    transforms.RandomPerspective(distortion_scale=0.2),
    normalize
])


# Training data.
train_dataset = torchvision.datasets.ImageFolder(train_dir, transform=train_transform)
train_loader = DataLoader(train_dataset,
                            batch_size=BATCHSIZE, shuffle=True,
                            num_workers=0, pin_memory=True)


# Validation uses no augmentation, only tensor conversion and normalisation.
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

# Validation data.
# NOTE(review): shuffle=True is not required for validation; the reported
# averages do not depend on sample order.
valid_dataset = torchvision.datasets.ImageFolder(valid_dir, transform=valid_transform)
valid_loader = DataLoader(valid_dataset,
                            batch_size=BATCHSIZE, shuffle=True,
                            num_workers=0, pin_memory=True)


# Variables for logging.
writer = SummaryWriter(LOG_DIR)

# Metrics of the most recent epoch; kept at module level because
# save_model_info() reads them after training.
top1_acc_train = None
top5_acc_train = None
loss_train = None

top1_acc_valid = None
top5_acc_valid = None
loss_valid = None

# For early stopping.
es_counter = 0                # epochs since the validation loss last improved
current_best_loss = np.inf    # `np.Inf` was removed in NumPy 2.0; `np.inf` works everywhere
current_best_acc = 0          # top-1 validation accuracy of the best checkpoint
model_name = ''               # file name of the best checkpoint saved so far

last_epoch = 0                # epoch at which the best checkpoint was saved
for epoch in range(EPOCHS):

    print("\n----- epoch: {}, lr: {} -----".format(
        epoch, optimizer.param_groups[0]["lr"]))

    start_time = time.time()

    # Train
    top1_acc_train, top5_acc_train, loss_train = train(train_loader, epoch, model, optimizer, criterion)
    
    # Validate
    top1_acc_valid, top5_acc_valid, loss_valid = validate(valid_loader, model, criterion)

    # Save history.
    writer.add_scalars('accuracy/top1', {
        'train':top1_acc_train,
        'valid':top1_acc_valid,
    }, epoch)
    
    writer.add_scalars('accuracy/top5', {
        'train':top5_acc_train,
        'valid':top5_acc_valid,
    }, epoch)

    writer.add_scalars('loss', {
        'train':loss_train,
        'valid':loss_valid,
    }, epoch)

    elapsed_time = time.time() - start_time
    print('==> {:.2f} seconds to train this epoch\n'.format(elapsed_time))

    # Learning rate scheduling. `scheduler` is None by default, so this is a
    # no-op unless a scheduler is configured in the setup cell.
    # Assumes an epoch-based scheduler whose step() takes no metric.
    if scheduler is not None:
        scheduler.step()

    # Save best model and early stopping.
    # "Best" is defined by the lowest validation loss, not the accuracy.
    if loss_valid < current_best_loss:
        model_name = 'val_acc_{:.4f}_val_loss_{:.4f}_epoch_{}.pth'.format(top1_acc_valid, loss_valid, epoch)
        torch.save(model.state_dict(), os.path.join(SAVEPATH, model_name))
        current_best_loss = loss_valid
        current_best_acc = top1_acc_valid
        es_counter = 0
        last_epoch = epoch
    else:
        es_counter += 1 

    if es_counter >= ES_PATIENCE:
        print('Early stopped at epoch: {}'.format(epoch))
        break

# Flush pending TensorBoard events and release the log file handle.
writer.close()

print(f"Train Top-1 Accuracy: {top1_acc_train}")
print(f"Valid Top-1 Accuracy: {top1_acc_valid}")

print(f"Best Accuracy: {current_best_acc}")
print(f"Best Loss: {current_best_loss}")

In [None]:
# Delete other models except the best one.
# Only '.pth' checkpoints are removed, so re-running this cell later does not
# delete info.txt or submission.csv, and sub-directories such as 'logs' are
# never passed to os.remove.
for item in os.listdir(SAVEPATH):
    if item.endswith('.pth') and item != model_name:
        os.remove(os.path.join(SAVEPATH, item))

In [None]:
# Rebuild the network, load the best checkpoint and re-validate it.
# This overwrites top1_acc_valid / loss_valid with the best checkpoint's
# numbers, which save_model_info() then records.
model, _ = Model()
model = model.cuda()
model.load_state_dict(torch.load(os.path.join(SAVEPATH, model_name)))
top1_acc_valid, top5_acc_valid, loss_valid = validate(valid_loader, model, criterion)

print(f"Valid Top-1 Accuracy: {top1_acc_valid}")
# The value printed as "Best Loss" is the reloaded checkpoint's validation loss.
print(f"Best Loss: {loss_valid}")

In [None]:
# Save some information to keep track of the models.
def save_model_info():
    """Write a plain-text summary of this run to ``SAVEPATH/info.txt``.

    Takes no arguments: every value is read from module-level variables set
    by earlier cells (model name, batch size, optimizer, metrics, note, ...).
    The file is overwritten if it already exists.
    """
    info_file = os.path.join(SAVEPATH, 'info.txt')
    # The "EPOCHS" field is filled with last_epoch (the epoch of the best
    # checkpoint), not the EPOCHS constant, and "best_valid_acc" with
    # current_best_acc.
    info = '''
    MODEL : {}
    BATCHSIZE : {}\n
    EPOCHS : {}\n 
    ES_PATIENCE : {}\n

    optimizer : {}\n
    scheduler : {}\n
    criterion : {}\n
    train_transform : {}\n\n

    best_valid_acc : {}\n
    train_acc_top1 : {}\n
    valid_acc_top1 : {}\n

    pytorch_total_params : {}\n\n
    
    note : {}\n
    '''.format(model_architecture, BATCHSIZE, last_epoch, ES_PATIENCE, optimizer, scheduler, criterion, train_transform, current_best_acc, top1_acc_train, top1_acc_valid, pytorch_total_params, note)
    
    with open(info_file, 'w') as f:
        f.write(info)

In [None]:
# Write info.txt for this run into SAVEPATH.
save_model_info()

## Make an evaluation csv file

In [None]:
import torch
import pandas as pd
import argparse
import time
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

In [None]:
def eval():
    """Predict the test set with the best checkpoint and write ``submission.csv``.

    Loads ``SAVEPATH/model_name`` into a fresh model, predicts one class per
    test image in ``ImageFolder`` order (the loader is not shuffled) and saves
    a two-column CSV (``Id``, ``Category``) into ``SAVEPATH``.

    NOTE(review): the name shadows the built-in ``eval``; it is kept so the
    existing call site keeps working.
    """
    ########## You can change this part only in this cell ##########
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])
    ################################################################

    test_dataset = torchvision.datasets.ImageFolder(test_dir, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCHSIZE, num_workers=0, shuffle=False)

    model, _ = Model()
    model = model.cuda()
    model.load_state_dict(torch.load(os.path.join(SAVEPATH, model_name)))
    # Inference mode: without this, BatchNorm layers normalise with the
    # statistics of each test batch and keep updating their running averages,
    # so predictions would differ from what validate() measured.
    model.eval()

    print('Make an evaluation csv file for kaggle submission...')
    Category = []
    # No gradients are needed for prediction.
    with torch.no_grad():
        for input, _ in test_loader:
            input = input.cuda()
            output = model(input)
            output = torch.argmax(output, dim=1)
            Category.extend(output.tolist())

    # One Id per prediction. Previously hard-coded to 8000 rows, which made
    # the DataFrame construction fail for any other test-set size.
    Id = list(range(len(Category)))
    samples = {
       'Id': Id,
       'Category': Category 
    }
    df = pd.DataFrame(samples, columns=['Id', 'Category'])

    df.to_csv(os.path.join(SAVEPATH, 'submission.csv'), index=False)
    print('Done!!')


# Runs the evaluation whenever __name__ is "__main__"
# (presumably always the case when this cell is executed in the notebook).
if __name__ == "__main__":
    eval()