In [2]:
!pip3 install torch torchvision

Defaulting to user installation because normal site-packages is not writeable


In [3]:
!pip3 install torch-summary

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import torch
torch.cuda.empty_cache()
import math
import torchvision
import torchvision.transforms as transforms
import random
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
from torch.utils.data import DataLoader,random_split
import torch.nn.functional as F
from collections import OrderedDict
import csv
import pandas as pd
import numpy as np
from torchsummary import summary
import torch.optim as optim
import os
from collections import defaultdict
from torch.optim.optimizer import Optimizer


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class Cutout(object):
    """Randomly mask out one or more patches from an image.

    Args:
        n_holes (int): Number of patches to cut out of each image.
        length (int): The length (in pixels) of each square patch.
    """
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it.
        """
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1: y2, x1: x2] = 0.

        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)
        img = img * mask

        return img

In [6]:
class Lookahead(Optimizer):
    r"""PyTorch implementation of the lookahead wrapper.

    Lookahead Optimizer: https://arxiv.org/abs/1907.08610
    """

    def __init__(self, optimizer, la_steps=5, la_alpha=0.8, pullback_momentum="none"):
        """optimizer: inner optimizer
        la_steps (int): number of lookahead steps
        la_alpha (float): linear interpolation factor. 1.0 recovers the inner optimizer.
        pullback_momentum (str): change to inner optimizer momentum on interpolation update
        """
        self.optimizer = optimizer
        self._la_step = 0  # counter for inner optimizer
        self.la_alpha = la_alpha
        self._total_la_steps = la_steps
        pullback_momentum = pullback_momentum.lower()
        assert pullback_momentum in ["reset", "pullback", "none"]
        self.pullback_momentum = pullback_momentum

        self.state = defaultdict(dict)

        # Cache the current optimizer parameters
        for group in optimizer.param_groups:
            for p in group['params']:
                param_state = self.state[p]
                param_state['cached_params'] = torch.zeros_like(p.data)
                param_state['cached_params'].copy_(p.data)
                if self.pullback_momentum == "pullback":
                    param_state['cached_mom'] = torch.zeros_like(p.data)

    def __getstate__(self):
        return {
            'state': self.state,
            'optimizer': self.optimizer,
            'la_alpha': self.la_alpha,
            '_la_step': self._la_step,
            '_total_la_steps': self._total_la_steps,
            'pullback_momentum': self.pullback_momentum
        }

    def zero_grad(self):
        self.optimizer.zero_grad()

    def get_la_step(self):
        return self._la_step

    def state_dict(self):
        return self.optimizer.state_dict()

    def load_state_dict(self, state_dict):
        self.optimizer.load_state_dict(state_dict)

    def _backup_and_load_cache(self):
        """Useful for performing evaluation on the slow weights (which typically generalize better)
        """
        for group in self.optimizer.param_groups:
            for p in group['params']:
                param_state = self.state[p]
                param_state['backup_params'] = torch.zeros_like(p.data)
                param_state['backup_params'].copy_(p.data)
                p.data.copy_(param_state['cached_params'])

    def _clear_and_load_backup(self):
        for group in self.optimizer.param_groups:
            for p in group['params']:
                param_state = self.state[p]
                p.data.copy_(param_state['backup_params'])
                del param_state['backup_params']

    @property
    def param_groups(self):
        return self.optimizer.param_groups

    def step(self, closure=None):
        """Performs a single Lookahead optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = self.optimizer.step(closure)
        self._la_step += 1

        if self._la_step >= self._total_la_steps:
            self._la_step = 0
            # Lookahead and cache the current optimizer parameters
            for group in self.optimizer.param_groups:
                for p in group['params']:
                    param_state = self.state[p]
                    p.data.mul_(self.la_alpha).add_(param_state['cached_params'], alpha=1.0 - self.la_alpha)  # crucial line
                    param_state['cached_params'].copy_(p.data)
                    if self.pullback_momentum == "pullback":
                        internal_momentum = self.optimizer.state[p]["momentum_buffer"]
                        self.optimizer.state[p]["momentum_buffer"] = internal_momentum.mul_(self.la_alpha).add_(
                            1.0 - self.la_alpha, param_state["cached_mom"])
                        param_state["cached_mom"] = self.optimizer.state[p]["momentum_buffer"]
                    elif self.pullback_momentum == "reset":
                        self.optimizer.state[p]["momentum_buffer"] = torch.zeros_like(p.data)

        return loss

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  
start_epoch = 0

In [8]:
device

'cuda'

In [9]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, kernel_size=3):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion*out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion*out_channels, kernel_size=kernel_size, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet18(nn.Module):
    def __init__(self, block, num_blocks=[2,2,2,2], num_classes=10, kernel_size=3, pool_size=4):
        super(ResNet18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=kernel_size, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, kernel_size=kernel_size)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, kernel_size=kernel_size)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, kernel_size=kernel_size)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, kernel_size=kernel_size)
        self.avgpool = nn.AvgPool2d(pool_size)
        self.fc = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride, kernel_size):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride, kernel_size=kernel_size))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


In [10]:
# Model Configuration :


model = ResNet18(BasicBlock,  num_blocks = [1,1,1,1],kernel_size=1,pool_size=4)

print(summary(model))

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            192
├─BatchNorm2d: 1-2                       128
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  36,864
|    |    └─BatchNorm2d: 3-2             128
|    |    └─Conv2d: 3-3                  36,864
|    |    └─BatchNorm2d: 3-4             128
|    |    └─Sequential: 3-5              --
├─Sequential: 1-4                        --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  73,728
|    |    └─BatchNorm2d: 3-7             256
|    |    └─Conv2d: 3-8                  147,456
|    |    └─BatchNorm2d: 3-9             256
|    |    └─Sequential: 3-10             8,448
├─Sequential: 1-5                        --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 294,912
|    |    └─BatchNorm2d: 3-12            512
|    |    └─Conv2d: 3-13               

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  
start_epoch = 0

In [12]:
# Training

import time
def train(epoch):
    start_time = time.time()
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    end_time = time.time()
    acc = 100.*correct/total
    loss = 100.*train_loss/total
    print('Train Loss: %.3f | Train Acc: %.3f%% (%d/%d) | time: %.3f seconds'
                     % (loss, acc, correct, total, end_time-start_time))
    model_results[str(epoch)] =  {"train" : {"acc" : acc,"loss" : loss},"test" : {}}
    
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    start_time = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    end_time = time.time()
    # Save checkpoint.
    acc = 100.*correct/total
    loss = 100.*test_loss/total
    print('Test Loss: %.3f | Test Acc: %.3f%% (%d/%d)  | time: %.3f seconds'
                     % (loss, acc, correct, total,end_time-start_time))
    model_results[str(epoch)]['test']  = {"acc" : acc,"loss" : loss}
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint_rms'):
            os.mkdir('checkpoint_rms')
        torch.save(state, './checkpoint_rms/ckpt_256_lr_1.pth')
        best_acc = acc
        
#Model Parameters

batch_size = 256
lr = 0.1
optim_param = {'la_steps':5,
               'la_alpha':0.5
              }
resume = False 
model_results = {}

#Load model
        
net = model
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)

if resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint_rms'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint_rms/ckpt_256_lr_1.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    Cutout(n_holes=1, length=8)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)


criterion = nn.CrossEntropyLoss()
base_optim = optim.RMSprop(net.parameters(), lr=lr)

Q = math.floor(len(trainset)/batch_size)
optimizer = Lookahead(base_optim, **optim_param)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Q)

epochs = 50

for epoch in range(start_epoch, start_epoch+epochs+1):
    train(epoch)
    test(epoch)
    scheduler.step()


==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified

Epoch: 0
Train Loss: 2.434 | Train Acc: 19.732% (9866/50000) | time: 20.675 seconds
Test Loss: 0.870 | Test Acc: 20.540% (2054/10000)  | time: 1.085 seconds
Saving..

Epoch: 1
Train Loss: 0.752 | Train Acc: 28.862% (14431/50000) | time: 19.515 seconds
Test Loss: 0.905 | Test Acc: 26.300% (2630/10000)  | time: 1.120 seconds
Saving..

Epoch: 2
Train Loss: 0.690 | Train Acc: 34.624% (17312/50000) | time: 19.788 seconds
Test Loss: 0.892 | Test Acc: 30.990% (3099/10000)  | time: 1.062 seconds
Saving..

Epoch: 3
Train Loss: 0.642 | Train Acc: 39.718% (19859/50000) | time: 20.847 seconds
Test Loss: 0.743 | Test Acc: 37.640% (3764/10000)  | time: 0.988 seconds
Saving..

Epoch: 4
Train Loss: 0.597 | Train Acc: 45.196% (22598/50000) | time: 20.613 seconds
Test Loss: 0.659 | Test Acc: 48.390% (4839/10000)  | time: 1.164 seconds
Saving..

Epoch: 5
Train Loss: 0.529 | Train Acc: 52.102% (26051/50000) |

In [13]:
import json
with open('result_rms_256_lr_1.json', 'w') as fp:
    json.dump(model_results, fp)

In [14]:
checkpoint = torch.load('./checkpoint_rms/ckpt_256_lr_1.pth')
net.load_state_dict(checkpoint['net'])
best_acc = checkpoint['acc']

In [15]:
best_acc

86.39