#[PyTorch Tutorial](https://pytorch.org/tutorials/index.html)

In [1]:
%matplotlib inline

In [2]:
# download alexNet model
!wget --no-check-certificate \
    'https://drive.google.com/uc?export=download&id=18qJ8RKgMbHCnALbY5fNWPZELEyZr1x0f' \
    -O /content/resnet.py

--2022-08-26 04:33:36--  https://drive.google.com/uc?export=download&id=18qJ8RKgMbHCnALbY5fNWPZELEyZr1x0f
Resolving drive.google.com (drive.google.com)... 74.125.130.113, 74.125.130.138, 74.125.130.139, ...
Connecting to drive.google.com (drive.google.com)|74.125.130.113|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘/content/resnet.py’

/content/resnet.py      [ <=>                ]   2.21K  --.-KB/s    in 0s      

2022-08-26 04:33:38 (20.9 MB/s) - ‘/content/resnet.py’ saved [2260]



In [3]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F

from torchvision.models import resnet18

## 1. Load and normalizing the datasets using torchvision

---



The output of torchvision datasets are PILImage images of range [0, 1].



In [4]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=8)

testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                       download=False, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=8)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting ./data/cifar-100-python.tar.gz to ./data


  cpuset_checked))


## 2. Define a Convolution Neural Network



In [5]:
net = resnet18()
net = net.cuda()

## 3. Define a Loss function and optimizer

Let's use a Classification Cross-Entropy loss and SGD with momentum.



In [6]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-04)

In [7]:
def mixup_data(x, y, alpha=1.0):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    lam = np.random.beta(alpha, alpha)
    ##################################################
    #
    #   args:
    #
    #   x           : inputs (batch_size, 3, 32, 32)
    #   y           : target labels of x
    #   alpha       : parameter of beta distribution
    #
    #   return:
    #      
    #   mixed_x     : the result of mix between x
    #   y_a, y_b    : target labels of mixed_x  ex) if x_1 (class 0) and x_2 (class 1) with lam (mixed_x = lam * x_1 + (1-lam) * x_2), then y_a, y_b are 0, 1 respectively. 
    #   lam
    #
    ##################################################    
    #
    #   your code
    #   # Obtain randomly permutated index to mixup different input x in batch
    #   # You can use the function torch.ranperm()
    #   # https://pytorch.org/docs/stable/torch.html?highlight=randperm#torch.randperm
    #   index = ... 
    #
    #   mixed_x = ...
    #   y_a = ...
    #   y_b = ...
    #
    index = torch.randperm(x.size(0))
    mixed_x = lam*x + (1-lam)*x[index]
    y_a, y_b = y, y[index]
    ##################################################

    return mixed_x, y_a, y_b, lam

In [8]:
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    ##################################################
    #
    #   args:
    #
    #   criterion   : ex) nn.CrossEntropyLoss()
    #   pred        : outputs (= net(inputs))
    #   y_a, y_b    : target labels of mixed inputs
    #   lam         : lambda value of mixup
    #
    #   return:
    #      
    #   mixup_criterion
    #
    ##################################################    
    #
    #   your code
    #
    #   mixup_criterion = ...
    #
    mixup_criterion = lam*criterion(pred, y_a) + (1-lam)*criterion(pred, y_b)
    ##################################################
    return mixup_criterion

## 4. Train the network

This is when things start to get interesting.
We simply have to loop over our data iterator, and feed the inputs to the
network and optimize.



In [9]:
import time

epochs = 40

for epoch in range(epochs):  # loop over the dataset multiple times

    net.train()
    
    if epoch in [20, 30]:
        optimizer.param_groups[0]['lr'] *= 0.1
        print(optimizer.param_groups[0]['lr'])
    
    running_loss = 0.0
    start = time.time()

    correct = 0
    total = 0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        
        # uncomment this line, to switch mode to mixup after implementation 
        # inputs, targets_a, targets_b, lam = mixup_data(inputs, labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        loss = criterion(outputs, labels)

        # uncomment this line, to switch mode to mixup after implementation
        # loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()

    end = time.time()
    print('(Train) [epoch : %d] loss: %.3f / time: %.3f / acc@1: %.3f' %
          (epoch + 1, running_loss / len(trainloader), (end-start), 100 * correct / total))
        
    net.eval()
            
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.cuda(), labels.cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('(Test) Accuracy of the network on the 10000 test images: %.3f %%\n' % (
        100 * correct / total))

print('Finished Training')

(Train) [epoch : 1] loss: 4.107 / time: 27.477 / acc@1: 8.914
(Test) Accuracy of the network on the 10000 test images: 15.330 %

(Train) [epoch : 2] loss: 3.414 / time: 20.699 / acc@1: 17.876
(Test) Accuracy of the network on the 10000 test images: 21.820 %

(Train) [epoch : 3] loss: 3.109 / time: 20.417 / acc@1: 23.120
(Test) Accuracy of the network on the 10000 test images: 25.860 %

(Train) [epoch : 4] loss: 2.874 / time: 22.545 / acc@1: 27.468
(Test) Accuracy of the network on the 10000 test images: 31.400 %

(Train) [epoch : 5] loss: 2.681 / time: 20.563 / acc@1: 31.320
(Test) Accuracy of the network on the 10000 test images: 31.010 %

(Train) [epoch : 6] loss: 2.530 / time: 20.529 / acc@1: 34.326
(Test) Accuracy of the network on the 10000 test images: 36.210 %

(Train) [epoch : 7] loss: 2.401 / time: 20.291 / acc@1: 37.110
(Test) Accuracy of the network on the 10000 test images: 37.680 %

(Train) [epoch : 8] loss: 2.291 / time: 20.476 / acc@1: 39.372
(Test) Accuracy of the netwo