## Setup


In [2]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms
import torchvision

from torch.utils.data import DataLoader

Archive:  tools.zip
   creating: tools/
  inflating: tools/utils.py          
   creating: tools/.ipynb_checkpoints/
  inflating: tools/.ipynb_checkpoints/dataset-checkpoint.py  
   creating: tools/__pycache__/
  inflating: tools/__pycache__/utils.cpython-38.pyc  
  inflating: tools/__pycache__/dataset.cpython-38.pyc  
  inflating: tools/dataset.py        


In [20]:

###################### Data Preprocessing #######################


train_transform = transforms.Compose([
            # make sure we're using PIL instead of tensor when doing other transform 
            transforms.RandomCrop(32, 4),
            #transforms.GaussianBlur(23, sigma=(0.1, 2.0)), # CIFAR 10 doesn't use gaussian blur
            #transforms.RandomResizedCrop(size=32,scale=(0.08,0.1),ratio=(0.75,1.33)),
            transforms.RandomHorizontalFlip(p=0.5),
            #get_color_distortion(),
            transforms.ToTensor(),
            # the normalize numbers are from previous assignment
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])



In [33]:
linear_eval_transform_test = transforms.Compose([
            #transforms.GaussianBlur(23, sigma=(0.1, 2.0)), # CIFAR 10 doesn't use gaussian blur
            transforms.RandomResizedCrop(size=32,scale=(0.08,0.1),ratio=(0.75,1.33)),
            transforms.ToTensor(),
            # the normalize numbers are from previous assignment
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])

In [34]:
# load and split data 
BATCH_SIZE = 128

#all_train_cifar = datasets.CIFAR10('./data7', train=True, download=True, transform=transforms.Compose(
#    [transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]))
#further split train and validation set 

all_train_cifar = datasets.CIFAR10('./data7', train=True, download=True, transform=train_transform)

train_set, val_set = torch.utils.data.random_split(all_train_cifar, [45000, 5000])



train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size = BATCH_SIZE, shuffle=True, )

val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size = BATCH_SIZE, shuffle=True, )

# the testset don't have data augmentation
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data7', train=False, download=True, transform=linear_eval_transform_test),
    batch_size = BATCH_SIZE, shuffle=True, )


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data7/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data7/cifar-10-python.tar.gz to ./data7
Files already downloaded and verified


In [None]:
 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)     

## Model

In [5]:

class Block(nn.Module):
  def __init__(self, in_c: int, out_c: int, s: int):
    """
    A Block Model that takes in a few arguments. This will represent 1 block in the ResNet layer
    It should have 2 convoluational layer in each block
    1. in_c will indicate the number of input features 
    2. out_c will indicate the number of desired output features
    3. s will indicate the number of stride of the first conv layer in the block, it's also an
    indicator of whether the block is performing downsampling. 
    """
    super(Block, self).__init__()
    self.conv1 = nn.Conv2d(in_channels = in_c, out_channels = out_c, kernel_size = 3, stride= s, padding = 1)
    self.conv1_bn = nn.BatchNorm2d(out_c)
    self.conv2 = nn.Conv2d(in_channels = out_c, out_channels = out_c, kernel_size = 3, stride=1, padding = 1)
    self.conv2_bn = nn.BatchNorm2d(out_c)
    # if this is the first block in the layer (2nd & 3rd), we want to resize the identity
    # when stride !=1, that means we're downsampling in the first block of the layer
    self.identity = nn.Sequential() # if not downsample layer, then do nothing
    if s!=1: 
      self.identity = nn.Sequential(
          # we use option b 1x1 conv here
          nn.Conv2d(in_channels = in_c, out_channels = out_c, kernel_size = 1 , stride = 2, padding = 0),
          nn.BatchNorm2d(out_c)
      )

  def forward(self, x):  
    out = self.conv1(x)
    out = self.conv1_bn(out)
    out = F.relu(out)
    out = self.conv2(out)
    out = self.conv2_bn(out)
    # add in the identity here (make sure the size is correct)
    out += self.identity(x)
    out = F.relu(out)
    return out 


class ResNet18_pred(nn.Module):
    def __init__(self, in_c, resblock):
      """
      This is the model that is representing the ResNet 20 archieture
      It should have 20 layers in total.
      1. in_c will indicate the image input feature channel number
      2. resblock will indicatet the Block that we created previously
      """
      super(ResNet18_pred, self).__init__()

      # layer 0 
      # skip maxpooling in the first conv
      self.conv1 = nn.Conv2d(in_channels = in_c, out_channels = 64, kernel_size = 3, stride=1, padding= 1)
      self.conv1_bn = nn.BatchNorm2d(64)
      # define layers 
      self.layer1= nn.Sequential(
            resblock(in_c = 64, out_c = 64, s = 1),
            resblock(in_c = 64, out_c = 64, s = 1),
        )
      #downsampling layer
      self.layer2 = nn.Sequential(
            resblock(in_c = 64, out_c = 128, s = 2),
            resblock(in_c = 128, out_c = 128, s = 1),
        )
      #downsampling layer
      self.layer3 = nn.Sequential(
            resblock(in_c = 128, out_c = 256, s = 2),
            resblock(in_c = 256, out_c = 256, s = 1),
        )
      #downsampling layer
      self.layer4 = nn.Sequential(
            resblock(in_c = 256, out_c = 512, s = 2),
            resblock(in_c = 512, out_c = 512, s = 1),
        )
      
      self.linear = nn.Linear(512, 10)

    def forward(self, x):
      # first conv
      out = self.conv1(x)
      out = self.conv1_bn(out)
      out = F.relu(out)
      # first layer
      out = self.layer1(out)
      # second layer
      out = self.layer2(out)
      # third layer
      out = self.layer3(out)
      # fourth layer 
      out = self.layer4(out)

      # apply avg pooling
      out = F.avg_pool2d(out, out.size()[3])
      # out = self.mlp(out)
      out = out.view(out.size(0), -1)
      out = self.linear(out)
      #out = self.linear(out)
      return out

## Model Training

In [6]:
# some hyperparameters
# total number of training epochs

def training_epochs(lr, model,filename, EPOCHS, DECAY_EPOCHS, DECAY):
  #EPOCHS = 30
  # the folder where the trained model is saved
  CHECKPOINT_FOLDER = "./saved_model"
  # start the training/validation process
  # the process should take about 5 minutes on a GTX 1070-Ti
  # if the code is written efficiently.
  best_val_acc = 0
  current_learning_rate = lr

  print("==> Training starts!")
  print("="*50)
  for i in range(0, EPOCHS):
      # handle the learning rate scheduler.
      
      if i % DECAY_EPOCHS == 0 and i != 0:
          current_learning_rate = current_learning_rate * DECAY
          for param_group in optimizer.param_groups:
              param_group['lr'] = current_learning_rate
          print("Current learning rate has decayed to %f" %current_learning_rate)
      #######################
      # your code here
      # switch to train mode
      
      model.train()
      #######################
      
      print("Epoch %d:" %i)
      # this help you compute the training accuracy
      total_examples = 0
      correct_examples = 0

      train_loss = 0 # track training loss if you want
      
      # Train the model for 1 epoch.
      for batch_idx, (inputs, targets) in enumerate(train_loader):
          ####################################
          # your code here
          # copy inputs to device
          inputs, targets = inputs.to(device), targets.to(device)

          # compute the output and loss
          outputs = model(inputs)
          loss = criterion(outputs, targets)
          train_loss += loss    

          # zero the gradient
          optimizer.zero_grad()
  
          # backpropagation
          loss.backward()
  
          # apply gradient and update the weights
          optimizer.step()
          
          # count the number of correctly predicted samples in the current batch
          outputs = model(inputs)
          _, predicted = torch.max(outputs.data, 1)
          total_examples += targets.size(0)
          correct_examples += (predicted == targets).sum().item()

          ####################################
                  
      avg_loss = train_loss / len(train_loader)
      avg_acc = correct_examples / total_examples
      print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

      # Validate on the validation dataset
      #######################
      # your code here
      # switch to eval mode
      model.eval()
      
      #######################

      # this help you compute the validation accuracy
      total_examples = 0
      correct_examples = 0
      
      val_loss = 0 # again, track the validation loss if you want

      # disable gradient during validation, which can save GPU memory
      with torch.no_grad():
          for batch_idx, (inputs, targets) in enumerate(val_loader):
              ####################################
              # your code here
              # copy inputs to device
              inputs, targets = inputs.to(device), targets.to(device)

              
              # compute the output and loss
              outputs = model(inputs)
              loss = criterion(outputs, targets)
              val_loss += loss
              
              # count the number of correctly predicted samples in the current batch
              outputs = model(inputs)
              _, predicted = torch.max(outputs.data, 1)
              total_examples += targets.size(0)
              correct_examples += (predicted == targets).sum().item()
              ####################################

      avg_loss = val_loss / len(val_loader)
      avg_acc = correct_examples / total_examples
      print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
      
      # save the model checkpoint
      if avg_acc > best_val_acc:
          best_val_acc = avg_acc
          if not os.path.exists(CHECKPOINT_FOLDER):
              os.makedirs(CHECKPOINT_FOLDER)
          print("Saving ...")
          state = {'state_dict': model.state_dict(),
                   'epoch': i,
                   'lr': current_learning_rate}
          torch.save(state, os.path.join(CHECKPOINT_FOLDER, filename+'_test.pth'))
          
      print('')

  print("="*50)
  print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")
  return best_val_acc, val_loss

In [35]:
net = ResNet18_pred(in_c =3 , resblock = Block)
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=MOMENTUM ,weight_decay= 5e-5)
output = training_epochs(0.01, net , 'resnet20_3', 10, 80, 0.1)
#val_acc_list.append(output[0])


==> Training starts!
Epoch 0:
Training loss: 1.4538, Training accuracy: 0.5206
Validation loss: 1.1535, Validation accuracy: 0.5906
Saving ...

Epoch 1:
Training loss: 0.9655, Training accuracy: 0.6962
Validation loss: 0.8902, Validation accuracy: 0.6926
Saving ...

Epoch 2:
Training loss: 0.7468, Training accuracy: 0.7745
Validation loss: 0.7587, Validation accuracy: 0.7374
Saving ...

Epoch 3:
Training loss: 0.6291, Training accuracy: 0.8140
Validation loss: 0.6412, Validation accuracy: 0.7744
Saving ...

Epoch 4:
Training loss: 0.5454, Training accuracy: 0.8431
Validation loss: 0.5272, Validation accuracy: 0.8160
Saving ...

Epoch 5:
Training loss: 0.4857, Training accuracy: 0.8631
Validation loss: 0.5354, Validation accuracy: 0.8092

Epoch 6:
Training loss: 0.4368, Training accuracy: 0.8791
Validation loss: 0.5562, Validation accuracy: 0.8092

Epoch 7:
Training loss: 0.4052, Training accuracy: 0.8895
Validation loss: 0.5214, Validation accuracy: 0.8144

Epoch 8:
Training loss: 0.37

In [29]:
net = ResNet18_pred(in_c =3 , resblock = Block)
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=MOMENTUM ,weight_decay= 5e-5)
output = training_epochs(0.01, net , 'resnet20_2_', 10, 80, 0.1)
#val_acc_list.append(output[0])


==> Training starts!
Epoch 0:
Training loss: 1.3171, Training accuracy: 0.5778
Validation loss: 1.0074, Validation accuracy: 0.6372
Saving ...

Epoch 1:
Training loss: 0.7780, Training accuracy: 0.7736
Validation loss: 0.7137, Validation accuracy: 0.7528
Saving ...

Epoch 2:
Training loss: 0.5473, Training accuracy: 0.8573
Validation loss: 0.7370, Validation accuracy: 0.7520

Epoch 3:
Training loss: 0.3866, Training accuracy: 0.9182
Validation loss: 0.6502, Validation accuracy: 0.7888
Saving ...

Epoch 4:
Training loss: 0.2579, Training accuracy: 0.9640
Validation loss: 0.6399, Validation accuracy: 0.8040
Saving ...

Epoch 5:
Training loss: 0.1706, Training accuracy: 0.9859
Validation loss: 0.8084, Validation accuracy: 0.7818

Epoch 6:
Training loss: 0.1052, Training accuracy: 0.9953
Validation loss: 0.7725, Validation accuracy: 0.8092
Saving ...

Epoch 7:
Training loss: 0.0702, Training accuracy: 0.9987
Validation loss: 0.8274, Validation accuracy: 0.7950

Epoch 8:
Training loss: 0.05

## Testing

In [30]:
test = ResNet18_pred(3, Block) 
test.load_state_dict(torch.load("/content/saved_model/resnet20_3__test.pth")['state_dict'])
test.to(device)
test.eval()
for parameter in test.parameters():
    parameter.requires_grad = False

In [31]:
def test_model(mdl, loader):
    mdl.eval()
    running_correct = 0.
    running_loss = 0.
    running_total = 0.
    with torch.no_grad():
        for data,labels in loader:
            data = data.to(device); labels = labels.to(device)
            outputs = mdl(data)
            loss = F.cross_entropy(outputs, labels)
            _, preds = outputs.max(1)
            running_correct += preds.eq(labels).sum().item()
            running_loss += loss.item()
            running_total += labels.size(0)
    test_acc = running_correct/running_total
    test_loss = running_loss/len(loader)
    mdl.train()
    print
    return test_acc, test_loss

In [32]:
test_model(test, test_loader)

(0.8028, 0.8995775078670888)