# Setup:


In [1]:
# Casey Mount Google Drive
# Mounts Google Drive inside the Colab VM at /content/drive, then appends the
# project folder on Drive to sys.path.
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/MyDrive/ECE 661/Final Project')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

#from tools.dataset import CIFAR10
#from torch.utils.data import DataLoader

# ResNet Code

In [3]:
'''
Properly implemented ResNet-s for CIFAR10 as described in paper [1].
The implementation and structure of this file is hugely influenced by [2]
which is implemented for ImageNet and doesn't have option A for identity.
Moreover, most of the implementations on the web are copy-pasted from
torchvision's resnet and have the wrong number of params.
Proper ResNet-s for CIFAR10 (for fair comparison, etc.) have the following
number of layers and parameters:
name      | layers | params
ResNet20  |    20  | 0.27M
ResNet32  |    32  | 0.46M
ResNet44  |    44  | 0.66M
ResNet56  |    56  | 0.85M
ResNet110 |   110  |  1.7M
ResNet1202|  1202  | 19.4M
which this implementation indeed has.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
[2] https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
If you use this implementation in your work, please don't forget to mention the
author, Yerlan Idelbayev.
'''

# Public names of this cell: the ResNet class plus one factory name per depth.
# The __main__ check further down iterates this list to build each factory.
__all__ = ['ResNet', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']

def _weights_init(m):
    classname = m.__class__.__name__
    #print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
    """Module wrapper whose forward pass simply calls a user-supplied callable."""

    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        fn = self.lambd
        return fn(x)


class BasicBlock(nn.Module):
    """Residual block: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, added to a shortcut, then ReLU.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (the second always uses 1).
        option: shortcut used when ``stride != 1`` or ``in_planes != planes``:
            'A' - parameter-free: take every second pixel in both spatial
                  dimensions and zero-pad the channel dimension by
                  ``planes // 4`` on each side;
            'B' - 1x1 convolution (with ``stride``) followed by BatchNorm.
            When neither condition holds the shortcut is the identity and
            ``option`` is not consulted.

    Raises:
        ValueError: if a non-identity shortcut is required and ``option`` is
            neither 'A' nor 'B'.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # An empty Sequential acts as the identity shortcut.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                # For CIFAR10 ResNet paper uses option A.
                # The padding adds planes // 2 channels in total, so the result
                # only matches the main branch when planes == 2 * in_planes and
                # the main branch halves the spatial size (stride 2).
                self.shortcut = LambdaLayer(lambda x:
                                            F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                     nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                     nn.BatchNorm2d(self.expansion * planes)
                )
            else:
                # Previously an unknown option silently kept the identity
                # shortcut and only failed later, inside forward(), with a
                # shape mismatch.
                raise ValueError(
                    "Unknown shortcut option %r: expected 'A' or 'B'" % (option,)
                )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    """ResNet with a 3x3 stem, three residual stages (16, 32, 64 planes) and a linear head.

    Args:
        block: residual block class; called as ``block(in_planes, planes, stride)``
            and expected to expose an ``expansion`` attribute.
        num_blocks: indexable with the number of blocks for stages 0, 1 and 2.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running input-channel count; _make_layer reads and updates it.
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        # Stages 2 and 3 start with a stride-2 block; stage 1 keeps the resolution.
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)

        # Re-initialise every Linear/Conv2d weight in the whole module tree.
        self.apply(_weights_init)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: ``num_blocks`` blocks, only the first using ``stride``."""
        stage_blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage_blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # Average-pool with a kernel equal to the feature-map width.
        pooled = F.avg_pool2d(features, features.size()[3])
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)


def resnet20():
    """Return a ResNet of BasicBlocks with 3 blocks in each of the three stages."""
    return ResNet(BasicBlock, [3] * 3)


def resnet32():
    """Return a ResNet of BasicBlocks with 5 blocks in each of the three stages."""
    return ResNet(BasicBlock, [5] * 3)


def resnet44():
    """Return a ResNet of BasicBlocks with 7 blocks in each of the three stages."""
    return ResNet(BasicBlock, [7] * 3)


def resnet56():
    """Return a ResNet of BasicBlocks with 9 blocks in each of the three stages."""
    return ResNet(BasicBlock, [9] * 3)


def resnet110():
    """Return a ResNet of BasicBlocks with 18 blocks in each of the three stages."""
    return ResNet(BasicBlock, [18] * 3)


def resnet1202():
    """Return a ResNet of BasicBlocks with 200 blocks in each of the three stages."""
    return ResNet(BasicBlock, [200] * 3)


def test(net):
    import numpy as np
    total_params = 0

    for x in filter(lambda p: p.requires_grad, net.parameters()):
        total_params += np.prod(x.data.numpy().shape)
    print("Total number of params", total_params)
    print("Total layers", len(list(filter(lambda p: p.requires_grad and len(p.data.size())>1, net.parameters()))))


if __name__ == "__main__":
    # Only the lowercase factory names are built; the 'ResNet' entry of
    # __all__ does not start with 'resnet' and is skipped.
    factory_names = [name for name in __all__ if name.startswith('resnet')]
    for net_name in factory_names:
        print(net_name)
        model = globals()[net_name]()
        test(model)
        print()

resnet20
Total number of params 269722
Total layers 20

resnet32
Total number of params 464154
Total layers 32

resnet44
Total number of params 658586
Total layers 44

resnet56
Total number of params 853018
Total layers 56

resnet110
Total number of params 1727962
Total layers 110

resnet1202
Total number of params 19421274
Total layers 1202



# RotNet (Do not change any of the below for SimCLR)


In [4]:
# Loading in CIFAR Dataset
print('==> Preparing data..')

# Per-channel normalisation statistics used by both the training and the
# validation pipeline.
_CIFAR_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR_STD = (0.2023, 0.1994, 0.2010)

# Both pipelines are identical: tensor conversion followed by normalisation
# (no augmentation).
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
])
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
])

# The CIFAR10 test split (train=False) serves as the validation set.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
valset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_val)

train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=16)
val_loader = torch.utils.data.DataLoader(
    valset, batch_size=100, shuffle=False, num_workers=2)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified




In [None]:
# Convert the batch into 4 rotated images

def rotnet_conversion(batch):
    """Expand a batch of images into its four rotations plus rotation labels.

    Args:
        batch: 4-D tensor indexed as (N, C, H, W).

    Returns:
        A tuple ``(images, labels)``. ``images`` has shape (4 * N, C, H, W) and
        keeps the dtype and device of ``batch``; entries ``4*i .. 4*i+3`` are
        image ``i`` rotated counter-clockwise by 0, 90, 180 and 270 degrees.
        ``labels`` is a LongTensor of length 4 * N holding the matching
        rotation class 0, 1, 2, 3.
    """
    num_images, channels, height, width = batch.size()
    if height == width:
        # For square images a quarter-turn is an exact index permutation, so
        # the whole batch is rotated at once without interpolation.
        rotations = [torch.rot90(batch, k, dims=(2, 3)) for k in range(4)]
    else:
        # rot90 would swap H and W; fall back to torchvision's size-preserving
        # rotation so non-square inputs keep their (H, W) shape.
        rotations = [batch] + [
            transforms.functional.rotate(batch, angle) for angle in (90, 180, 270)
        ]
    # Stacking on dim 1 and flattening interleaves the rotations per image.
    new_batch = torch.stack(rotations, dim=1).reshape(
        num_images * 4, channels, height, width)
    new_labels = torch.arange(4, dtype=torch.long, device=batch.device).repeat(num_images)
    return new_batch, new_labels

In [None]:
# RotNet Setup

# Parameters (as described in the paper)
INITIAL_LR = 0.1
MOMENTUM = 0.9
REG = 5e-4
EPOCHS = 100
DECAY_EPOCHS = {30, 60, 80}   # epochs at which the learning rate is multiplied by DECAY
DECAY = 0.2

device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Same block layout as resnet20, but with a 4-way head (one class per rotation).
net = ResNet(BasicBlock, [3, 3, 3], num_classes = 4)
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr = INITIAL_LR,
    momentum = MOMENTUM,
    weight_decay = REG,
)

In [None]:
# Training the RotNet with rotated images
# Drive is mounted at the absolute path /content/drive. The former relative
# "./content/..." path resolved against the working directory, so checkpoints
# never reached the mounted Drive folder.
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ECE 661/Final Project"

best_val_acc = 0
current_learning_rate = INITIAL_LR

print("==> Training starts!")
print("="*50)
for i in range(0, EPOCHS):
    # handle the learning rate scheduler.
    if i in DECAY_EPOCHS:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # Training
    net.train()
    total_examples = 0
    correct_examples = 0
    train_loss = 0.0

    print("Epoch %d:" %i)

    # One Epoch
    for batch_idx, (inputs, targets) in enumerate(train_loader):

        # Every image becomes four rotated copies labelled 0-3; the original
        # class labels are discarded.
        inputs, targets = rotnet_conversion(inputs)

        inputs = inputs.to(device)
        targets = targets.to(device)
        # Call the module (not .forward) so registered hooks run.
        out = net(inputs)
        loss = criterion(out, targets)
        net.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the running sum does not hold on to
        # autograd state from every batch.
        train_loss += loss.item()

        # Counting Correct Predictions (vectorised over the batch)
        correct_examples += (out.argmax(dim=1) == targets).sum().item()
        total_examples += targets.size(0)

    # Compute Loss/Accuracy
    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

    # Validation
    net.eval()
    total_examples = 0
    correct_examples = 0
    val_loss = 0.0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):

            inputs, targets = rotnet_conversion(inputs)

            inputs = inputs.to(device)
            targets = targets.to(device)
            out = net(inputs)
            loss = criterion(out, targets)
            val_loss += loss.item()

            # Counting Correct Predictions
            correct_examples += (out.argmax(dim=1) == targets).sum().item()
            total_examples += targets.size(0)

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    # save the model checkpoint (only when validation accuracy improves)
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'RotNet.pth'))

    print('')

print("="*50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

==> Training starts!
Epoch 0:
Training loss: 1.0009, Training accuracy: 0.5664
Validation loss: 0.8996, Validation accuracy: 0.6251
Saving ...

Epoch 1:
Training loss: 0.8178, Training accuracy: 0.6652
Validation loss: 0.8420, Validation accuracy: 0.6599
Saving ...

Epoch 2:
Training loss: 0.6989, Training accuracy: 0.7193
Validation loss: 0.7149, Validation accuracy: 0.7163
Saving ...

Epoch 3:
Training loss: 0.5985, Training accuracy: 0.7648
Validation loss: 0.6251, Validation accuracy: 0.7573
Saving ...

Epoch 4:
Training loss: 0.5395, Training accuracy: 0.7917
Validation loss: 0.5592, Validation accuracy: 0.7809
Saving ...

Epoch 5:
Training loss: 0.4960, Training accuracy: 0.8092
Validation loss: 0.6103, Validation accuracy: 0.7615

Epoch 6:
Training loss: 0.4662, Training accuracy: 0.8222


# Training RotNet model on original CIFAR task

In [5]:
def initialize_weights(layer):
    """Overwrite ``layer.weight`` in place with Kaiming-normal values."""
    weight = layer.weight
    init.kaiming_normal_(weight)

In [6]:
class RotNet_Add_Linear_Layer(nn.Module):
    """Reuse every convolutional part of a rotation network under a new 10-way linear head.

    The stem and the three residual stages are taken from ``rotnet`` by
    reference (not copied), so they are the same module objects as in
    ``rotnet``. Only the final linear layer is newly created.

    Args:
        rotnet: The network trained on image rotation.
    """

    def __init__(self, rotnet):
        super().__init__()
        self.in_planes = 16

        # Shared feature extractor.
        self.conv1, self.bn1 = rotnet.conv1, rotnet.bn1
        self.layer1 = rotnet.layer1
        self.layer2 = rotnet.layer2
        self.layer3 = rotnet.layer3

        # Fresh classification head.
        self.linear = nn.Linear(64, 10)
        initialize_weights(self.linear)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # Average-pool with a kernel equal to the feature-map width.
        pooled = F.avg_pool2d(features, features.size()[3])
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

In [7]:
class RotNet_Linear_Layer_After_Second_Conv(nn.Module):
    """Keep the rotation network up to its second stage and classify from the flattened features.

    The stem and the first two residual stages are taken from ``rotnet`` by
    reference (not copied). The output of the second stage is flattened and
    fed to a new linear layer that expects 8192 input features.

    Args:
        rotnet: The network trained on image rotation.
    """

    def __init__(self, rotnet):
        super().__init__()
        self.in_planes = 16

        # Shared feature extractor (third stage deliberately omitted).
        self.conv1, self.bn1 = rotnet.conv1, rotnet.bn1
        self.layer1 = rotnet.layer1
        self.layer2 = rotnet.layer2

        # Fresh classification head on the flattened feature map.
        self.linear = nn.Linear(8192, 10)
        initialize_weights(self.linear)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2):
            features = stage(features)
        flat = features.view(features.size(0), -1)
        return self.linear(flat)

In [66]:
class RotNet_Conv_And_Linear_Layer_After_Second_Conv(nn.Module):
    """Keep the rotation network up to its second stage, then use a supplied third stage and a new head.

    The stem and the first two residual stages are taken from ``rotnet`` by
    reference (not copied). ``layer3`` is used as the third stage and a new
    10-way linear layer is created on top.

    Args:
        rotnet: The network trained on image rotation.
        layer3: module used as the third stage.
    """

    def __init__(self, rotnet, layer3):
        super().__init__()
        self.in_planes = 16

        # Shared lower part of the feature extractor.
        self.conv1, self.bn1 = rotnet.conv1, rotnet.bn1
        self.layer1 = rotnet.layer1
        self.layer2 = rotnet.layer2

        # Caller-provided third stage and fresh classification head.
        self.layer3 = layer3
        self.linear = nn.Linear(64, 10)
        initialize_weights(self.linear)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # Average-pool with a kernel equal to the feature-map width.
        pooled = F.avg_pool2d(features, features.size()[3])
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)




In [135]:
# Loading in Rotnet model
RotNet_path = "/content/drive/MyDrive/ECE 661/Final Project/RotNetFinal.pth"

device = 'cuda' if torch.cuda.is_available() else 'cpu'
rotnet = ResNet(BasicBlock, [3, 3, 3], num_classes = 4)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written during a GPU session still loads on a CPU-only runtime.
checkpoint = torch.load(RotNet_path, map_location=device)
rotnet.load_state_dict(checkpoint["state_dict"])
# Seed after loading: everything random that follows in the notebook starts
# from this fixed state.
torch.manual_seed(2022)

# Parameters
INITIAL_LR = 0.1
MOMENTUM = 0.9
REG = 5e-4
criterion = nn.CrossEntropyLoss()
EPOCHS = 50
DECAY_EPOCHS = {15, 30, 40}   # epochs at which the learning rate is multiplied by DECAY
DECAY = 0.2

In [128]:
# Default: a freshly initialised 10-class ResNet (no RotNet weights reused)
net = ResNet(BasicBlock, [3, 3, 3], num_classes = 10).to(device)

In [None]:
# Replacing the final linear layer with a new one
net = RotNet_Add_Linear_Layer(rotnet).to(device)

In [None]:
# Removing the final conv block, and put linear layer after second conv block
net = RotNet_Linear_Layer_After_Second_Conv(rotnet).to(device)

In [136]:
# Replace the final conv block/final linear layer with a new final conv block/linear layer
# `dummy` exists only to supply a freshly initialised third stage.
dummy = ResNet(BasicBlock, [3, 3, 3], num_classes = 4)
net = RotNet_Conv_And_Linear_Layer_After_Second_Conv(rotnet, dummy.layer3).to(device)

In [137]:
# Use these optimizers to select specific layers to update

#optimizer = optim.SGD(net.parameters(), lr = INITIAL_LR, momentum = MOMENTUM, weight_decay = REG)
#optimizer = optim.SGD(net.linear.parameters(), lr = INITIAL_LR, momentum = MOMENTUM, weight_decay = REG)

# Active choice: only the linear head and the third stage are handed to the
# optimizer; all other parameters are never stepped.
params = [*net.linear.parameters(), *net.layer3.parameters()]
optimizer = optim.SGD(
    params,
    lr = INITIAL_LR,
    momentum = MOMENTUM,
    weight_decay = REG,
)

In [138]:
# Semi-Supervised

# Taking Subsets of CIFAR Dataset for Semi-Supervised Training
# Creates a subset with the specified number of data points from each class

def get_subset_trainloader(trainset, num, batch_size=128, num_workers=16, num_classes=10):
    """Build a shuffled DataLoader over a class-balanced random subset of ``trainset``.

    For each class label ``0 .. num_classes - 1`` up to ``num`` sample indices
    are drawn without replacement. If a class has fewer than ``num`` samples,
    all of them are used.

    Args:
        trainset: dataset exposing a ``targets`` sequence of integer labels,
            aligned with the dataset's indices.
        num: number of samples to draw per class.
        batch_size: batch size of the returned loader.
        num_workers: worker processes of the returned loader.
        num_classes: number of class labels to sample from.

    Returns:
        A ``torch.utils.data.DataLoader`` (``shuffle=True``) over a
        ``torch.utils.data.Subset`` of ``trainset``; the subset's indices are
        grouped by class in ascending label order.
    """
    # Loop-invariant: convert the label list once instead of once per class.
    targets = torch.as_tensor(trainset.targets)

    per_class_indices = []
    for label in range(num_classes):
        class_indices = (targets == label).nonzero().reshape(-1)
        # One randperm call per class, in label order, so a fixed torch seed
        # keeps selecting the same subset.
        picked = torch.randperm(len(class_indices))[:num]
        per_class_indices.append(class_indices[picked])

    # The indices stay integer tensors throughout (no float round trip).
    train_indices = torch.cat(per_class_indices).tolist() if per_class_indices else []

    data_subset = torch.utils.data.Subset(trainset, train_indices)
    return torch.utils.data.DataLoader(
        data_subset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# Note that 5000 is all of the images from each class
# This rebinds train_loader to a subset with 50 images per class.
train_loader = get_subset_trainloader(trainset, 50)

In [139]:
# Drive is mounted at the absolute path /content/drive. The former relative
# "./content/..." path resolved against the working directory, so checkpoints
# never reached the mounted Drive folder.
CHECKPOINT_FOLDER = "/content/drive/MyDrive/ECE 661/Final Project"

best_val_acc = 0
current_learning_rate = INITIAL_LR

print("==> Training starts!")
print("="*50)
for i in range(0, EPOCHS):
    # handle the learning rate scheduler.
    if i in DECAY_EPOCHS:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # Training
    net.train()
    total_examples = 0
    correct_examples = 0
    train_loss = 0.0

    print("Epoch %d:" %i)

    # One Epoch
    for batch_idx, (inputs, targets) in enumerate(train_loader):

        inputs = inputs.to(device)
        targets = targets.to(device)
        # Call the module (not .forward) so registered hooks run.
        out = net(inputs)
        loss = criterion(out, targets)
        # Clears the gradients of every parameter of net, including those the
        # optimizer was not given.
        net.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the running sum does not hold on to
        # autograd state from every batch.
        train_loss += loss.item()

        # Counting Correct Predictions (vectorised over the batch)
        correct_examples += (out.argmax(dim=1) == targets).sum().item()
        total_examples += targets.size(0)

    # Number of training samples seen this epoch.
    print(total_examples)
    # Compute Loss/Accuracy
    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

    # Validation
    net.eval()
    total_examples = 0
    correct_examples = 0
    val_loss = 0.0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):

            inputs = inputs.to(device)
            targets = targets.to(device)
            out = net(inputs)
            loss = criterion(out, targets)
            val_loss += loss.item()

            # Counting Correct Predictions
            correct_examples += (out.argmax(dim=1) == targets).sum().item()
            total_examples += targets.size(0)

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    # save the model checkpoint (only when validation accuracy improves)
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'RotNet_Linear_NoFT.pth'))

    print('')

print("="*50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

==> Training starts!
Epoch 0:
500
Training loss: 2.4369, Training accuracy: 0.1440
Validation loss: 2.3178, Validation accuracy: 0.1300
Saving ...

Epoch 1:
500
Training loss: 1.9522, Training accuracy: 0.3260
Validation loss: 2.1614, Validation accuracy: 0.2007
Saving ...

Epoch 2:
500
Training loss: 1.5412, Training accuracy: 0.4460
Validation loss: 1.6480, Validation accuracy: 0.3736
Saving ...

Epoch 3:
500
Training loss: 1.2379, Training accuracy: 0.5480
Validation loss: 1.7999, Validation accuracy: 0.4018
Saving ...

Epoch 4:
500
Training loss: 0.8452, Training accuracy: 0.7040
Validation loss: 2.2684, Validation accuracy: 0.3251

Epoch 5:
500
Training loss: 0.5275, Training accuracy: 0.8460
Validation loss: 1.9889, Validation accuracy: 0.3618

Epoch 6:
500
Training loss: 0.3055, Training accuracy: 0.9400
Validation loss: 3.3679, Validation accuracy: 0.3369

Epoch 7:
500
Training loss: 0.1813, Training accuracy: 0.9580
Validation loss: 2.5864, Validation accuracy: 0.4111
Saving .

In [None]:
'''
Credit goes to https://github.com/p3i0t/SimCLR-CIFAR10
'''

import torch.nn as nn

class SimCLR(nn.Module):
    """Backbone encoder adapted for CIFAR10 plus an MLP projection head.

    Args:
        base_encoder: callable invoked as ``base_encoder(pretrained=False)``;
            the returned module must expose ``fc`` (with ``in_features``),
            ``conv1`` and ``maxpool`` attributes.
        projection_dim: output size of the projection head.
    """

    def __init__(self, base_encoder, projection_dim=128):
        super().__init__()
        # load model from torchvision.models without pretrained weights.
        encoder = base_encoder(pretrained=False)
        self.enc = encoder
        # Read the feature width before the fc layer is replaced below.
        self.feature_dim = encoder.fc.in_features

        # Customize for CIFAR10. Replace conv 7x7 with conv 3x3, and remove first max pooling.
        # See Section B.9 of SimCLR paper.
        encoder.conv1 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        encoder.maxpool = nn.Identity()
        # remove final fully connected layer, so the encoder outputs features.
        encoder.fc = nn.Identity()

        # Add MLP projection.
        self.projection_dim = projection_dim
        hidden_dim = 2048
        self.projector = nn.Sequential(
            nn.Linear(self.feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, projection_dim),
        )

    def forward(self, x):
        """Return ``(feature, projection)`` for the input batch ``x``."""
        feature = self.enc(x)
        return feature, self.projector(feature)

def simclr_resnet50():
    """Return a SimCLR model whose encoder is built by torchvision's resnet50 constructor."""
    encoder_factory = torchvision.models.resnet50
    return SimCLR(encoder_factory)