In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as ttf

import os
import os.path as osp

from tqdm import tqdm
from PIL import Image
from sklearn.metrics import roc_auc_score
import numpy as np
torch.cuda.empty_cache()

# README

I didn't try many architectures. I just used the ResNet34 structure from scratch according to the suggestion from the recitation. To build the ResNet34, I referenced some articles about how ResNet34 works and how to build it from scratch. \

This notebook is based on the starter notebook. \

To run the code, just run the following blocks in order. Notice that in the 'Let's train' part, I used some 'if-else' statements to save the model based on different accuracy it achieved. And I finally loaded the best model just after the 'Let's train' part. Then use it for validation and submission. \

I firstly used 'batch_size = 256
lr = 0.1
epochs = 150' as the starting parameter. Then after about 120 epchos, I changed these parameters into 'batch_size = 256
lr = 0.01
epochs = 70' and trained another 70 epochs to try to jump out of the local minimal to get a higher accuracy. \

Based on the suggestions from our mentor, I used data augmentation techniques in the 'Dataset & DataLoader' part. I mainly used flip, rotation and normalization. These techniques helped me achieve at least 3% higher accuracy. \

The final accuracy is about 85%.


# TODOs
As you go, please read the code and keep an eye out for TODOs!

# Download Data

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install --upgrade --force-reinstall --no-deps kaggle==1.5.8
!mkdir /root/.kaggle

with open("/root/.kaggle/kaggle.json", "w+") as f:
    f.write('{"username":"yanyuc","key":"dc3249c0209ecf021c2a7c30ff21d247"}') # Put your kaggle username & key here

!chmod 600 /root/.kaggle/kaggle.json

Collecting kaggle==1.5.8
  Downloading kaggle-1.5.8.tar.gz (59 kB)
[?25l[K     |█████▌                          | 10 kB 29.6 MB/s eta 0:00:01[K     |███████████                     | 20 kB 36.8 MB/s eta 0:00:01[K     |████████████████▋               | 30 kB 23.8 MB/s eta 0:00:01[K     |██████████████████████▏         | 40 kB 13.5 MB/s eta 0:00:01[K     |███████████████████████████▊    | 51 kB 15.9 MB/s eta 0:00:01[K     |████████████████████████████████| 59 kB 5.9 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.8-py3-none-any.whl size=73275 sha256=a38544303483091da5d0efb05fedb90c454edd7e60fd4df752b78be7cc834600
  Stored in directory: /root/.cache/pip/wheels/de/f7/d8/c3902cacb7e62cb611b1ad343d7cc07f42f7eb76ae3a52f3d1
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12


In [4]:
!kaggle competitions download -c 11-785-s22-hw2p2-classification
!kaggle competitions download -c 11-785-s22-hw2p2-verification

!unzip -q 11-785-s22-hw2p2-classification.zip
!unzip -q 11-785-s22-hw2p2-verification.zip

!ls

Downloading 11-785-s22-hw2p2-classification.zip to /content
100% 2.35G/2.35G [00:25<00:00, 149MB/s]
100% 2.35G/2.35G [00:25<00:00, 98.2MB/s]
Downloading 11-785-s22-hw2p2-verification.zip to /content
 95% 249M/263M [00:05<00:00, 42.4MB/s]
100% 263M/263M [00:06<00:00, 45.9MB/s]
11-785-s22-hw2p2-classification.zip   sample_data
11-785-s22-hw2p2-verification.zip     train_subset
classification			      verification
classification_sample_submission.csv  verification_sample_submission.csv
drive


# Hyperparameters

In [5]:
"""
The well-accepted SGD batch_size & lr combination for CNN classification is 256 batch size for 0.1 learning rate.
When changing batch size for SGD, follow the linear scaling rule - halving batch size -> halve learning rate, etc.
This is less theoretically supported for Adam, but in my experience, it's a decent ballpark estimate.
"""
batch_size = 256
lr = 0.1
epochs = 150 # Just for the early submission. We'd want you to train like 50 epochs for your main submissions.

# Residual Block

In [6]:
class ResBlock(nn.Module):
    def __init__(self, in_channel,out_channel, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.stride = stride
        self.relu2 = nn.ReLU()
        self.downsample = downsample


    def forward(self, x):
        shortcut = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
          shortcut = self.downsample(x)
        
        out += shortcut
        out = self.relu2(out)

        return out
    
    

# Very Simple Network

In [7]:
from torch.nn.modules.activation import ReLU
class Network(nn.Module):
    """
    The Very Low early deadline architecture is a 4-layer CNN.
    The first Conv layer has 64 channels, kernel size 7, and stride 4.
    The next three have 128, 256, and 512 channels. Each have kernel size 3 and stride 2.
    Think about what the padding should be for each layer to not change spatial resolution.
    Each Conv layer is accompanied by a Batchnorm and ReLU layer.
    Finally, you want to average pool over the spatial dimensions to reduce them to 1 x 1.
    Then, remove (Flatten?) these trivial 1x1 dimensions away.
    Look through https://pytorch.org/docs/stable/nn.html 
    TODO: Fill out the model definition below! 

    Why does a very simple network have 4 convolutions?
    Input images are 224x224. Note that each of these convolutions downsample.
    Downsampling 2x effectively doubles the receptive field, increasing the spatial
    region each pixel extracts features from. Downsampling 32x is standard
    for most image models.

    Why does a very simple network have high channel sizes?
    Every time you downsample 2x, you do 4x less computation (at same channel size).
    To maintain the same level of computation, you 2x increase # of channels, which 
    increases computation by 4x. So, balances out to same computation.
    Another intuition is - as you downsample, you lose spatial information. Want
    to preserve some of it in the channel dimension.
    """
    def __init__(self, block, num_classes=7000):
        super().__init__()


        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) # TODO: Conv group 1
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(3, 2, 1)

        self.in_channel = 64

        self.layer1 = nn.Sequential(
            block(64, 64),
            block(64, 64),
            block(64, 64)
        )
        
        self.layer2 = self._make_layer(block, 128, 4, stride=2)
        self.layer3 = self._make_layer(block, 256, 6, stride=2)
        #self.linear = nn.Linear(512, 512)
        #self.dropout = nn.Dropout(0.2)
        self.layer4 = self._make_layer(block, 512, 3, stride=2)


        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.cls_layer = nn.Linear(512, num_classes)
    
    def _make_layer(self, block, outchannel, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channel != outchannel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, outchannel, 1, stride, bias=False),
                nn.BatchNorm2d(outchannel),
            )
        layers = []
        layers.append(block(self.in_channel, outchannel, stride, downsample))
        self.in_channel = outchannel

        for i in range(1, blocks):
            layers.append(block(self.in_channel, outchannel))
        return nn.Sequential(*layers)

    
    def forward(self, x, return_feats=False):
        """
        What is return_feats? It essentially returns the second-to-last-layer
        features of a given image. It's a "feature encoding" of the input image,
        and you can use it for the verification task. You would use the outputs
        of the final classification layer for the classification task.

        You might also find that the classification outputs are sometimes better
        for verification too - try both.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        #x = self.linear(x)
        #x = self.dropout(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)     # convert 1 X 1 to vector
        x = self.cls_layer(x)

        return x


# Dataset & DataLoader

In [8]:
"""
Transforms (data augmentation) is quite important for this task.
Go explore https://pytorch.org/vision/stable/transforms.html for more details
"""

class AddGaussianNoise(object):
    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean
        
    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

DATA_DIR = "/content" 
TRAIN_DIR = osp.join(DATA_DIR, "classification/classification/train") # This is a smaller subset of the data. Should change this to classification/classification/train train_subset/train_subset
VAL_DIR = osp.join(DATA_DIR, "classification/classification/dev")
TEST_DIR = osp.join(DATA_DIR, "classification/classification/test")

train_transforms = [ttf.RandomHorizontalFlip(p=0.5),
                    ttf.RandomRotation((-40, 40)),
                    ttf.ToTensor(),
                    ttf.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])] # , ttf.Normalize([0, 0, 0], [1, 1, 1]) ttf.RandomHorizontalFlip(p=0.5), ttf.RandomCrop(32, padding=4),
val_transforms = [ttf.ToTensor(), ttf.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])] # , ttf.Normalize([0, 0, 0], [1, 1, 1]) ttf.CenterCrop((64, 64)),

train_dataset = torchvision.datasets.ImageFolder(TRAIN_DIR,
                                                 transform=ttf.Compose(train_transforms))
val_dataset = torchvision.datasets.ImageFolder(VAL_DIR,
                                               transform=ttf.Compose(val_transforms))


train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, drop_last=True, num_workers=2, pin_memory=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        drop_last=True, num_workers=2, pin_memory=False)

# Setup everything for training

In [9]:
model = Network(ResBlock)
model.cuda()

# For this homework, we're limiting you to 35 million trainable parameters, as
# outputted by this. This is to help constrain your search space and maintain
# reasonable training times & expectations
num_trainable_parameters = 0
for p in model.parameters():
    num_trainable_parameters += p.numel()
print("Number of Params: {}".format(num_trainable_parameters))

# TODO: What criterion do we use for this task?
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
#optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
# T_max is "how many times will i call scheduler.step() until it reaches 0 lr?"

# For this homework, we strongly strongly recommend using FP16 to speed up training.
# It helps more for larger models.
# Go to https://effectivemachinelearning.com/PyTorch/8._Faster_training_with_mixed_precision
# and compare "Single precision training" section with "Mixed precision training" section
scaler = torch.cuda.amp.GradScaler()

Number of Params: 24875672


In [None]:
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_best_3'))

<All keys matched successfully>

# Let's train!

In [None]:
for epoch in range(epochs):
    model.train()
    # Quality of life tip: leave=False and position=0 are needed to make tqdm usable in jupyter
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 

    num_correct = 0
    total_loss = 0

    # added after about 80 epochs without this line; re-augmentation for the data; not so sure with this part
    train_dataset = torchvision.datasets.ImageFolder(TRAIN_DIR, transform=ttf.Compose(train_transforms))
    # added after about 80 epochs without this line; for previous re-augmentation; not so sure with this part too
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=2, pin_memory=False)

    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        # Don't be surprised - we just wrap these two lines to make it work for FP16
        with torch.cuda.amp.autocast():    
            outputs = model(x)
            loss = criterion(outputs, y)

        # Update # correct & loss as we go
        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        total_loss += float(loss)

        # tqdm lets you add some details so you can monitor training as you train.
        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / ((i + 1) * batch_size)),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.06f}".format(float(optimizer.param_groups[0]['lr'])))
        
        # Another couple things you need for FP16. 
        scaler.scale(loss).backward() # This is a replacement for loss.backward()
        scaler.step(optimizer) # This is a replacement for optimizer.step()
        scaler.update() # This is something added just for FP16

        scheduler.step() # We told scheduler T_max that we'd call step() (len(train_loader) * epochs) many times.

        batch_bar.update() # Update tqdm bar

    #scheduler.step() # We told scheduler T_max that we'd call step() (len(train_loader) * epochs) many times.

    if 80 > (100 * num_correct / (len(train_loader) * batch_size)) > 50: # The best accuracy is hard coded here because there were only few changes
    # save the model to specified directory
      torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_0_tahir')

    #if 85 > (100 * num_correct / (len(train_loader) * batch_size)) > 80: # The best accuracy is hard coded here because there were only few changes
    # save the model to specified directory
      #torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_1_tahir')
    #if 90 > (100 * num_correct / (len(train_loader) * batch_size)) > 85: # The best accuracy is hard coded here because there were only few changes
    # save the model to specified directory
      #torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_2_tahir')
    #if 93> (100 * num_correct / (len(train_loader) * batch_size)) > 90: # The best accuracy is hard coded here because there were only few changes
    # save the model to specified directory
      #torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_3_tahir')
    #if (100 * num_correct / (len(train_loader) * batch_size)) > 94: # The best accuracy is hard coded here because there were only few changes
    # save the model to specified directory
      #torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_4_tahir')

    batch_bar.close() # You need this to close the tqdm bar

    # You can add validation per-epoch here if you would like

    print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        100 * num_correct / (len(train_loader) * batch_size),
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))
    if epoch % 3 == 0:
      model.eval()
      batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
      num_correct = 0
      for i, (x, y) in enumerate(val_loader):

        x = x.cuda()
        y = y.cuda()

        with torch.no_grad():
          outputs = model(x)

        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / ((i + 1) * batch_size)))

        batch_bar.update()
      
      batch_bar.close()
      print("Validation: {:.04f}%".format(100 * num_correct / len(val_dataset)))
      if 85 > (100 * num_correct / len(val_dataset)) > 84.7:
        torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_best_4')
      # Final saved model is from here
      if (100 * num_correct / len(val_dataset)) > 85:
        torch.save(model.state_dict(),'/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_best_5')




Epoch 1/70: Train Acc 99.9914%, Train Loss 0.0308, Learning Rate 0.0100




Validation: 84.0343%




Epoch 2/70: Train Acc 99.9993%, Train Loss 0.0301, Learning Rate 0.0100




Epoch 3/70: Train Acc 99.9878%, Train Loss 0.0290, Learning Rate 0.0100




Epoch 4/70: Train Acc 99.9943%, Train Loss 0.0277, Learning Rate 0.0099




Validation: 84.1543%




Epoch 5/70: Train Acc 99.9957%, Train Loss 0.0267, Learning Rate 0.0099




Epoch 6/70: Train Acc 99.9964%, Train Loss 0.0260, Learning Rate 0.0098




Epoch 7/70: Train Acc 99.9993%, Train Loss 0.0249, Learning Rate 0.0098




Validation: 84.1486%




Epoch 8/70: Train Acc 99.9979%, Train Loss 0.0244, Learning Rate 0.0097




Epoch 9/70: Train Acc 99.9957%, Train Loss 0.0243, Learning Rate 0.0096




Epoch 10/70: Train Acc 99.9986%, Train Loss 0.0237, Learning Rate 0.0095




Validation: 84.2286%




Epoch 11/70: Train Acc 99.9986%, Train Loss 0.0232, Learning Rate 0.0094




Epoch 12/70: Train Acc 100.0000%, Train Loss 0.0228, Learning Rate 0.0093




Epoch 13/70: Train Acc 99.9957%, Train Loss 0.0228, Learning Rate 0.0092




Validation: 84.2857%




Epoch 14/70: Train Acc 99.9979%, Train Loss 0.0222, Learning Rate 0.0090




Epoch 15/70: Train Acc 99.9986%, Train Loss 0.0221, Learning Rate 0.0089




Epoch 16/70: Train Acc 99.9971%, Train Loss 0.0216, Learning Rate 0.0088




Validation: 84.2514%




Epoch 17/70: Train Acc 99.9979%, Train Loss 0.0216, Learning Rate 0.0086




Epoch 18/70: Train Acc 99.9993%, Train Loss 0.0212, Learning Rate 0.0085




Epoch 19/70: Train Acc 99.9986%, Train Loss 0.0208, Learning Rate 0.0083




Validation: 84.4686%




Epoch 20/70: Train Acc 99.9964%, Train Loss 0.0209, Learning Rate 0.0081




Epoch 21/70: Train Acc 99.9993%, Train Loss 0.0207, Learning Rate 0.0079




Epoch 22/70: Train Acc 99.9986%, Train Loss 0.0205, Learning Rate 0.0078




Validation: 84.5086%




Epoch 23/70: Train Acc 99.9993%, Train Loss 0.0201, Learning Rate 0.0076




Epoch 24/70: Train Acc 100.0000%, Train Loss 0.0200, Learning Rate 0.0074




Epoch 25/70: Train Acc 99.9979%, Train Loss 0.0200, Learning Rate 0.0072




Validation: 84.5543%




Epoch 26/70: Train Acc 100.0000%, Train Loss 0.0197, Learning Rate 0.0070




Epoch 27/70: Train Acc 99.9986%, Train Loss 0.0196, Learning Rate 0.0068




Epoch 28/70: Train Acc 99.9993%, Train Loss 0.0197, Learning Rate 0.0065




Validation: 84.6371%




Epoch 29/70: Train Acc 100.0000%, Train Loss 0.0193, Learning Rate 0.0063




Epoch 30/70: Train Acc 99.9986%, Train Loss 0.0192, Learning Rate 0.0061




Epoch 31/70: Train Acc 100.0000%, Train Loss 0.0191, Learning Rate 0.0059




Validation: 84.6829%




Epoch 32/70: Train Acc 100.0000%, Train Loss 0.0188, Learning Rate 0.0057




Epoch 33/70: Train Acc 100.0000%, Train Loss 0.0189, Learning Rate 0.0054




Epoch 34/70: Train Acc 99.9993%, Train Loss 0.0187, Learning Rate 0.0052




Validation: 84.7914%




Epoch 35/70: Train Acc 100.0000%, Train Loss 0.0184, Learning Rate 0.0050




Epoch 36/70: Train Acc 100.0000%, Train Loss 0.0184, Learning Rate 0.0048




Epoch 37/70: Train Acc 100.0000%, Train Loss 0.0181, Learning Rate 0.0046




Validation: 84.9486%




Epoch 38/70: Train Acc 99.9993%, Train Loss 0.0181, Learning Rate 0.0043




Epoch 39/70: Train Acc 99.9993%, Train Loss 0.0180, Learning Rate 0.0041




Epoch 40/70: Train Acc 100.0000%, Train Loss 0.0180, Learning Rate 0.0039




Validation: 84.8400%




Epoch 41/70: Train Acc 99.9993%, Train Loss 0.0179, Learning Rate 0.0037




Epoch 42/70: Train Acc 100.0000%, Train Loss 0.0177, Learning Rate 0.0035




Epoch 43/70: Train Acc 100.0000%, Train Loss 0.0176, Learning Rate 0.0032




Validation: 84.9514%




Epoch 44/70: Train Acc 100.0000%, Train Loss 0.0175, Learning Rate 0.0030




Epoch 45/70: Train Acc 99.9993%, Train Loss 0.0176, Learning Rate 0.0028




Epoch 46/70: Train Acc 100.0000%, Train Loss 0.0174, Learning Rate 0.0026




Validation: 84.9600%




Epoch 47/70: Train Acc 99.9993%, Train Loss 0.0173, Learning Rate 0.0024




Epoch 48/70: Train Acc 100.0000%, Train Loss 0.0172, Learning Rate 0.0022




Epoch 49/70: Train Acc 99.9993%, Train Loss 0.0172, Learning Rate 0.0021




Validation: 84.9943%




Epoch 50/70: Train Acc 100.0000%, Train Loss 0.0170, Learning Rate 0.0019




Epoch 51/70: Train Acc 100.0000%, Train Loss 0.0170, Learning Rate 0.0017




Epoch 52/70: Train Acc 100.0000%, Train Loss 0.0170, Learning Rate 0.0015




Validation: 85.0000%




Epoch 53/70: Train Acc 100.0000%, Train Loss 0.0169, Learning Rate 0.0014




Epoch 54/70: Train Acc 100.0000%, Train Loss 0.0168, Learning Rate 0.0012




Epoch 55/70: Train Acc 100.0000%, Train Loss 0.0167, Learning Rate 0.0011




Validation: 85.0086%




Epoch 56/70: Train Acc 100.0000%, Train Loss 0.0168, Learning Rate 0.0010




Epoch 57/70: Train Acc 100.0000%, Train Loss 0.0167, Learning Rate 0.0008




Epoch 58/70: Train Acc 100.0000%, Train Loss 0.0167, Learning Rate 0.0007




Validation: 85.0343%




Epoch 59/70: Train Acc 100.0000%, Train Loss 0.0166, Learning Rate 0.0006




Epoch 60/70: Train Acc 100.0000%, Train Loss 0.0165, Learning Rate 0.0005




Epoch 61/70: Train Acc 100.0000%, Train Loss 0.0164, Learning Rate 0.0004




Validation: 85.0457%




Epoch 62/70: Train Acc 100.0000%, Train Loss 0.0165, Learning Rate 0.0003




Epoch 63/70: Train Acc 99.9993%, Train Loss 0.0165, Learning Rate 0.0002




Epoch 64/70: Train Acc 100.0000%, Train Loss 0.0165, Learning Rate 0.0002




Validation: 85.0486%




Epoch 65/70: Train Acc 100.0000%, Train Loss 0.0164, Learning Rate 0.0001




Epoch 66/70: Train Acc 100.0000%, Train Loss 0.0164, Learning Rate 0.0001




Epoch 67/70: Train Acc 100.0000%, Train Loss 0.0163, Learning Rate 0.0000




Validation: 85.0457%




Epoch 68/70: Train Acc 100.0000%, Train Loss 0.0164, Learning Rate 0.0000




Epoch 69/70: Train Acc 100.0000%, Train Loss 0.0162, Learning Rate 0.0000




Epoch 70/70: Train Acc 100.0000%, Train Loss 0.0163, Learning Rate 0.0000




Validation: 85.0629%


In [None]:
# Load the best model for evaluation
model = Network(ResBlock)
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/best_model_hw2p2_best_5'))
model.cuda()

Network(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU()
    )
    (1): ResBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU()
      (conv2): Co

# Classification Task: Validation

In [None]:
model.eval()
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
num_correct = 0
for i, (x, y) in enumerate(val_loader):

    x = x.cuda()
    y = y.cuda()

    with torch.no_grad():
        outputs = model(x)

    num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
    batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / ((i + 1) * batch_size)))

    batch_bar.update()
    
batch_bar.close()
print("Validation: {:.04f}%".format(100 * num_correct / len(val_dataset)))

                                                                    

Validation: 84.4314%




# Classification Task: Submit to Kaggle

In [None]:
class ClassificationTestSet(Dataset):
    # It's possible to load test set data using ImageFolder without making a custom class.
    # See if you can think it through!

    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # This one-liner basically generates a sorted list of full paths to each image in data_dir
        self.img_paths = list(map(lambda fname: osp.join(self.data_dir, fname), sorted(os.listdir(self.data_dir))))

    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, idx):
        return self.transforms(Image.open(self.img_paths[idx]))

In [None]:
test_dataset = ClassificationTestSet(TEST_DIR, ttf.Compose(val_transforms))
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                         drop_last=False, num_workers=1)

In [None]:
model.eval()
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, position=0, leave=False, desc='Test')

res = []

for i, (x) in enumerate(test_loader):

    x = x.cuda()
    with torch.no_grad():
      output = model(x)
    outputIndex = torch.argmax(output, axis=1)
    for j in outputIndex:
      res.append(j)

    
    # TODO: Finish predicting on the test set.
    

    batch_bar.update()
    
batch_bar.close()



In [None]:
with open("classification_early_submission.csv", "w+") as f:
    f.write("id,label\n")
    for i in range(len(test_dataset)):
        f.write("{},{}\n".format(str(i).zfill(6) + ".jpg", res[i]))

In [None]:
!kaggle competitions submit -c 11-785-s22-hw2p2-classification -f classification_early_submission.csv -m yanyuc

100% 541k/541k [00:03<00:00, 176kB/s]
Successfully submitted to Face Recognition

# Verification Task: Validation

There are 6K verification dev images, but 166K "pairs" for you to compare. So, it's much more efficient to compute the features for the 6K verification images, and just compare afterwards.

This will be done by creating a dictionary mapping the image file names to the features. Then, you'll use this dictionary to compute the similarities for each pair.

In [None]:
!ls verification/verification/dev | wc -l
!cat verification/verification/verification_dev.csv | wc -l

6000
166801


In [None]:
class VerificationDataset(Dataset):
    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # This one-liner basically generates a sorted list of full paths to each image in data_dir
        self.img_paths = list(map(lambda fname: osp.join(self.data_dir, fname), sorted(os.listdir(self.data_dir))))

    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, idx):
        # We return the image, as well as the path to that image (relative path)
        return self.transforms(Image.open(self.img_paths[idx])), osp.relpath(self.img_paths[idx], self.data_dir)

In [None]:
val_veri_dataset = VerificationDataset(osp.join(DATA_DIR, "verification/verification/dev"),
                                       ttf.Compose(val_transforms))
val_ver_loader = torch.utils.data.DataLoader(val_veri_dataset, batch_size=batch_size, 
                                             shuffle=False, num_workers=1)

In [None]:
model.eval()

feats_dict = dict()
for batch_idx, (imgs, path_names) in tqdm(enumerate(val_ver_loader), total=len(val_ver_loader), position=0, leave=False):
    imgs = imgs.cuda()

    with torch.no_grad():
        # Note that we return the feats here, not the final outputs
        # Feel free to try the final outputs too!
        feats = model(imgs, return_feats=True) 
    
    # TODO: Now we have features and the image path names. What to do with them?
    # Hint: use the feats_dict somehow.
    for i in range(len(path_names)):
      feats_dict[path_names[i]] = feats[i]



In [None]:
# What does this dict look like?
print(list(feats_dict.items())[0])

('000b28b024.jpg', tensor([ 3.1250e-01,  6.7108e-01, -1.3524e+00, -1.0195e+00,  4.5858e-02,
        -2.0668e-01,  8.5356e-01,  4.7467e-01, -2.8756e-01,  3.0829e-01,
         9.3832e-01,  1.5700e-01,  2.1148e-01, -5.2564e-01,  6.2312e-01,
         2.1136e+00,  6.6637e-01,  1.1524e+00,  1.0871e+00, -4.3531e-01,
        -1.4192e+00,  1.0131e-01,  8.4022e-01,  1.2030e+00, -4.2495e-01,
        -3.3430e+00, -8.4064e-01,  1.9372e+00,  2.3399e+00, -8.1316e-01,
        -3.9912e-01,  2.2055e+00, -1.0767e+00,  6.4047e-01, -2.3388e-01,
        -1.5381e-02, -1.9494e+00,  2.0116e+00, -6.4230e-01,  5.0314e-01,
         1.2033e+00,  1.5096e+00,  1.3473e-01,  2.5459e-01, -9.1969e-01,
        -5.1891e-01, -7.6309e-01, -7.5692e-01, -1.3781e+00, -1.0287e+00,
         2.3260e-01, -6.7570e-02,  1.5267e+00, -1.8980e+00, -3.1474e+00,
        -9.5311e-01, -7.2163e-01,  2.0189e-01,  2.8438e+00,  1.9456e+00,
         1.8539e+00,  9.7716e-01, -1.6888e+00,  3.9599e-01,  1.1082e+00,
        -7.1216e-02,  1.4018e+00

In [None]:
# We use cosine similarity between feature embeddings.
# TODO: Find the relevant function in pytorch and read its documentation.
similarity_metric = nn.CosineSimilarity(dim=0, eps=1e-6)

val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_dev.csv")


# Now, loop through the csv and compare each pair, getting the similarity between them
pred_similarities = []
gt_similarities = []
for line in tqdm(open(val_veri_csv).read().splitlines()[1:], position=0, leave=False): # skip header
    img_path1, img_path2, gt = line.split(",")

    # TODO: Use the similarity metric
    # How to use these img_paths? What to do with the features?
    # similarity = similarity_metric(...)

    feats1 = feats_dict.get(img_path1[4:])
    feats2 = feats_dict.get(img_path2[4:])
    similarity = similarity_metric(feats1, feats2)
    similarity = similarity.to(torch.device('cpu'))
    pred_similarities.append(similarity)

    gt_similarities.append(int(gt))

pred_similarities = np.array(pred_similarities)
gt_similarities = np.array(gt_similarities)

print("AUC:", roc_auc_score(gt_similarities, pred_similarities))



AUC: 0.9586054596376377


# Verification Task: Submit to Kaggle

In [None]:
test_veri_dataset = VerificationDataset(osp.join(DATA_DIR, "verification/verification/test"),
                                        ttf.Compose(val_transforms))
test_ver_loader = torch.utils.data.DataLoader(test_veri_dataset, batch_size=batch_size, 
                                              shuffle=False, num_workers=1)

In [None]:
model.eval()

feats_dict = dict()
for batch_idx, (imgs, path_names) in tqdm(enumerate(test_ver_loader), total=len(test_ver_loader), position=0, leave=False):
    imgs = imgs.cuda()

    with torch.no_grad():
        # Note that we return the feats here, not the final outputs
        # Feel free to try to final outputs too!
        feats = model(imgs, return_feats=True) 
    
    # TODO: Now we have features and the image path names. What to do with them?
    # Hint: use the feats_dict somehow.
    for i in range(len(path_names)):
      feats_dict[path_names[i]] = feats[i]



In [None]:
# We use cosine similarity between feature embeddings.
# TODO: Find the relevant function in pytorch and read its documentation.
# similarity_metric = 
val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_test.csv")


# Now, loop through the csv and compare each pair, getting the similarity between them
pred_similarities = []
for line in tqdm(open(val_veri_csv).read().splitlines()[1:], position=0, leave=False): # skip header
    img_path1, img_path2 = line.split(",")

    # TODO: Finish up verification testing.
    # How to use these img_paths? What to do with the features?
    feats1 = feats_dict.get(img_path1[5:])
    feats2 = feats_dict.get(img_path2[5:])
    similarity = similarity_metric(feats1, feats2)
    similarity = similarity.to(torch.device('cpu'))
    pred_similarities.append(similarity)



In [None]:
with open("verification_early_submission.csv", "w+") as f:
    f.write("id,match\n")
    for i in range(len(pred_similarities)):
        f.write("{},{}\n".format(i, pred_similarities[i]))

In [None]:
!kaggle competitions submit -c 11-785-s22-hw2p2-verification -f verification_early_submission.csv -m yanyuc

100% 16.9M/16.9M [00:03<00:00, 4.97MB/s]
Successfully submitted to Face Verification

# Extras

In [None]:
# If you keep re-initializing your model in Colab, can run out of GPU memory, need to restart.
# These three lines can help that - run this before you re-initialize your model

del model
torch.cuda.empty_cache()
!nvidia-smi