<a href="https://colab.research.google.com/github/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm/blob/oscar2/notebooks/oscar_test_config_kernel_size.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Wandb Install, Login, Import

In [1]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!wandb login "6f19b1e6735ebc69af24f18d5b426262416027fb"

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [3]:
import wandb

# Clone Team's Code

In [4]:
!rm -r /content/csgy6953_DeepLearning_Midterm/

In [5]:
!git clone -b config_kernel_size "https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git"

Cloning into 'csgy6953_DeepLearning_Midterm'...
remote: Enumerating objects: 569, done.[K
remote: Counting objects: 100% (294/294), done.[K
remote: Compressing objects: 100% (160/160), done.[K
remote: Total 569 (delta 171), reused 211 (delta 128), pack-reused 275[K
Receiving objects: 100% (569/569), 148.54 KiB | 601.00 KiB/s, done.
Resolving deltas: 100% (354/354), done.


In [6]:
!cp -r /content/csgy6953_DeepLearning_Midterm/src/ .

In [7]:
!cat src/transforms.py

import torch
import torchvision.transforms as transforms
from torchvision.transforms import autoaugment

from typing import Tuple


def make_transforms(means: torch.Tensor, std_devs: torch.Tensor) -> Tuple:
    '''
    Given a tensor of computed means and a tensor of computed standard devations,
    return's a tuple containing a train and test transform pipelines
    '''
    train_transforms = transforms.Compose([
        transforms.RandomRotation(5),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomCrop(32, padding=2),
        transforms.ToTensor(),
        transforms.Normalize(mean=means,
                             std=std_devs)
    ])

    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=means,
                             std=std_devs)
    ])

    return train_transforms, test_transforms


def make_auto_transforms(means: torch.Tensor, std_devs: torch.Tensor) -> Tuple:
    '''
    Utilizes PyTorch'es Auto

# Import, Seed, Device

In [8]:
import torch
import torch.nn as nn

import time
import random

In [9]:
SEED = 1234

random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Load Data

In [11]:
from src.data import get_transformed_data, make_data_loaders
from src.transforms import make_auto_transforms # used to use: make_transforms

BATCH_SIZE = 4
VALID_RATIO = 0.1

train_data, valid_data, test_data = \
get_transformed_data(make_transforms=make_auto_transforms, valid_ratio=VALID_RATIO)

train_iter, valid_iter, test_iter = \
make_data_loaders(train_data, valid_data, test_data, BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


# Model

In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from typing import List, Tuple, Optional


class ResidualBlock(nn.Module):
    '''
    Class representing a convolutional residual block 
    '''

    def __init__(self, num_channels: int, use_stem: bool = False, strides: int = 1, dropout: Optional[float] = None, kernel_size: int = 3):
        '''
        Creates a new instance of a Residual Block
        @param: num_channels (int) - the number of output channels for all convolutions in 
            the block
        @param: use_stem (bool) - whether a 1x1 convolution is needed to downsample the
            residual
        @param: strides (int) - the number of strides to use in the convolutions, defaults to 1
        @param: dropout (float) - if present, adds a dropout between the hidden layers
        '''
        super().__init__()
        self.num_channels = num_channels
        self.use_stem = use_stem
        self.strides = strides

        self.kernel_size = kernel_size

        self.dropout = nn.Dropout(dropout) if dropout is not None else None

        if self.kernel_size == 3:
            self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1, stride=strides)
            self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)

        # partial solution 1:
        if kernel_size == 2:
            self.conv1 = nn.LazyConv2d(num_channels, kernel_size=2, padding=1, stride=strides)
            self.conv2 = nn.LazyConv2d(num_channels, kernel_size=2, padding=0)
            # fetal issue: when input is [256,1,1], MUST pad in order to apply 2x2 kernel

        # # partial solution 2:
        # if self.kernel_size == 2:
        #     self.conv1 = nn.LazyConv2d(num_channels, kernel_size=2, padding=1, stride=strides)
        #     self.conv2 = nn.LazyConv2d(num_channels, kernel_size=2, padding=1)

        self.relu = nn.ReLU(inplace=True)
        self.out = nn.ReLU(inplace=True)
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()

        self.conv_stem = None
        if use_stem:

            if kernel_size == 3:
                self.conv_stem = nn.LazyConv2d(num_channels, kernel_size=1, stride=strides)
            if kernel_size == 2:
                self.conv_stem = nn.LazyConv2d(num_channels, kernel_size=1, stride=strides)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        print()
        print('--------ResidualBlock:--------')
        print(f'(use_stem={self.use_stem})')
        print('block input:', inputs.shape)

        shortcut = inputs

        print()
        print('F(x):', self.conv1)
        x = self.relu(self.bn1(self.conv1(inputs)))
        print('output:', x.shape)

        if self.dropout is not None:
            x = self.dropout(x)
        
        print()
        print('F(x):', self.conv2)
        x = self.bn2(self.conv2(x))
        print('output:', x.shape)

        if self.use_stem:
            # downsample skip connection
            print()
            print('S(x):', self.conv_stem)
            shortcut = self.conv_stem(shortcut)
            print('output:', shortcut.shape)

        # partial solution 2:
        # if self.kernel_size == 2:
        #     if x.shape[-1] > shortcut.shape[-1]:
        #         x = F.pad(x, pad = (-1, -1, -1, -1))
        #         print('negative padding =>', x.shape)
        #         # fetal error: when F(x)'s output becomes [512,2,2] whiel input was [256,1,1], this would reduce F(x)'s output to [512,0,0]
 
        # add in skip connection
        x += shortcut

        return self.out(x)


class StemConfig:
    '''
    convenience class to encapsulate configuration options
    for the ResNet stem
    '''

    def __init__(self, num_channels, kernel_size, stride, padding):
        self.num_channels = num_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding


class ResNet(nn.Module):
    '''
    Class representing a full ResNet model
    '''

    def __init__(self, architecture: List[Tuple[int, int, float]], stem_config: Optional[StemConfig], output_size: int = 10, *args, **kwargs):
        '''
        returns an instance of a ResNet
        '''
        super().__init__()
        if stem_config is not None:
            self.stem = self.create_stem(
                stem_config.num_channels,
                stem_config.kernel_size,
                stem_config.stride,
                stem_config.padding
            )
        else:
            self.stem = self.create_stem()
        self.classifier = self.create_classifier(output_size)

        self.body = nn.Sequential()
        for idx, block_def in enumerate(architecture):
            self.body.add_module(
                f"block_{idx+2}", self.create_block(*block_def, first_block=(idx == 0)))

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """
        Performs forward pass of the inputs through the network
        """
        x = self.stem(inputs)
        x = self.body(x)
        return self.classifier(x)

    def create_stem(self, num_channels: int = 64, kernel_size: int = 7, stride: int = 2, padding: int = 3) \
            -> nn.Sequential:
        """
        Creates a sequential stem as the first component of the model
        """
        return nn.Sequential(
            nn.LazyConv2d(num_channels, kernel_size=kernel_size,
                          padding=padding, stride=stride),
            nn.LazyBatchNorm2d(),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

    def create_classifier(self, num_classes: int) -> nn.Sequential:
        '''
        Creates a sequential classifier head at the very 
        '''
        return nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.LazyLinear(num_classes)
        )

    def create_block(self, num_residuals: int, num_channels: int, dropout: float, kernel_size: int, first_block: bool = False) -> nn.Sequential:
        layer = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                layer.append(ResidualBlock(num_channels, dropout=dropout, kernel_size=kernel_size, use_stem=True, strides=2))
            else:
                layer.append(ResidualBlock(num_channels, dropout=dropout, kernel_size=kernel_size))
        return nn.Sequential(*layer)


In [79]:
# from src.model import ResNet, StemConfig
from src.model import StemConfig

# 1 tuple == 1 layer
# how many blocks in each layer, out_channels, dropout prob, kernel_size
architecture = [
    (2, 64, 0.5, 2),
    (2, 128, 0.5, 2),
    (2, 256, 0.5, 2),
    (2, 512, 0.5, 2),
]

config = StemConfig(num_channels=64, kernel_size=3, stride=1, padding=1)

model  = ResNet(architecture, stem_config=config, output_size=10)

In [80]:
from src.utils import count_parameters

In [81]:
# intialize a new model

# inputs = torch.empty((BATCH_SIZE, 3, 512, 512)) # passed
inputs = torch.empty((BATCH_SIZE, 3, 32, 32)) #  passed
# inputs = torch.empty((BATCH_SIZE, 3, 4, 4))

inputs.normal_()

model = model.to(device)

outputs = model(inputs.to(device)) 
# (Oscar) observation: 2022.11/13(7)_a08.14: internally, this converts all nn.LazyConv2d layers to nn.Conv2d
# to see this, run print(model) both before and after this operation

print('-------------------')
print('-------------------')

print(model)
print(count_parameters(model))
print(outputs.size())


--------ResidualBlock:--------
(use_stem=False)
block input: torch.Size([4, 64, 32, 32])

F(x): LazyConv2d(0, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
output: torch.Size([4, 64, 33, 33])

F(x): LazyConv2d(0, 64, kernel_size=(2, 2), stride=(1, 1))
output: torch.Size([4, 64, 32, 32])

--------ResidualBlock:--------
(use_stem=False)
block input: torch.Size([4, 64, 32, 32])

F(x): LazyConv2d(0, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
output: torch.Size([4, 64, 33, 33])

F(x): LazyConv2d(0, 64, kernel_size=(2, 2), stride=(1, 1))
output: torch.Size([4, 64, 32, 32])

--------ResidualBlock:--------
(use_stem=True)
block input: torch.Size([4, 64, 32, 32])

F(x): LazyConv2d(0, 128, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
output: torch.Size([4, 128, 17, 17])

F(x): LazyConv2d(0, 128, kernel_size=(2, 2), stride=(1, 1))
output: torch.Size([4, 128, 16, 16])

S(x): LazyConv2d(0, 128, kernel_size=(1, 1), stride=(2, 2))
output: torch.Size([4, 128, 16, 16])

----

In [69]:
from src.utils import initialize_parameters, epoch_time