<a href="https://colab.research.google.com/github/asangt/image_models/blob/main/resnet_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone the GitHub repository and install Weights & Biases

In [None]:
!git clone https://github.com/asangt/image_models

Cloning into 'image_models'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 26 (delta 4), reused 23 (delta 4), pack-reused 0[K
Unpacking objects: 100% (26/26), done.


In [None]:
!pip install -q wandb

[K     |████████████████████████████████| 1.7 MB 8.5 MB/s 
[K     |████████████████████████████████| 97 kB 3.8 MB/s 
[K     |████████████████████████████████| 143 kB 55.0 MB/s 
[K     |████████████████████████████████| 180 kB 69.2 MB/s 
[K     |████████████████████████████████| 63 kB 1.9 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
!nvidia-smi

Sat Jan 29 15:35:39 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    29W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Notebook

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import wandb

from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

from image_models.models.resnet import ResNet, _conv2d_bn_act, BottleneckBlock

In [None]:
# define a smaller ResNet50 architecture - https://github.com/kuangliu/pytorch-cifar

class ResNet50_CIFAR(ResNet):
    """ResNet-50 variant with a smaller stem, after https://github.com/kuangliu/pytorch-cifar.

    The parent ``ResNet`` is built with a ``[3, 4, 6, 3]`` layout of
    ``'bottleneck'`` blocks; afterwards the stem convolution, the first stage,
    the average pooling layer and the feature extractor are replaced.

    Args:
        n_classes: forwarded to ``ResNet.__init__`` as its first argument.
        n_channels: forwarded to ``ResNet.__init__`` and used as the number of
            input channels of the replacement stem convolution.
    """

    def __init__(self, n_classes, n_channels):
        super().__init__(n_classes, n_channels, [3, 4, 6, 3], 'bottleneck')

        # Replacement stem: one 3x3, stride-1 layer with 64 output channels.
        # NOTE(review): presumably conv + batch norm + activation, going by the helper's name - confirm.
        self.conv1 = _conv2d_bn_act(
            in_channels=n_channels,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
        )

        # First stage: three bottleneck blocks. The first consumes the 64 stem
        # channels, the remaining two consume the expanded channel count.
        expanded_channels = 64 * BottleneckBlock._expansion
        stage_blocks = [BottleneckBlock(64, 64)]
        for _ in range(2):
            stage_blocks.append(BottleneckBlock(expanded_channels, 64))
        self.conv2_x = nn.Sequential(*stage_blocks)

        self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=1)

        # Rebuild the extractor so that it uses the replaced conv1 / conv2_x;
        # conv3_x .. conv5_x are not defined here and come from the parent class.
        backbone_stages = (
            self.conv1,
            self.conv2_x,
            self.conv3_x,
            self.conv4_x,
            self.conv5_x,
        )
        self.feature_extractor = nn.Sequential(*backbone_stages)

In [None]:
# training functions

def accuracy(y_pred, y):
    """Return the share of entries in ``y_pred`` that equal the matching entry in ``y``.

    The number of matching elements is divided by ``len(y)`` (the size of the
    first dimension of ``y``); the result is a float tensor.
    """
    n_correct = y_pred.eq(y).float().sum()
    return n_correct / len(y)

def run_epoch(model, dataloader, loss_function, optimizer, metric, device):
    """Run one pass over ``dataloader`` and return the per-batch mean loss and metric.

    Args:
        model: callable applied to each input batch.
        dataloader: sized iterable yielding ``(X, y)`` pairs of tensors.
        loss_function: callable ``(model_output, y) -> loss`` returning a scalar tensor.
        optimizer: optimizer used to update the model after every batch; pass
            ``None`` (or any falsy value) to run a pure evaluation pass.
        metric: optional callable ``(predicted_classes, y) -> value``; predicted
            classes are the argmax of the model output over dimension 1.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_metric)``: the sums of the per-batch loss and metric
        divided by ``len(dataloader)``. ``epoch_metric`` is ``0.0`` when no
        metric is given.
    """
    is_training = bool(optimizer)

    epoch_loss, epoch_metric = 0, 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # forward pass - autograd is switched off for evaluation passes so that
        # no computation graph is built (and kept in memory) when it is never
        # going to be back-propagated through
        with torch.set_grad_enabled(is_training):
            m_out = model(X)
            loss = loss_function(m_out, y)

        # backward pass
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if metric:
            # softmax is monotonic, so the argmax of the raw outputs already
            # gives the predicted class
            epoch_metric += metric(m_out.argmax(dim=1), y)

        epoch_loss += loss.item()

    epoch_loss /= len(dataloader)
    epoch_metric /= len(dataloader)

    return epoch_loss, epoch_metric

def train(
    model, n_epochs, dataloaders, loss_function, optimizer, scheduler=None, metric=None, device=None, logger=None
):
    """Train ``model`` for ``n_epochs`` epochs, optionally validating, scheduling and logging.

    Args:
        model: module to train; it is moved to ``device`` in place.
        n_epochs: number of epochs to run.
        dataloaders: dict with a ``'train'`` loader and, optionally, a ``'valid'`` loader.
        loss_function: callable ``(model_output, y) -> loss``.
        optimizer: optimizer used for the training pass.
        scheduler: optional learning-rate scheduler, stepped once per epoch.
            A ``ReduceLROnPlateau`` scheduler is stepped on the validation loss,
            or on the training loss when there is no ``'valid'`` loader.
        metric: optional callable ``(predicted_classes, y) -> value``.
        device: device to run on; defaults to CUDA when available, else CPU.
        logger: optional object with a ``log(dict)`` method (e.g. ``wandb``).
            Validation entries are only logged when a ``'valid'`` loader exists.

    Returns:
        The trained ``model``.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The batches are moved to `device` inside run_epoch, so the model has to
    # live there as well (this is a no-op when the caller already moved it).
    model.to(device)

    has_valid = 'valid' in dataloaders

    for epoch in range(1, n_epochs + 1):
        model.train()
        train_loss, train_metric = run_epoch(model, dataloaders['train'], loss_function, optimizer, metric, device)

        if has_valid:
            model.eval()
            valid_loss, valid_metric = run_epoch(model, dataloaders['valid'], loss_function, None, metric, device)

        if scheduler:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                # ReduceLROnPlateau.step() needs the monitored value; fall back
                # to the training loss when no validation loss is available.
                scheduler.step(valid_loss if has_valid else train_loss)
            else:
                scheduler.step()

        if logger:
            epoch_log = {
                'train loss' : train_loss,
                'train accuracy' : train_metric * 100
            }
            # valid_loss / valid_metric only exist when a 'valid' loader was given
            if has_valid:
                epoch_log['valid loss'] = valid_loss
                epoch_log['valid accuracy'] = valid_metric * 100
            logger.log(epoch_log)

    return model

In [None]:
# Seed torch's global RNG so that weight initialisation and other torch-RNG draws
# are repeatable between runs (GPU kernels may still be non-deterministic).
seed = 0
torch.manual_seed(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hyperparameters
n_epochs = 100
batch_size = 128
learning_rate = 0.1

# Move the model to its device before the optimizer is constructed, so the
# optimizer is built over the parameters in their final location.
model = ResNet50_CIFAR(10, 3)
model = model.to(device)

loss_function = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
# Reduce the learning rate once the monitored loss has stopped improving for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, verbose=True)

In [None]:
# image transforms

# Per-channel mean / std applied by Normalize to both splits.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Random augmentations, applied to the training split only.
train_augmentations = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=(0.9, 1.1), saturation=(0.9, 1.1)),
]

train_transforms = transforms.Compose(
    train_augmentations + [transforms.ToTensor(), transforms.Normalize(cifar_mean, cifar_std)]
)
test_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(cifar_mean, cifar_std)]
)

# datasets - the train=False split of CIFAR-10 is used for validation
dataset_kwargs = dict(root='./data', download=True)
train_set = CIFAR10(train=True, transform=train_transforms, **dataset_kwargs)
valid_set = CIFAR10(train=False, transform=test_transforms, **dataset_kwargs)

# data loaders - only the training data is shuffled
loader_kwargs = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
valid_loader = DataLoader(valid_set, shuffle=False, **loader_kwargs)

dataloaders = dict(train=train_loader, valid=valid_loader)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33masang[0m (use `wandb login --relogin` to force relogin)


In [None]:
# Hyperparameters are handed to wandb.init via `config=` so that they are stored
# with the run; assigning a plain dict to `wandb.config` only rebinds the module
# attribute and does not attach the values to the run.
wandb.init(
    name='resnet50_cifar10',
    project='image_models',
    entity='asang',
    config={
        'learning_rate' : learning_rate,
        'epochs' : n_epochs,
        'batch_size' : batch_size
    }
)

# The wandb module itself serves as the logger: train() calls wandb.log once per epoch.
model = train(model, n_epochs, dataloaders, loss_function, optimizer, scheduler, accuracy, device, wandb)

[34m[1mwandb[0m: Currently logged in as: [33masang[0m (use `wandb login --relogin` to force relogin)


Epoch    40: reducing learning rate of group 0 to 1.0000e-02.
Epoch    59: reducing learning rate of group 0 to 1.0000e-03.
