# Alethea's Attempt at ResNet-34

In [1]:
import cProfile
import os
import pstats
from pstats import SortKey


import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb

In [2]:
config = {
            'batch_size': 700,
            #'dataset': 'imagenette2-320',
            'dataset': 'oxford-iiit-pet',
            'dropout': 0.1,
            'init_gain': 5,
            'initializer': None,
            'learning_rate': 0.1,
            'load_workers': os.cpu_count(), 
            'max_epochs': 1000,
            'optimizer': 'SGD',
            'random_seed': 1,
            'training_loops': 4,
            'cuda_device_ids': [3, 2, 1, 0],
         }

In [3]:
if config['random_seed']:
    torch.manual_seed(config['random_seed'])
    torch.cuda.manual_seed(config['random_seed'])

In [4]:
wandb.init(project="my-resnet34-augmenting", config=config)


W&B Run: https://app.wandb.ai/aletheap/my-resnet34-augmenting/runs/c9yv51lj

## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [5]:
#import nvidia.dali.ops as ops
#import nvidia.dali.types as types
#
#image_dir = "data/images"
#batch_size = 8
#
#class SimplePipeline(Pipeline):
#    def __init__(self, batch_size, num_threads, device_id):
#        super(SimplePipeline, self).__init__(batch_size, num_threads, device_id, seed = 12)
#        self.input = ops.FileReader(file_root = image_dir)
#        # instead of path to file directory file with pairs image_name image_label_value can be provided
#        # self.input = ops.FileReader(file_root = image_dir, file_list = image_dir + '/file_list.txt')
#        self.decode = ops.ImageDecoder(device = 'cpu', output_type = types.RGB)
#
#    def define_graph(self):
#        jpegs, labels = self.input()
#        images = self.decode(jpegs)
#        return (images, labels)

In [6]:

# Note to self: Try this: https://www.basicml.com/performance/2019/04/16/pytorch-data-augmentation-with-nvidia-dali.html
# cause I think cpu image transforms are a bottleneck for me



transform = transforms.Compose([transforms.RandomAffine(30, translate=(.2, .2), scale=(.8, 1.2), shear=None, resample=False, fillcolor=0),
                                transforms.Resize(256),
                                transforms.RandomCrop(224),
                                transforms.ColorJitter(brightness=.5, contrast=.5, saturation=.5, hue=.5),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(180),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])

dev_test_transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = os.path.join("/home/apower/data", config['dataset']) 
print('datadir:', datadir)
traindir = os.path.join(datadir, 'train')
print('traindir:', traindir)
devdir = os.path.join(datadir, 'dev')
print('devdir:', devdir)
#testdir = os.path.join(datadir, 'test')
#print('testdir:', testdir)

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, dev_test_transform)
#X_test = torchvision.datasets.ImageFolder(testdir, dev_test_transform)

num_labels = len(X_train.classes)

#print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)
print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\nlabels:', num_labels)

datadir: /home/apower/data/oxford-iiit-pet
traindir: /home/apower/data/oxford-iiit-pet/train
devdir: /home/apower/data/oxford-iiit-pet/dev
training_set: 5760 
dev_set: 800 
labels: 38


In [7]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [8]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_dev[0][0])

In [9]:
train_loader = DataLoader(X_train, batch_size=config['batch_size'], shuffle=True, num_workers=config['load_workers'])
dev_loader = DataLoader(X_dev, batch_size=1, shuffle=False)
#test_loader = DataLoader(X_test, batch_size=1, shuffle=True)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

In [10]:
class Projection(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
                
        if in_channels == out_channels:
            self.proj = None 
        else:
            self.proj = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            if config['initializer']:
                getattr(nn.init, config['initializer'])(self.proj.weight, gain=config['init_gain'])

    def forward(self, X):
        if self.proj:
            return self.proj(X)
        else:
            return X

In [11]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        
        self.in_channels = in_channels
        self.out_channels = out_channels
        
        if in_channels == out_channels:
            stride = 1
        else:
            stride = 2

        conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        if config['initializer']:
            getattr(nn.init, config['initializer'])(conv1.weight, gain=config['init_gain'])
        
        conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if config['initializer']:
            getattr(nn.init, config['initializer'])(conv2.weight, gain=config['init_gain'])
        
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=config['dropout']),
                                    nn.ReLU())
        self.layer2 = nn.Sequential(conv2,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=config['dropout']),
                                    nn.ReLU())

        self.proj = Projection(in_channels, out_channels)

    def forward(self, X):
        a = X
        a = self.layer1(a)
        a = self.layer2(a)
        return a + self.proj(X)

In [12]:
class ResNet34(nn.Module):

    def __init__(self, num_labels):
        super().__init__()
        
        self. num_labels = num_labels
        
        # 7x7 Conv
        conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2)
        if config['initializer']:
            getattr(nn.init, config['initializer'])(conv1.weight, gain=config['init_gain'])
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(num_features=64),
                                    nn.Dropout2d(p=config['dropout']),
                                    nn.ReLU())

        # 3x3 MaxPool
        self.layer2 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=2),
                                    nn.BatchNorm2d(num_features=64),
                                    nn.ReLU())

        # Stage 1
        self.stage1 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64))

        # Stage 2
        self.stage2 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128))

        # Stage 3
        self.stage3 = nn.Sequential(ResidualBlock(in_channels=128, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256))
        
        # Stage 4
        self.stage4 = nn.Sequential(ResidualBlock(in_channels=256, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512))
        

        # AveragePool
        self.avgpool = nn.AvgPool2d(kernel_size=2)
        
        # Fully Connected
        lin = nn.Linear(in_features=4608, out_features=num_labels)
        if config['initializer']:
            getattr(nn.init, config['initializer'])(lin.weight, gain=config['init_gain'])        
        self.fc = nn.Sequential(lin, nn.Softmax(dim=1)) 

        
    def forward(self, X):
        prefix = ' ' * 0
        a = X       
        a = self.layer1(X)
        a = self.layer2(a)
        a = self.stage1(a)
        a = self.stage2(a)
        a = self.stage3(a)
        a = self.stage4(a)
        a = self.avgpool(a)
        a = a.reshape(a.size(0), -1)
        a = self.fc(a)
        return a
        

## Train the Model



In [13]:
device = torch.device('cuda:' + str(config['cuda_device_ids'][0]))
model = ResNet34(num_labels=num_labels)
print("Let's use", len(config['cuda_device_ids']), "GPUs:")

for i in config['cuda_device_ids']:
    print(i, ":", torch.cuda.get_device_name(i))

model = nn.DataParallel(model, device_ids=config['cuda_device_ids'])
model = model.to(device)

Let's use 4 GPUs:
3 : GeForce RTX 2080 Ti
2 : GeForce RTX 2080 Ti
1 : GeForce RTX 2080 Ti
0 : GeForce RTX 2080 Ti


In [14]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [15]:
def accuracy(model, loader, name, cpu=False):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            if not cpu:
                images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

In [16]:
def _do_epoch(model, loader, learning_rate, criterion, optimizer):
    for local_batch, local_labels in loader:
        # Transfer to GPU
        X, y = local_batch.to(device), local_labels.to(device)
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.item()  # <-- If you delete this it won't learn
        loss.backward()
        optimizer.step()
    return (model, loss.item()) 

In [17]:
def train_model(model, loader, learning_rate=0.1, max_epochs=20):
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(torch.optim, config['optimizer'])(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        model.train()
        t0 = time.time()
        model, loss = _do_epoch(model, loader, learning_rate, criterion, optimizer)
        t1 = time.time()
        duration = t1-t0
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        relative_accuracy = dev_accuracy / train_accuracy
        torch.save(model.state_dict(), './resnet-augmenting-' + config['dataset'] + '.pt')
        print(' ' * 4, 
              '%.1f seconds -' % (duration), 
              'epoch:', epoch, 
              'lr: %.3f  ' % learning_rate,
              'loss: %.3f  ' % loss, 
              'train: %.3f  ' % train_accuracy, 
              'dev: %.3f  ' % dev_accuracy, 
              'relative_accuracy: %.3f  ' % relative_accuracy)
        try:
            wandb.log({'loss': loss, 
                   'learning_rate': learning_rate,
                   'secs_per_epoch': duration, 
                   'train_accuracy': train_accuracy, 
                   'dev_accuracy': dev_accuracy, 
                   'relative_accuracy': relative_accuracy})
        except:
            pass

    return model

In [None]:
learning_rate = config['learning_rate']

for i in range(config['training_loops']):
    # train model
    print('learning_rate:', learning_rate, 'max_epochs:', config['max_epochs'])
    model = train_model(model, 
                        train_loader, 
                        learning_rate=learning_rate, 
                        max_epochs=config['max_epochs'])

    # slow down learning
    learning_rate = learning_rate / 10


learning_rate: 0.1 max_epochs: 1000
     25.9 seconds - epoch: 0 lr: 0.100   loss: 3.637   train: 2.847   dev: 3.125   relative_accuracy: 1.098  
     15.8 seconds - epoch: 1 lr: 0.100   loss: 3.643   train: 2.830   dev: 3.500   relative_accuracy: 1.237  


In [None]:
learning_rate = config['learning_rate']
loader = train_loader
max_epochs=config['max_epochs']

model.train()
criterion = nn.CrossEntropyLoss()
optimizer = getattr(torch.optim, config['optimizer'])(model.parameters(), lr=learning_rate)

In [None]:
t0=time.time()
cProfile.run('_do_epoch(model, loader, learning_rate, criterion, optimizer)', 'resnet.prof')
t1=time.time()
print(t1-t0, 'seconds')

In [None]:
!flameprof resnet.prof > resnet_prof.svg

In [None]:
from IPython.core.display import SVG
SVG(filename='resnet_prof.svg')

In [None]:
p = pstats.Stats('restats')

In [None]:
p.strip_dirs().sort_stats(SortKey.CUMULATIVE).print_stats(30)


In [None]:
p.print_callers(10)