# Alethea's Attempt at ResNet-34

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb

In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 GeForce RTX 2080 Ti
1 GeForce GTX 1080 Ti


In [3]:
config = {
            'device': 'cuda:0',
            'initializer': None,
            'init_gain': 5,
            'learning_rate': 0.1,
            'load_workers': os.cpu_count(), 
            'batch_size': 190,
            'max_epochs': 200,
            'training_loops': 1,
            'dropout': 0.5,
            'optimizer': 'SGD',
            'dataset': 'oxford-iiit-pet',
            'random_seed': 1,
         }
wandb.init(project="my-resnet34-augmenting", config=config)


W&B Run: https://app.wandb.ai/aletheap/my-resnet34-augmenting/runs/96jsj6qs

In [4]:
if wandb.config.random_seed:
    torch.manual_seed(wandb.config.random_seed)
    torch.cuda.manual_seed(wandb.config.random_seed)

In [5]:
# Use CUDA
#use_cuda = torch.cuda.is_available()
#cuda_dev = wandb.config.device
#device = torch.device(cuda_dev if use_cuda else "cpu")
device = torch.device(wandb.config.device)
print("Using device:", device)


Using device: cuda:0


## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [6]:

# Note to self: Try this: https://www.basicml.com/performance/2019/04/16/pytorch-data-augmentation-with-nvidia-dali.html
# cause I think cpu image transforms are a bottleneck for me



transform = transforms.Compose([transforms.Resize(256),
                                transforms.RandomCrop(224),
                                transforms.ColorJitter(brightness=.5, contrast=.5, saturation=.5, hue=.5),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(90),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])

dev_test_transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = os.path.join("/home/apower/data", wandb.config.dataset) 
print('datadir:', datadir)
traindir = os.path.join(datadir, 'train')
print('traindir:', traindir)
devdir = os.path.join(datadir, 'dev')
print('devdir:', devdir)
testdir = os.path.join(datadir, 'test')
print('testdir:', testdir)

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, dev_test_transform)
X_test = torchvision.datasets.ImageFolder(testdir, dev_test_transform)

num_labels = len(X_train.classes)

print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)
#print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\nlabels:', num_labels)

datadir: /home/apower/data/oxford-iiit-pet
traindir: /home/apower/data/oxford-iiit-pet/train
devdir: /home/apower/data/oxford-iiit-pet/dev
testdir: /home/apower/data/oxford-iiit-pet/test
training_set: 5760 
dev_set: 800 
test_set: 800 
labels: 38


In [7]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [8]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_dev[0][0])

In [9]:
train_loader = DataLoader(X_train, batch_size=wandb.config.batch_size, shuffle=True, num_workers=wandb.config.load_workers)
dev_loader = DataLoader(X_dev, batch_size=1, shuffle=True)
test_loader = DataLoader(X_test, batch_size=1, shuffle=True)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

In [10]:
class Projection(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
                
        if in_channels == out_channels:
            self.proj = None 
        else:
            self.proj = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            if wandb.config.initializer:
                getattr(nn.init, wandb.config.initializer)(self.proj.weight, gain=wandb.config.init_gain)

    def forward(self, X):
        if self.proj:
            return self.proj(X)
        else:
            return X

In [11]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        
        self.in_channels = in_channels
        self.out_channels = out_channels
        
        if in_channels == out_channels:
            stride = 1
        else:
            stride = 2

        conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        if wandb.config.initializer:
            getattr(nn.init, wandb.config.initializer)(conv1.weight, gain=wandb.config.init_gain)
        
        conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if wandb.config.initializer:
            getattr(nn.init, wandb.config.initializer)(conv2.weight, gain=wandb.config.init_gain)
        
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())
        self.layer2 = nn.Sequential(conv2,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())

        self.proj = Projection(in_channels, out_channels)

    def forward(self, X):
        a = X
        a = self.layer1(a)
        a = self.layer2(a)
        return a + self.proj(X)

In [12]:
class ResNet34(nn.Module):

    def __init__(self, num_labels):
        super().__init__()
        
        self. num_labels = num_labels
        
        # 7x7 Conv
        conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2)
        if wandb.config.initializer:
            getattr(nn.init, wandb.config.initializer)(conv1.weight, gain=wandb.config.init_gain)
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(num_features=64),
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())

        # 3x3 MaxPool
        self.layer2 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=2),
                                    nn.BatchNorm2d(num_features=64),
                                    nn.ReLU())

        # Stage 1
        self.stage1 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64))

        # Stage 2
        self.stage2 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128))

        # Stage 3
        self.stage3 = nn.Sequential(ResidualBlock(in_channels=128, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256))
        
        # Stage 4
        self.stage4 = nn.Sequential(ResidualBlock(in_channels=256, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512))
        

        # AveragePool
        self.avgpool = nn.AvgPool2d(kernel_size=2)
        
        # Fully Connected
        lin = nn.Linear(in_features=4608, out_features=num_labels)
        if wandb.config.initializer:
            getattr(nn.init, wandb.config.initializer)(lin.weight, gain=wandb.config.init_gain)        
        self.fc = nn.Sequential(lin, nn.Softmax(dim=1)) 

        
    def forward(self, X):
        prefix = ' ' * 0
        a = X       
        a = self.layer1(X)
        a = self.layer2(a)
        a = self.stage1(a)
        a = self.stage2(a)
        a = self.stage3(a)
        a = self.stage4(a)
        a = self.avgpool(a)
        a = a.reshape(a.size(0), -1)
        a = self.fc(a)
        return a
        

## Train the Model



In [13]:
model = ResNet34(num_labels=num_labels)
#wandb.watch(model)
#model = nn.DataParallel(model, device_ids=[0,1])
#model = nn.DataParallel(model)
model = model.to(device)

In [14]:
params = model.parameters()
p = list(params)[-1]
print(p)
#print(p + 2)

Parameter containing:
tensor([-4.1077e-03, -7.5653e-03,  6.3444e-05, -1.2351e-02,  1.3766e-02,
         7.8591e-03,  7.3893e-03, -3.1297e-04,  9.3235e-03, -7.6505e-04,
         8.4869e-03,  1.0370e-02, -9.6265e-03,  3.2102e-03, -1.1977e-02,
        -3.8977e-03, -1.3045e-02, -3.3345e-03, -6.7620e-03, -1.2108e-02,
        -9.7102e-03, -1.4038e-04,  7.9170e-03, -1.3712e-02, -4.3569e-03,
         4.3466e-03, -9.1772e-03,  1.1812e-02,  7.7005e-03, -1.4727e-02,
        -7.9887e-03, -9.3226e-03,  3.8025e-04,  6.9521e-03, -1.3534e-02,
        -6.2855e-03,  5.8158e-03,  1.3524e-02], device='cuda:0',
       requires_grad=True)


In [15]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [16]:
def accuracy(model, loader, name):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    #print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    #wandb.log({name + '_set_accuracy': accuracy})
    return accuracy

In [27]:
def train_model(model, loader, learning_rate=0.1, max_epochs=20):
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(torch.optim, wandb.config.optimizer)(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        model.train()
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)
            optimizer.zero_grad()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            #loss.item()  # <-- If you delete this it won't learn
            loss.backward()
            optimizer.step()
        t1 = time.time()
        duration = t1-t0
        loss_num = loss.item()
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        relative_accuracy = dev_accuracy / train_accuracy
        #train_accuracy = 'False'
        #dev_accuracy = 'False'
        #relative_accuracy = 'False'
        wandb.log({'loss': loss.item(), 
                   'secs_per_epoch': duration, 
                   'train_accuracy': train_accuracy, 
                   'dev_accuracy': dev_accuracy, 
                   'relative_accuracy': relative_accuracy})
        print(' ' * 4, 
              '%.1f seconds -' % (duration), 
              'epoch:', epoch, 
              'loss:', loss_num, 
              'train:', train_accuracy, 
              'dev:', dev_accuracy, 
              'relative_accuracy:', relative_accuracy)

    return model

In [18]:
learning_rate = wandb.config.learning_rate
max_epochs = wandb.config.max_epochs
total_loops = wandb.config.training_loops
#model.train()

In [19]:
max_epochs = 1
total_loops = 1

In [20]:
p

Parameter containing:
tensor([-4.1077e-03, -7.5653e-03,  6.3444e-05, -1.2351e-02,  1.3766e-02,
         7.8591e-03,  7.3893e-03, -3.1297e-04,  9.3235e-03, -7.6505e-04,
         8.4869e-03,  1.0370e-02, -9.6265e-03,  3.2102e-03, -1.1977e-02,
        -3.8977e-03, -1.3045e-02, -3.3345e-03, -6.7620e-03, -1.2108e-02,
        -9.7102e-03, -1.4038e-04,  7.9170e-03, -1.3712e-02, -4.3569e-03,
         4.3466e-03, -9.1772e-03,  1.1812e-02,  7.7005e-03, -1.4727e-02,
        -7.9887e-03, -9.3226e-03,  3.8025e-04,  6.9521e-03, -1.3534e-02,
        -6.2855e-03,  5.8158e-03,  1.3524e-02], device='cuda:0',
       requires_grad=True)

In [21]:
for i in range(wandb.config.training_loops):
    # train model
    print('learning_rate:', wandb.config.learning_rate, 'max_epochs:', wandb.config.max_epochs)
    model = train_model(model, 
                        train_loader, 
                        learning_rate=wandb.config.learning_rate, 
                        max_epochs=wandb.config.max_epochs)

    # save weights
    #cpu_model = model.to(torch.device('cpu'))
    #torch.save(cpu_model, 'resnet-augmenting.pt')
    #model = model.to(device)

    # slow down learning
    #learning_rate = learning_rate / 10
    #max_epochs = max_epochs + 300


learning_rate: 0.1 max_epochs: 200
     18.7 seconds - epoch: 0 loss: 3.681574821472168 train: 2.7604166666666665 dev: 1.375 relative_accuracy: 0.4981132075471698
     17.9 seconds - epoch: 1 loss: 3.6097207069396973 train: 2.7256944444444446 dev: 3.25 relative_accuracy: 1.1923566878980891
     18.1 seconds - epoch: 2 loss: 3.6374127864837646 train: 2.7777777777777777 dev: 2.375 relative_accuracy: 0.855
     17.9 seconds - epoch: 3 loss: 3.6489827632904053 train: 2.673611111111111 dev: 3.0 relative_accuracy: 1.122077922077922
     17.9 seconds - epoch: 4 loss: 3.6319079399108887 train: 2.795138888888889 dev: 2.5 relative_accuracy: 0.8944099378881988
     17.8 seconds - epoch: 5 loss: 3.6472926139831543 train: 2.7083333333333335 dev: 2.875 relative_accuracy: 1.0615384615384615
     17.6 seconds - epoch: 6 loss: 3.639526844024658 train: 2.795138888888889 dev: 2.875 relative_accuracy: 1.0285714285714287
     18.0 seconds - epoch: 7 loss: 3.6435115337371826 train: 3.0381944444444446 dev: 3

     17.9 seconds - epoch: 65 loss: 3.6310834884643555 train: 5.972222222222222 dev: 8.5 relative_accuracy: 1.4232558139534883
     17.7 seconds - epoch: 66 loss: 3.6233253479003906 train: 6.753472222222222 dev: 8.125 relative_accuracy: 1.2030848329048842
     17.8 seconds - epoch: 67 loss: 3.596874713897705 train: 6.458333333333333 dev: 7.125 relative_accuracy: 1.103225806451613
     17.8 seconds - epoch: 68 loss: 3.6188409328460693 train: 6.40625 dev: 7.375 relative_accuracy: 1.1512195121951219
     18.1 seconds - epoch: 69 loss: 3.6204681396484375 train: 6.284722222222222 dev: 7.625 relative_accuracy: 1.2132596685082873
     17.4 seconds - epoch: 70 loss: 3.6137008666992188 train: 6.371527777777778 dev: 7.125 relative_accuracy: 1.1182561307901908
     18.0 seconds - epoch: 71 loss: 3.5935566425323486 train: 6.006944444444445 dev: 7.0 relative_accuracy: 1.1653179190751444
     17.8 seconds - epoch: 72 loss: 3.623973846435547 train: 6.267361111111111 dev: 7.375 relative_accuracy: 1.17

     17.8 seconds - epoch: 131 loss: 3.5951149463653564 train: 7.03125 dev: 8.625 relative_accuracy: 1.2266666666666666
     18.0 seconds - epoch: 132 loss: 3.6324639320373535 train: 7.222222222222222 dev: 9.125 relative_accuracy: 1.2634615384615384
     18.0 seconds - epoch: 133 loss: 3.56304931640625 train: 7.118055555555555 dev: 7.625 relative_accuracy: 1.071219512195122
     17.6 seconds - epoch: 134 loss: 3.6295623779296875 train: 6.996527777777778 dev: 8.5 relative_accuracy: 1.2148883374689827
     17.8 seconds - epoch: 135 loss: 3.570326328277588 train: 6.215277777777778 dev: 7.75 relative_accuracy: 1.246927374301676
     17.7 seconds - epoch: 136 loss: 3.6582024097442627 train: 7.256944444444445 dev: 8.875 relative_accuracy: 1.2229665071770335
     18.0 seconds - epoch: 137 loss: 3.6010019779205322 train: 7.586805555555555 dev: 10.375 relative_accuracy: 1.3675057208237986
     17.5 seconds - epoch: 138 loss: 3.601182460784912 train: 7.222222222222222 dev: 10.25 relative_accurac

     17.8 seconds - epoch: 196 loss: 3.6006228923797607 train: 7.517361111111111 dev: 10.125 relative_accuracy: 1.3468822170900694
     18.1 seconds - epoch: 197 loss: 3.6260533332824707 train: 7.256944444444445 dev: 10.0 relative_accuracy: 1.3779904306220094
     18.1 seconds - epoch: 198 loss: 3.581183671951294 train: 7.309027777777778 dev: 10.25 relative_accuracy: 1.402375296912114
     18.1 seconds - epoch: 199 loss: 3.629161834716797 train: 7.725694444444445 dev: 9.625 relative_accuracy: 1.2458426966292135


In [39]:
#model, losses = model
#p2 = list(model.parameters())[-1]


In [38]:
#p

In [37]:
#p2

In [36]:
#model.state_dict()

In [None]:
model = train_model(model, train_loader, learning_rate=.01, max_epochs=100)

     17.8 seconds - epoch: 0 loss: 3.622713565826416 train: 7.048611111111111 dev: 9.625 relative_accuracy: 1.3655172413793104
     17.3 seconds - epoch: 1 loss: 3.5996108055114746 train: 7.204861111111111 dev: 9.875 relative_accuracy: 1.3706024096385543
     18.0 seconds - epoch: 2 loss: 3.603652238845825 train: 7.777777777777778 dev: 9.625 relative_accuracy: 1.2375
     17.6 seconds - epoch: 3 loss: 3.597571611404419 train: 7.621527777777778 dev: 9.625 relative_accuracy: 1.262870159453303
     17.8 seconds - epoch: 4 loss: 3.6114957332611084 train: 7.413194444444445 dev: 9.875 relative_accuracy: 1.3320843091334895
     18.0 seconds - epoch: 5 loss: 3.5934243202209473 train: 7.447916666666667 dev: 9.875 relative_accuracy: 1.3258741258741258
     18.0 seconds - epoch: 6 loss: 3.6105284690856934 train: 7.96875 dev: 10.125 relative_accuracy: 1.2705882352941176
     18.1 seconds - epoch: 7 loss: 3.6243629455566406 train: 7.708333333333333 dev: 9.875 relative_accuracy: 1.281081081081081
  