# Alethea's Attempt at ResNet-34

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb


In [2]:
for i in range(3):
    print(i, torch.cuda.get_device_name(i))

0 GeForce RTX 2080 Ti
1 GeForce GTX 1080 Ti
2 GeForce GTX 980


In [3]:
# Use CUDA
use_cuda = torch.cuda.is_available()
cuda_dev = "cuda"
device = torch.device(cuda_dev if use_cuda else "cpu")
print("Using device:", device)


Using device: cuda


In [4]:
config = {
            'initial_lr': 0.1,
            'load_workers': 20, 
            'batch_size': 350,
            'max_epochs': 500,
            'training_loops': 1,
            'dropout': 0,
            'optimizer': 'SGD',
            'dataset': 'imagenette2-320',
         }
wandb.init(project="my-resnet34-augmenting", config=config)


W&B Run: https://app.wandb.ai/aletheap/my-resnet34-augmenting/runs/2rj7q4xr

## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [16]:
transform = transforms.Compose([transforms.Resize(256),
                                transforms.RandomCrop(224),
                                transforms.ColorJitter(brightness=.5, contrast=.5, saturation=.5, hue=.5),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(90),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])

dev_test_transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = os.path.join("/home/apower/data", wandb.config.dataset) 
print('datadir:', datadir)
traindir = os.path.join(datadir, 'train')
print('traindir:', traindir)
devdir = os.path.join(datadir, 'dev')
print('devdir:', devdir)
#testdir = os.path.join(datadir, 'test')
#print('testdir:', testdir)

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, dev_test_transform)
#X_test = torchvision.datasets.ImageFolder(testdir, dev_test_transform)

num_labels = len(X_train.classes)

#print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)
print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\nlabels:', num_labels)

datadir: /home/apower/data/imagenette2-320
traindir: /home/apower/data/imagenette2-320/train
devdir: /home/apower/data/imagenette2-320/dev
training_set: 9469 
dev_set: 3925 
labels: 10


In [6]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [7]:
train_loader = DataLoader(X_train, batch_size=wandb.config.batch_size, shuffle=True, num_workers=wandb.config.load_workers)
dev_loader = DataLoader(X_dev, batch_size=1, shuffle=True)
#test_loader = DataLoader(X_test, batch_size=1, shuffle=True)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

In [8]:
class Projection(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
                
        if in_channels == out_channels:
            self.proj = None 
        else:
            self.proj = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            nn.init.xavier_uniform_(self.proj.weight)

    def forward(self, X):
        if self.proj:
            return self.proj(X)
        else:
            return X

In [9]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        
        self.in_channels = in_channels
        self.out_channels = out_channels
        
        if in_channels == out_channels:
            stride = 1
        else:
            stride = 2

        conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        nn.init.xavier_uniform_(conv1.weight, gain=nn.init.calculate_gain('relu'))
        
        conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        nn.init.xavier_uniform_(conv2.weight, gain=nn.init.calculate_gain('relu'))
        
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())
        self.layer2 = nn.Sequential(conv2,
                                    nn.BatchNorm2d(out_channels), 
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())

        self.proj = Projection(in_channels, out_channels)

    def forward(self, X):
        a = X
        a = self.layer1(a)
        a = self.layer2(a)
        return a + self.proj(X)

In [10]:
class ResNet34(nn.Module):

    def __init__(self, num_labels):
        super().__init__()
        
        self. num_labels = num_labels
        
        # 7x7 Conv
        conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2)
        nn.init.xavier_uniform_(conv1.weight, gain=nn.init.calculate_gain('relu'))
        self.layer1 = nn.Sequential(conv1,
                                    nn.BatchNorm2d(num_features=64),
                                    nn.Dropout2d(p=wandb.config.dropout),
                                    nn.ReLU())

        # 3x3 MaxPool
        self.layer2 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=2),
                                    nn.BatchNorm2d(num_features=64),
                                    nn.ReLU())

        # Stage 1
        self.stage1 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64),
                                    ResidualBlock(in_channels=64, out_channels=64))

        # Stage 2
        self.stage2 = nn.Sequential(ResidualBlock(in_channels=64, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128),
                                    ResidualBlock(in_channels=128, out_channels=128))

        # Stage 3
        self.stage3 = nn.Sequential(ResidualBlock(in_channels=128, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256),
                                    ResidualBlock(in_channels=256, out_channels=256))
        
        # Stage 4
        self.stage4 = nn.Sequential(ResidualBlock(in_channels=256, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512),
                                    ResidualBlock(in_channels=512, out_channels=512))
        

        # AveragePool
        self.avgpool = nn.AvgPool2d(kernel_size=2)
        
        # Fully Connected
        lin = nn.Linear(in_features=4608, out_features=num_labels)
        nn.init.xavier_uniform_(lin.weight, gain=nn.init.calculate_gain('sigmoid'))        
        self.fc = nn.Sequential(lin, nn.Softmax(dim=1)) 

        
    def forward(self, X):
        prefix = ' ' * 0
        a = X       
        a = self.layer1(X)
        a = self.layer2(a)
        a = self.stage1(a)
        a = self.stage2(a)
        a = self.stage3(a)
        a = self.stage4(a)
        a = self.avgpool(a)
        a = a.reshape(a.size(0), -1)
        a = self.fc(a)
        return a
        

## Train the Model



In [11]:
model = ResNet34(num_labels=num_labels)
#wandb.watch(model)
model = nn.DataParallel(model, device_ids=[0,1])
if use_cuda:
    model = model.to(device)

    There is an imbalance between your GPUs. You may want to exclude GPU 1 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


In [12]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [13]:
def accuracy(model, loader, name):
    #model = model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    #print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    #wandb.log({name + '_set_accuracy': accuracy})
    return accuracy

In [14]:
def train_model(model, loader, learning_rate=0.1, losses=[10**10], max_epochs=20):
    model = model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(torch.optim, wandb.config.optimizer)(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        batch = 0
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)
            y_pred = model.forward(X)
            loss = criterion(y_pred, y)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('epoch:', epoch, 'batch:', batch, 'loss:', loss.item())
            batch += 1
        t1 = time.time()
        duration = t1-t0
        loss_num = loss.item()
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        wandb.log({'loss': loss.item(), 'secs_per_epoch': duration, 'train_accuracy': train_accuracy, 'dev_accuracy': dev_accuracy, 'relative_accuracy': dev_accuracy / train_accuracy})
        print(' ' * 4, '%.1f seconds -' % (duration), 'epoch:', epoch, 'loss:', loss_num, 'train:', train_accuracy, 'dev:', dev_accuracy, 'relative_accuracy:', dev_accuracy / train_accuracy)

    return model, losses

In [15]:
losses = [10**10]
learning_rate = wandb.config.initial_lr
max_epochs = wandb.config.max_epochs
total_loops = wandb.config.training_loops
model.train()
for i in range(total_loops):
    # train model
    print('learning_rate:', learning_rate, 'max_epochs:', max_epochs)
    model, losses = train_model(model, 
                                train_loader, 
                                learning_rate=learning_rate, 
                                max_epochs=wandb.config.max_epochs)

    # save weights
    cpu_model = model.to(torch.device('cpu'))
    torch.save(cpu_model, 'resnet-augmenting.pt')
    if use_cuda:
        model = model.to(device)

    # slow down learning
    learning_rate = learning_rate / 10
    #max_epochs = max_epochs + 300


learning_rate: 0.1 max_epochs: 500
     30.2 seconds - epoch: 0 loss: 2.4085190296173096 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     40.1 seconds - epoch: 1 loss: 2.303256034851074 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     40.0 seconds - epoch: 2 loss: 2.3558876514434814 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     40.3 seconds - epoch: 3 loss: 2.4085190296173096 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     39.8 seconds - epoch: 4 loss: 2.3558876514434814 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     29.2 seconds - epoch: 5 loss: 2.1453611850738525 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy: 0.9800716560509554
     25.1 seconds - epoch: 6 loss: 2.250624418258667 train: 10.138346182279015 dev: 9.936305732484076 relative_accuracy:

KeyboardInterrupt: 