# ResNet-34 Official


In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb


In [2]:
for i in range(3):
    print(i, torch.cuda.get_device_name(i))

0 GeForce RTX 2080 Ti
1 GeForce GTX 1080 Ti
2 GeForce GTX 980


In [3]:
# Use CUDA
use_cuda = torch.cuda.is_available()
cuda_dev = "cuda:1"
device = torch.device(cuda_dev if use_cuda else "cpu")
print("Using device:", device)


Using device: cuda:1


In [4]:
config = {
            'load_workers': 20, 
            'batch_size': 190,
            'max_epochs': 200,
            'optimizer': 'SGD',
            'dataset': 'oxford-iiit-pet',
         }
wandb.init(project="official-resnet34", config=config)


W&B Run: https://app.wandb.ai/aletheap/official-resnet34/runs/b2m4foty

## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [5]:
transform = transforms.Compose([transforms.Resize(256),
                                transforms.RandomCrop(224),
                                transforms.ColorJitter(brightness=.1, contrast=.1, saturation=.1, hue=.1),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(180),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])

dev_test_transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = os.path.join("/home/apower/data", wandb.config.dataset) 
print('datadir:', datadir)
traindir = os.path.join(datadir, 'train')
print('traindir:', traindir)
devdir = os.path.join(datadir, 'dev')
print('devdir:', devdir)
testdir = os.path.join(datadir, 'test')
print('testdir:', testdir)

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, dev_test_transform)
X_test = torchvision.datasets.ImageFolder(testdir, dev_test_transform)

num_labels = len(X_train.classes)

print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)
#print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\nlabels:', num_labels)

datadir: /home/apower/data/oxford-iiit-pet
traindir: /home/apower/data/oxford-iiit-pet/train
devdir: /home/apower/data/oxford-iiit-pet/dev
testdir: /home/apower/data/oxford-iiit-pet/test
training_set: 5760 
dev_set: 800 
test_set: 800 
labels: 38


In [6]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [7]:
train_loader = DataLoader(X_train, batch_size=wandb.config.batch_size, shuffle=True, num_workers=wandb.config.load_workers)
dev_loader = DataLoader(X_dev, batch_size=1, shuffle=True)
test_loader = DataLoader(X_test, batch_size=1, shuffle=True)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

## Train the Model



In [8]:
model = torchvision.models.resnet34(pretrained=False, progress=True)
#wandb.watch(model)
#model = nn.DataParallel(model, device_ids=[0,1])
if use_cuda:
    model = model.to(device)

In [9]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [10]:
def accuracy(model, loader, name):
    #model = model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    #print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    #wandb.log({name + '_set_accuracy': accuracy})
    return accuracy

In [11]:
def train_model(model, loader, learning_rate=0.1, losses=[10**10], max_epochs=20):
    model = model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(torch.optim, wandb.config.optimizer)(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        batch = 0
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)
            y_pred = model.forward(X)
            loss = criterion(y_pred, y)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('epoch:', epoch, 'batch:', batch, 'loss:', loss.item())
            batch += 1
        t1 = time.time()
        duration = t1-t0
        loss_num = loss.item()
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        wandb.log({'loss': loss.item(), 'secs_per_epoch': duration, 'train_accuracy': train_accuracy, 'dev_accuracy': dev_accuracy, 'relative_accuracy': dev_accuracy / train_accuracy})
        print(' ' * 4, '%.1f seconds -' % (duration), 'epoch:', epoch, 'loss:', loss_num, 'train:', train_accuracy, 'dev:', dev_accuracy, 'relative_accuracy:', dev_accuracy / train_accuracy)

    return model, losses

In [None]:
# train model
#model.train()
print('max_epochs:', wandb.config.max_epochs)
model, losses = train_model(model, train_loader, max_epochs=wandb.config.max_epochs)

# save weights
cpu_model = model.to(torch.device('cpu'))
torch.save(cpu_model, 'resnet-official.pt')
if use_cuda:
    model = model.to(device)


max_epochs: 200
     25.9 seconds - epoch: 0 loss: 4.078146457672119 train: 2.4652777777777777 dev: 3.75 relative_accuracy: 1.5211267605633803
     27.5 seconds - epoch: 1 loss: 4.193048000335693 train: 4.670138888888889 dev: 1.875 relative_accuracy: 0.4014869888475836
     27.2 seconds - epoch: 2 loss: 3.4384031295776367 train: 6.09375 dev: 2.875 relative_accuracy: 0.4717948717948718
     25.9 seconds - epoch: 3 loss: 3.067910671234131 train: 9.375 dev: 2.75 relative_accuracy: 0.29333333333333333
     23.6 seconds - epoch: 4 loss: 3.1321616172790527 train: 6.319444444444445 dev: 2.5 relative_accuracy: 0.3956043956043956
     23.7 seconds - epoch: 5 loss: 3.2971112728118896 train: 8.76736111111111 dev: 1.875 relative_accuracy: 0.21386138613861388
     24.0 seconds - epoch: 6 loss: 3.2655656337738037 train: 8.958333333333334 dev: 3.0 relative_accuracy: 0.33488372093023255
     24.4 seconds - epoch: 7 loss: 3.228858470916748 train: 9.184027777777779 dev: 3.625 relative_accuracy: 0.394706