# ResNet-34 Official


In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb


In [2]:
# Use CUDA
use_cuda = torch.cuda.is_available()
cuda_dev = "cuda:0"
device = torch.device(cuda_dev if use_cuda else "cpu")
print("Using device:", device)
#if use_cuda:
#    print('-', torch.cuda.get_device_name(cuda_dev_num))

Using device: cuda:0


In [3]:
config = {
            'load_workers': 20, 
            'batch_size': 200,
            'max_epochs': 40,
         }
wandb.init(project="official-resnet34", config=config)

#wandb.init(config=args)
#wandb.config.initial_lr = 0.1
#wandb.config.load_workers = 20
#wandb.config.batch_size = 500
#wandb.config.max_epochs = 500
#wandb.config.training_loops = 1
#wandb.config.dropout=.5

W&B Run: https://app.wandb.ai/aletheap/official-resnet34/runs/a9s35bt1

## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [4]:
transform = transforms.Compose([transforms.Resize(256),        
                                transforms.CenterCrop(224),    
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = "/home/apower/data/oxford-iiit-pet"
traindir = os.path.join(datadir, 'train')
devdir = os.path.join(datadir, 'dev')
testdir = os.path.join(datadir, 'test')

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, transform)
X_test = torchvision.datasets.ImageFolder(testdir, transform)

num_labels = len(X_train.classes)

print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)

training_set: 5760 
dev_set: 800 
test_set: 800 
labels: 38


In [5]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [6]:
train_loader = DataLoader(X_train, batch_size=wandb.config.batch_size, shuffle=True, num_workers=wandb.config.load_workers)
dev_loader = DataLoader(X_dev, batch_size=512, shuffle=True)
test_loader = DataLoader(X_test, batch_size=256, shuffle=True)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

## Train the Model



In [7]:
model = torchvision.models.resnet34(pretrained=False, progress=True)
#wandb.watch(model)
#model = nn.DataParallel(model, device_ids=[0, 1])
if use_cuda:
    model = model.to(device)

# Magic



In [8]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [9]:
def accuracy(model, loader, name):
    #model = model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    #print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    #wandb.log({name + '_set_accuracy': accuracy})
    return accuracy

In [10]:
def train_model(model, loader, learning_rate=0.1, losses=[10**10], max_epochs=20):
    model = model.train()
    criterion = nn.CrossEntropyLoss()
    wandb.config.optimizer = "SGD"
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        batch = 0
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)
            y_pred = model.forward(X)
            loss = criterion(y_pred, y)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('epoch:', epoch, 'batch:', batch, 'loss:', loss.item())
            batch += 1
        t1 = time.time()
        duration = t1-t0
        loss_num = loss.item()
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        wandb.log({'loss': loss.item(), 'secs_per_epoch': duration, 'train_accuracy': train_accuracy, 'dev_accuracy': dev_accuracy})
        print(' ' * 4, '%.1f seconds -' % (duration), 'epoch:', epoch, 'loss:', loss_num, 'train:', train_accuracy, 'dev:', dev_accuracy)

    return model, losses

In [11]:
# train model
#model.train()
print('max_epochs:', wandb.config.max_epochs)
model, losses = train_model(model, train_loader, max_epochs=wandb.config.max_epochs)

# save weights
cpu_model = model.to(torch.device('cpu'))
torch.save(cpu_model, 'resnet-official.pt')
if use_cuda:
    model = model.to(device)


max_epochs: 40
     17.4 seconds - epoch: 0 loss: 4.304771900177002 train: 4.513888888888889 dev: 4.625
     15.9 seconds - epoch: 1 loss: 3.7144951820373535 train: 8.715277777777779 dev: 6.625
     15.9 seconds - epoch: 2 loss: 3.3128960132598877 train: 14.930555555555555 dev: 12.5
     15.5 seconds - epoch: 3 loss: 3.1625919342041016 train: 15.32986111111111 dev: 11.75
     15.4 seconds - epoch: 4 loss: 2.8166074752807617 train: 20.65972222222222 dev: 14.125
     15.8 seconds - epoch: 5 loss: 2.6467947959899902 train: 22.569444444444443 dev: 15.125
     15.3 seconds - epoch: 6 loss: 2.6628758907318115 train: 26.84027777777778 dev: 20.75
     15.3 seconds - epoch: 7 loss: 2.329137086868286 train: 34.40972222222222 dev: 24.375
     16.4 seconds - epoch: 8 loss: 2.2875921726226807 train: 33.263888888888886 dev: 21.0
     15.7 seconds - epoch: 9 loss: 2.3276803493499756 train: 35.260416666666664 dev: 19.5
     15.3 seconds - epoch: 10 loss: 2.1557366847991943 train: 47.72569444444444 dev

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
