# Alethea's Attempt at ResNet-34

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb
wandb.init(project="my-resnet34")

In [None]:
# Pick the compute device: the CUDA GPU when one is available, otherwise
# fall back to the CPU. `use_cuda` and `device` are reused by later cells.
cuda_dev = "cuda"
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device(cuda_dev)
else:
    device = torch.device("cpu")
print("Using device:", device)

In [None]:
# Hyper-parameters for this run, recorded on the wandb run config so they
# are stored alongside the logged metrics. Later cells read them back from
# `wandb.config` (learning rate, loader workers, batch size, epochs,
# number of training loops, dropout probability).
for _name, _value in {
    "initial_lr": 0.1,
    "load_workers": 20,
    "batch_size": 700,
    "max_epochs": 150,
    "training_loops": 1,
    "dropout": .5,
}.items():
    setattr(wandb.config, _name, _value)

## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about normalizing image data.


In [None]:
# Preprocessing applied to every image: resize, centre-crop to 224x224,
# convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values are presumably the usual ImageNet
# channel statistics from the tutorial linked above - confirm.
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)

# Dataset root; it is expected to contain 'train', 'dev' and 'test'
# sub-directories laid out the way torchvision's ImageFolder reads them.
datadir = "/mnt/data/oxford-iiit-pet"
traindir, devdir, testdir = (
    os.path.join(datadir, split) for split in ('train', 'dev', 'test')
)

X_train, X_dev, X_test = (
    torchvision.datasets.ImageFolder(split_dir, transform)
    for split_dir in (traindir, devdir, testdir)
)

# The number of classes is taken from the training split.
num_labels = len(X_train.classes)

print(
    'training_set:', len(X_train),
    '\ndev_set:', len(X_dev),
    '\ntest_set:', len(X_test),
    '\nlabels:', num_labels,
)

In [None]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [None]:
# Batch iterators for the three splits. Only the training loader takes its
# batch size and worker count from the wandb config; the dev and test
# loaders use fixed batch sizes and the default (in-process) loading.
train_loader = DataLoader(
    X_train,
    batch_size=wandb.config.batch_size,
    shuffle=True,
    num_workers=wandb.config.load_workers,
)
dev_loader = DataLoader(
    X_dev,
    batch_size=512,
    shuffle=True,
)
test_loader = DataLoader(
    X_test,
    batch_size=256,
    shuffle=True,
)

## Let's Build the Model

I'm basing this on the resnet diagram from: https://cv-tricks.com/keras/understand-implement-resnets/

In [None]:
class Projection(nn.Module):
    """Shortcut branch used alongside the convolutional path of a block.

    When the input and output channel counts match, the input is passed
    through untouched. Otherwise a 1x1 convolution with stride 2 changes
    the channel count and halves the spatial size.

    Args:
        in_channels: Number of channels of the incoming tensor.
        out_channels: Number of channels the shortcut must produce.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        if in_channels == out_channels:
            # Identity shortcut: nothing to learn.
            self.proj = None
        else:
            self.proj = nn.Conv2d(in_channels=in_channels,
                                  out_channels=out_channels,
                                  kernel_size=1,
                                  stride=2)

    def forward(self, X):
        """Return ``X`` itself, or its 1x1-convolved projection."""
        # Compare against None explicitly instead of relying on the
        # truthiness of an nn.Module (container modules define __len__,
        # so an implicit bool test is fragile).
        if self.proj is None:
            return X
        return self.proj(X)

In [None]:
class ResidualBlock(nn.Module):
    """Two 3x3 convolution layers whose output is added to a shortcut.

    Each layer is Conv2d -> BatchNorm2d -> Dropout2d -> ReLU, with the
    dropout probability read from ``wandb.config.dropout``. When the
    channel count changes, the first convolution uses stride 2; the
    shortcut is a ``Projection`` built from the same channel counts.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        # A change in channel count is also where the block downsamples.
        stride = 1 if in_channels == out_channels else 2
        dropout_p = wandb.config.dropout

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=3, padding=1, stride=stride),
            nn.BatchNorm2d(out_channels),
            nn.Dropout2d(p=dropout_p),
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.Dropout2d(p=dropout_p),
            nn.ReLU(),
        )

        self.proj = Projection(in_channels, out_channels)

    def forward(self, X):
        """Run both conv layers on ``X`` and add the shortcut branch."""
        conv_out = self.layer2(self.layer1(X))
        return conv_out + self.proj(X)

In [None]:
class ResNet34(nn.Module):
    """ResNet-34-style image classifier assembled from ``ResidualBlock``s.

    Layout: a 7x7 stride-2 stem convolution, a 3x3 stride-2 max-pool, four
    residual stages of 3, 4, 6 and 3 blocks (64, 128, 256 and 512
    channels), a 2x2 average pool and a single linear classifier.

    ``forward`` returns raw, unnormalised class scores (logits), which is
    the input ``nn.CrossEntropyLoss`` expects. Apply
    ``torch.softmax(out, dim=1)`` if probabilities are needed; the argmax
    (predicted class) is the same either way.

    The classifier is fixed at 4608 input features, i.e. a 512 x 3 x 3
    feature map after the average pool, which is what 224 x 224 inputs
    produce with the layers below.

    Args:
        num_labels: Number of output classes.
    """

    def __init__(self, num_labels):
        super().__init__()

        self.num_labels = num_labels
        dropout_p = wandb.config.dropout

        # 7x7 Conv
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2),
            nn.BatchNorm2d(num_features=64),
            nn.Dropout2d(p=dropout_p),
            nn.ReLU(),
        )

        # 3x3 MaxPool
        self.layer2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
        )

        # Stage 1
        self.stage1 = nn.Sequential(
            ResidualBlock(in_channels=64, out_channels=64),
            ResidualBlock(in_channels=64, out_channels=64),
            ResidualBlock(in_channels=64, out_channels=64),
        )

        # Stage 2
        self.stage2 = nn.Sequential(
            ResidualBlock(in_channels=64, out_channels=128),
            ResidualBlock(in_channels=128, out_channels=128),
            ResidualBlock(in_channels=128, out_channels=128),
            ResidualBlock(in_channels=128, out_channels=128),
        )

        # Stage 3
        self.stage3 = nn.Sequential(
            ResidualBlock(in_channels=128, out_channels=256),
            ResidualBlock(in_channels=256, out_channels=256),
            ResidualBlock(in_channels=256, out_channels=256),
            ResidualBlock(in_channels=256, out_channels=256),
            ResidualBlock(in_channels=256, out_channels=256),
            ResidualBlock(in_channels=256, out_channels=256),
        )

        # Stage 4
        self.stage4 = nn.Sequential(
            ResidualBlock(in_channels=256, out_channels=512),
            ResidualBlock(in_channels=512, out_channels=512),
            ResidualBlock(in_channels=512, out_channels=512),
        )

        # AveragePool
        self.avgpool = nn.AvgPool2d(kernel_size=2)

        # Fully Connected.
        # No Softmax here: nn.CrossEntropyLoss applies log-softmax itself,
        # so feeding it probabilities would apply softmax twice and flatten
        # the gradients. The layer stays inside an nn.Sequential so the
        # parameter names remain `fc.0.weight` / `fc.0.bias`.
        self.fc = nn.Sequential(
            nn.Linear(in_features=4608, out_features=num_labels),
        )

    def forward(self, X):
        """Return the per-class logits for the image batch ``X``."""
        a = self.layer1(X)
        a = self.layer2(a)

        a = self.stage1(a)
        a = self.stage2(a)
        a = self.stage3(a)
        a = self.stage4(a)

        a = self.avgpool(a)

        # Flatten everything but the batch dimension for the linear layer.
        a = a.reshape(a.size(0), -1)

        return self.fc(a)
        

## Train the Model



In [None]:
# Build the network and wrap it in DataParallel; when CUDA is available
# the wrapped model is moved onto the selected device.
# NOTE: restoring a previously saved 'resnet.pt' is currently disabled.
model = nn.DataParallel(ResNet34(num_labels=num_labels))
model = model.to(device) if use_cuda else model

# Magic
#wandb.watch(model)



In [None]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
#criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=.1)


In [None]:
#local_batch, local_labels = iter(train_loader).__next__()

In [None]:
#X, y = local_batch.to(device), local_labels.to(device)

In [None]:
#y_pred = model.forward(X)

In [None]:
#loss = criterion(y_pred, y)

In [None]:
#optimizer.zero_grad()

In [None]:
#loss.backward()

In [None]:
#optimizer.step()

In [None]:
def train_model(model, loader, learning_rate=0.1, losses=None, max_epochs=20):
    """Train ``model`` with plain SGD and cross-entropy loss.

    After every epoch the epoch index, the loss of the last batch and the
    epoch duration in seconds are logged to wandb and printed.

    Args:
        model: Network to optimise; it is called on each input batch.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        learning_rate: Learning rate passed to ``torch.optim.SGD``.
        losses: Optional list to which every per-batch loss value is
            appended (mutated in place). When omitted, a fresh
            ``[10**10]`` list is created for this call.
        max_epochs: Number of passes over ``loader``.

    Returns:
        The tuple ``(model, losses)``.
    """
    # Create the default per call; a list literal in the signature would
    # be shared (and keep growing) across calls.
    if losses is None:
        losses = [10**10]

    criterion = nn.CrossEntropyLoss()
    wandb.config.optimizer = "SGD"
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        # Stays NaN only if the loader yields no batches at all.
        last_loss = float("nan")
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)

            # Call the module itself rather than .forward() so that any
            # registered hooks run.
            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            last_loss = loss.item()
            losses.append(last_loss)
        duration = time.time() - t0

        wandb.log({'epoch': epoch, 'loss': last_loss, 'secs_per_epoch': duration})
        print(' ' * 4, '%.1f seconds -' % (duration), 'epoch:', epoch, 'loss:', last_loss)

    return model, losses

In [None]:
# Training driver: run `training_loops` rounds of `max_epochs` epochs each,
# saving the model after every round and dividing the learning rate by 10
# before the next one.
total_loops = wandb.config.training_loops
max_epochs = wandb.config.max_epochs
learning_rate = wandb.config.initial_lr
losses = [10**10]

model.train()
for loop_idx in range(total_loops):
    # train model
    print('learning_rate:', learning_rate, 'max_epochs:', max_epochs)
    model, losses = train_model(
        model,
        train_loader,
        learning_rate=learning_rate,
        losses=losses,
        max_epochs=max_epochs,
    )

    # Save weights: move the model to the CPU, pickle the whole model
    # object to 'resnet.pt', then move it back to the training device.
    cpu_model = model.to(torch.device('cpu'))
    torch.save(cpu_model, 'resnet.pt')
    if use_cuda:
        model = model.to(device)

    # slow down learning
    learning_rate /= 10


In [None]:
def eval(model, loader, name):
    """Measure top-1 accuracy of ``model`` on ``loader`` and report it.

    The accuracy (in percent) is printed and logged to wandb under the key
    ``name + '_set_accuracy'``.

    NOTE: the function name shadows the builtin ``eval``; it is kept so
    existing callers continue to work.

    Args:
        model: Network to evaluate; called on each batch of images.
        loader: Iterable yielding ``(images, labels)`` batches.
        name: Label for the split (e.g. ``'train'``), used in the printed
            message and in the wandb key.
    """
    # Evaluate in inference mode so Dropout is disabled and BatchNorm uses
    # its running statistics; remember the previous mode to restore it.
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                # Predicted class = index of the highest score per sample.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    accuracy = 100 * correct / total
    print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    wandb.log({name + '_set_accuracy': accuracy})

# Report accuracy on the training split first, then on the dev split.
for _split_name, _split_loader in (('train', train_loader), ('dev', dev_loader)):
    eval(model, _split_loader, _split_name)