# ConvNet Example

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
import time

import wandb

In [2]:
for i in range(3):
    print(i, torch.cuda.get_device_name(i))

0 GeForce RTX 2080 Ti
1 GeForce GTX 1080 Ti
2 GeForce GTX 980


In [3]:
config = {
            'device': 'cuda:0',
            'initializer': 'xavier_normal_',
            'learning_rate': 0.1,
            'load_workers': 20, 
            'batch_size': 190,
            'max_epochs': 1,
            'training_loops': 1,
            'optimizer': 'SGD',
            'dataset': 'oxford-iiit-pet',
         }
wandb.init(project="toy-conv-net", config=config)

W&B Run: https://app.wandb.ai/aletheap/toy-conv-net/runs/tuwxzy8x

In [4]:
device = torch.device(wandb.config.device)
print("Using device:", device)

Using device: cuda:0


## Load our data. 

I'm using advice from https://www.learnopencv.com/pytorch-for-beginners-image-classification-using-pre-trained-models/ about regularizing image data. 


In [5]:
transform = transforms.Compose([transforms.Resize(256),
                                transforms.RandomCrop(224),
                                transforms.ColorJitter(brightness=.5, contrast=.5, saturation=.5, hue=.5),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(90),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])

dev_test_transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),         
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],    
                                                     std=[0.229, 0.224, 0.225])
                               ])


datadir = os.path.join("/home/apower/data", wandb.config.dataset) 
print('datadir:', datadir)
traindir = os.path.join(datadir, 'train')
print('traindir:', traindir)
devdir = os.path.join(datadir, 'dev')
print('devdir:', devdir)
testdir = os.path.join(datadir, 'test')
print('testdir:', testdir)

X_train = torchvision.datasets.ImageFolder(traindir, transform)
X_dev = torchvision.datasets.ImageFolder(devdir, dev_test_transform)
X_test = torchvision.datasets.ImageFolder(testdir, dev_test_transform)

num_labels = len(X_train.classes)

print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\ntest_set:', len(X_test), '\nlabels:', num_labels)
#print('training_set:', len(X_train), '\ndev_set:', len(X_dev), '\nlabels:', num_labels)

datadir: /home/apower/data/oxford-iiit-pet
traindir: /home/apower/data/oxford-iiit-pet/train
devdir: /home/apower/data/oxford-iiit-pet/dev
testdir: /home/apower/data/oxford-iiit-pet/test
training_set: 5760 
dev_set: 800 
test_set: 800 
labels: 38


In [6]:
#to_pic = torchvision.transforms.ToPILImage()
#to_pic(X_train[0][0])

In [7]:
train_loader = DataLoader(X_train, batch_size=wandb.config.batch_size, shuffle=True, num_workers=wandb.config.load_workers)
dev_loader = DataLoader(X_dev, batch_size=1, shuffle=True)
test_loader = DataLoader(X_test, batch_size=1, shuffle=True)

## Let's Build the Model

In [8]:
class TestNet(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.num_labels = num_labels
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=64, kernel_size=16),
                                    nn.ReLU(),
                                    #nn.BatchNorm2d(num_features=64),
                                    nn.MaxPool2d(kernel_size=16, stride=16),
                                   )
        self.layer2 = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4),
                                    nn.ReLU(),
                                    #nn.BatchNorm2d(num_features=128),
                                    nn.MaxPool2d(kernel_size=4, stride=4),
                                   )
        self.fc1 = nn.Sequential(nn.Linear(in_features=512, out_features=num_labels),
                                 nn.ReLU(),
                                )                
    def forward(self, X):
        #assert X.size() == torch.Size([1, 3, 224, 224]) 
        a1 = self.layer1(X)
        #print('a1.size():', a1.size())
        a2 = self.layer2(a1)
        #print('a2.size():', a2.size())
        a2 = a2.reshape(a2.size(0), -1)
        #print('reshaped a2.size():', a2.size())
        a3 = self.fc1(a2)
        #print('a3.size():', a3.size())
        return a3
        

## Train the Model



In [9]:
model = TestNet(num_labels=num_labels)
#wandb.watch(model)
#model = nn.DataParallel(model)
model = model.to(device)

In [10]:
params = model.parameters()
p = list(params)[-1]
print(p)
#print(p + 2)

Parameter containing:
tensor([ 0.0040,  0.0356, -0.0203, -0.0009, -0.0434, -0.0328,  0.0197, -0.0213,
         0.0344, -0.0407,  0.0030, -0.0021, -0.0110,  0.0002, -0.0393, -0.0115,
         0.0237,  0.0043, -0.0062,  0.0130, -0.0423,  0.0297,  0.0264,  0.0401,
         0.0259,  0.0146,  0.0088,  0.0384, -0.0022, -0.0108,  0.0246,  0.0119,
         0.0314,  0.0146,  0.0328, -0.0346,  0.0294,  0.0404], device='cuda:0',
       requires_grad=True)


In [11]:
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [12]:
def accuracy(model, loader, name):
    #model = model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    #print('Accuracy of the network on the %s images: %.1f %%' % (name, accuracy))
    #wandb.log({name + '_set_accuracy': accuracy})
    return accuracy

In [21]:
def train_model(model, loader, learning_rate=0.1, max_epochs=20):
    model = model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(torch.optim, wandb.config.optimizer)(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        t0 = time.time()
        for local_batch, local_labels in loader:
            # Transfer to GPU
            X, y = local_batch.to(device), local_labels.to(device)
            y_pred = model.forward(X)
            loss = criterion(y_pred, y)
            loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        t1 = time.time()
        duration = t1-t0
        loss_num = loss.item()
        train_accuracy = accuracy(model, train_loader, 'train')
        dev_accuracy = accuracy(model, dev_loader, 'dev')
        wandb.log({'loss': loss.item(), 'secs_per_epoch': duration, 'train_accuracy': train_accuracy, 'dev_accuracy': dev_accuracy, 'relative_accuracy': dev_accuracy / train_accuracy})
        print(' ' * 4, '%.1f seconds -' % (duration), 'epoch:', epoch, 'loss:', loss_num, 'train:', train_accuracy, 'dev:', dev_accuracy, 'relative_accuracy:', dev_accuracy / train_accuracy)

    return model

In [15]:
p

Parameter containing:
tensor([ 0.0040,  0.0356, -0.0203, -0.0009, -0.0434, -0.0328,  0.0197, -0.0213,
         0.0344, -0.0407,  0.0030, -0.0021, -0.0110,  0.0002, -0.0393, -0.0115,
         0.0237,  0.0043, -0.0062,  0.0130, -0.0423,  0.0297,  0.0264,  0.0401,
         0.0259,  0.0146,  0.0088,  0.0384, -0.0022, -0.0108,  0.0246,  0.0119,
         0.0314,  0.0146,  0.0328, -0.0346,  0.0294,  0.0404], device='cuda:0',
       requires_grad=True)

In [22]:
for i in range(wandb.config.training_loops):
    # train model
    print('learning_rate:', wandb.config.learning_rate, 'max_epochs:', wandb.config.max_epochs)
    model = train_model(model, 
                        train_loader, 
                        learning_rate=wandb.config.learning_rate, 
                        max_epochs=wandb.config.max_epochs)

    # save weights
    #cpu_model = model.to(torch.device('cpu'))
    #torch.save(cpu_model, 'resnet-augmenting.pt')
    #model = model.to(device)

    # slow down learning
    #learning_rate = learning_rate / 10
    #max_epochs = max_epochs + 300


learning_rate: 0.1 max_epochs: 1
     13.6 seconds - epoch: 0 loss: 3.5810225009918213 train: 5.503472222222222 dev: 5.5 relative_accuracy: 0.9993690851735015


In [17]:
p2 = list(model.parameters())[-1]

In [18]:
p

Parameter containing:
tensor([ 0.0038,  0.0332, -0.0168, -0.0031, -0.0452, -0.0328,  0.0181, -0.0236,
         0.0446, -0.0403,  0.0034, -0.0040, -0.0142, -0.0033, -0.0419, -0.0184,
         0.0236,  0.0015, -0.0072,  0.0091, -0.0490,  0.0292,  0.0280,  0.0376,
         0.0264,  0.0116,  0.0013,  0.0384, -0.0037, -0.0070,  0.0203,  0.0044,
         0.0318,  0.0116,  0.0349, -0.0378,  0.0266,  0.0391], device='cuda:0',
       requires_grad=True)

In [19]:
p2

Parameter containing:
tensor([ 0.0038,  0.0332, -0.0168, -0.0031, -0.0452, -0.0328,  0.0181, -0.0236,
         0.0446, -0.0403,  0.0034, -0.0040, -0.0142, -0.0033, -0.0419, -0.0184,
         0.0236,  0.0015, -0.0072,  0.0091, -0.0490,  0.0292,  0.0280,  0.0376,
         0.0264,  0.0116,  0.0013,  0.0384, -0.0037, -0.0070,  0.0203,  0.0044,
         0.0318,  0.0116,  0.0349, -0.0378,  0.0266,  0.0391], device='cuda:0',
       requires_grad=True)

In [20]:
wandb.config.max_epochs = 20

ConfigError: Attempted to change value of key "max_epochs" from 1 to 20
If you really want to do this, pass allow_val_change=True to config.update()

In [23]:
p3 = list(model.parameters())[-1]

In [24]:
p3

Parameter containing:
tensor([ 0.0038,  0.0343, -0.0108, -0.0097, -0.0453, -0.0328,  0.0184, -0.0188,
         0.0528, -0.0425,  0.0054, -0.0050, -0.0147,  0.0011, -0.0420, -0.0224,
         0.0236, -0.0011, -0.0067,  0.0085, -0.0503,  0.0364,  0.0260,  0.0357,
         0.0257,  0.0118,  0.0035,  0.0379, -0.0005, -0.0008,  0.0204,  0.0048,
         0.0311,  0.0085,  0.0339, -0.0386,  0.0251,  0.0353], device='cuda:0',
       requires_grad=True)