# Part 1: Train the model for Experiment 1, 4.4.1 Multiclass Classification of Images

In [1]:
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms
 
import os
import argparse
import numpy as np
#from models import *
import matplotlib.pyplot as plt

from torchvision.datasets import CIFAR100
from torchvision.datasets import CIFAR10
import torch
import torch.nn as nn
import torch.nn.functional as F

import random
import time
from datetime import datetime

import networkx as nx
import numpy as np
import torch
import torch.optim as optim


import os
import sklearn
from sklearn.metrics import confusion_matrix

from torchvision import models
from torchsummary import summary

In [2]:
# Width of the embedding layer: DLA.linear1 maps 512 features to `embed_dim`
# values and DLA.linear2 maps those `embed_dim` values to the class scores.
embed_dim = 16

In [3]:
embed_dim

16

# DLA Model

In [4]:
'''DLA in PyTorch.
Reference:
    Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
'''
Code taken from https://github.com/kuangliu/pytorch-cifar/blob/master/models/dla.py
and used under the license published at https://github.com/kuangliu/pytorch-cifar/blob/master/LICENSE
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages added to a shortcut branch.

    The shortcut is an empty ``nn.Sequential`` unless the stride differs from 1
    or the number of channels changes; in that case it is a 1x1 convolution
    (with the same stride) followed by batch norm.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first convolution and of the 1x1 shortcut convolution.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential passes the input through.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class Root(nn.Module):
    """Aggregation node: concatenate feature maps, then conv -> batch norm -> ReLU.

    Args:
        in_channels: Total number of channels after concatenating the inputs.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size of the convolution; the padding is
            ``(kernel_size - 1) // 2``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1):
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride=1, padding=pad, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, xs):
        """Concatenate the tensors in ``xs`` along dim 1 and apply conv, bn, ReLU."""
        merged = torch.cat(xs, 1)
        return F.relu(self.bn(self.conv(merged)))


class Tree(nn.Module):
    """Aggregation tree built from ``block`` modules and a ``Root`` node.

    For ``level == 1`` the tree chains two blocks and feeds both outputs to the
    root. Otherwise it additionally holds ``prev_root`` (a block applied to the
    tree input) and the sub-trees ``level_{level-1}`` ... ``level_1``; the root
    then receives ``level + 2`` feature maps.

    Args:
        block: Class used to build the nodes; called as
            ``block(in_channels, out_channels, stride=...)``.
        in_channels: Number of channels of the tree input.
        out_channels: Number of channels of every node output.
        level: Depth of the tree.
        stride: Stride passed to the modules that consume ``in_channels``.
    """

    def __init__(self, block, in_channels, out_channels, level=1, stride=1):
        super().__init__()
        self.level = level

        if level == 1:
            self.root = Root(2 * out_channels, out_channels)
            self.left_node = block(in_channels, out_channels, stride=stride)
            self.right_node = block(out_channels, out_channels, stride=1)
            return

        self.root = Root((level + 2) * out_channels, out_channels)
        # Sub-trees are registered from the deepest (level - 1) down to level 1.
        for depth in range(level - 1, 0, -1):
            child = Tree(block, in_channels, out_channels,
                         level=depth, stride=stride)
            setattr(self, 'level_%d' % depth, child)
        self.prev_root = block(in_channels, out_channels, stride=stride)
        self.left_node = block(out_channels, out_channels, stride=1)
        self.right_node = block(out_channels, out_channels, stride=1)

    def forward(self, x):
        """Collect the intermediate feature maps and aggregate them with the root."""
        collected = []
        if self.level > 1:
            collected.append(self.prev_root(x))

        # Sub-trees are applied in sequence, each consuming the previous output.
        for depth in range(self.level - 1, 0, -1):
            x = getattr(self, 'level_%d' % depth)(x)
            collected.append(x)

        for node in (self.left_node, self.right_node):
            x = node(x)
            collected.append(x)

        return self.root(collected)


class DLA(nn.Module):
    """Deep Layer Aggregation classifier with an intermediate embedding layer.

    Three conv/batch-norm/ReLU stages are followed by four ``Tree`` stages, a
    4x4 average pooling and two linear layers: ``linear1`` (512 -> ``embed_dim``)
    produces the embedding, ``linear2`` (``embed_dim`` -> ``num_classes``) the
    class scores. ``embed_dim`` is read from the module-level variable of that
    name at construction time.

    Args:
        block: Block class used inside every ``Tree`` stage.
        num_classes: Number of class scores produced by ``linear2``.
    """

    def __init__(self, block=BasicBlock, num_classes=10):
        super().__init__()
        self.base = self._conv_bn_relu(3, 16)
        self.layer1 = self._conv_bn_relu(16, 16)
        self.layer2 = self._conv_bn_relu(16, 32)

        self.layer3 = Tree(block,  32,  64, level=1, stride=1)
        self.layer4 = Tree(block,  64, 128, level=2, stride=2)
        self.layer5 = Tree(block, 128, 256, level=2, stride=2)
        self.layer6 = Tree(block, 256, 512, level=1, stride=2)
        self.linear1 = nn.Linear(512, embed_dim)
        self.linear2 = nn.Linear(embed_dim, num_classes)

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels):
        """Return a 3x3 stride-1 convolution followed by batch norm and in-place ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )

    def forward(self, x, return_embedding=False):
        """Compute the class scores for a batch of images.

        Args:
            x: Input batch with 3 channels (``base`` starts with ``Conv2d(3, 16)``).
            return_embedding: When true, also return the output of ``linear1``.

        Returns:
            The output of ``linear2``; or the tuple ``(scores, embedding)`` when
            ``return_embedding`` is true.
        """
        out = self.base(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        # Fixed 4x4 window: with 32x32 inputs the three stride-2 stages leave a
        # 4x4 map, so pooling yields one value per channel (512 features).
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        emb = self.linear1(out)
        scores = self.linear2(emb)
        if return_embedding:
            return scores, emb
        return scores


def test():
    """Smoke test: build a DLA, print it, and print its output for one random 3x32x32 input."""
    model = DLA()
    print(model)
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))


if __name__ == '__main__':
    test()

DLA(
  (base): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer3): Tree(
    (root): Root(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (left_node): BasicBlock(
      (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1

In [6]:
# Instantiate the DLA model defined above
net = DLA()

In [7]:
# Display the network structure (the printed representation of the module)
net

DLA(
  (base): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer3): Tree(
    (root): Root(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (left_node): BasicBlock(
      (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1

In [8]:
# Total number of parameters: element counts summed over all parameter tensors of the network
sum(param.nelement() for param in net.parameters())

16294634

In [101]:
# Image preprocessing. Training images get random-crop and horizontal-flip
# augmentation; both pipelines convert to tensors and normalize each channel
# with the same mean / standard-deviation constants.

_channel_mean = (0.4914, 0.4822, 0.4465)
_channel_std = (0.2023, 0.1994, 0.2010)

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

In [102]:
# Load the CIFAR-10 training split from `root` (download=False, so the files must
# already be on disk) with the training transforms, and wrap it in a DataLoader
# that shuffles the samples and yields batches of 128.

trainset = torchvision.datasets.CIFAR10(
    root='/workspace/cifar-10-batches-py', train=True, download=False, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2) #batch_size=1

In [103]:
# Check the length of the training data

len(trainloader.dataset)

50000

In [104]:
# Load the CIFAR-10 test split from `root` (download=False, so the files must
# already be on disk) with the test transforms, and wrap it in an unshuffled
# DataLoader that yields batches of 100.
# NOTE(review): this root differs from the one used for the training split
# ('/workspace/cifar-10-batches-py') - confirm that both paths are intended.

testset = torchvision.datasets.CIFAR10(
    root='/workspace/SPC_Embeddings/cifar-10-batches-py', train=False, download=False, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2) #batch_size=1

In [114]:
# Names of the ten classes.
# NOTE(review): presumably listed in label-index order (0..9) - verify against
# the dataset's own class list before using this tuple to decode predictions.

classes = ('airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

In [106]:
# Choose the device ('cpu' here), build a fresh network on that device, and
# remember the current working directory (used when saving checkpoints).

device = 'cpu'
# Keep the model on the same device the batches are moved to in train()/test();
# for 'cpu' this is a no-op.
net = DLA().to(device)
data_path = os.getcwd()

In [107]:
# Loss function (cross entropy), optimizer (SGD with momentum and weight decay)
# and a cosine-annealing learning-rate scheduler.
# NOTE(review): no `scheduler.step()` call appears in the training code visible
# in this notebook, so the schedule would never take effect and the learning
# rate would stay at 0.001 - confirm whether that is intended.

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001,
                      momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [108]:
# Define the train process

def train(epoch):
    """Run one training epoch over ``trainloader`` and return its metrics.

    Uses the module-level ``net``, ``trainloader``, ``device``, ``optimizer``
    and ``criterion``.

    Args:
        epoch: Number of the current epoch; accepted for the caller's
            convenience but not used inside the function.

    Returns:
        A tuple ``(loss, accuracy, f1)`` for the whole epoch: the loss averaged
        over ``len(trainloader.dataset)`` samples, the accuracy in percent, and
        the macro-averaged F1 score.
    """
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    ypred = []
    ytrue = []
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; weight it by the batch size so
        # that the epoch value is a per-sample average even with a short last batch.
        train_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Collect labels for the F1 score. Copy to host memory first so this
        # also works when `device` is not the CPU.
        ypred.extend(predicted.cpu().tolist())
        ytrue.extend(targets.cpu().tolist())

    epoch_loss = train_loss / len(trainloader.dataset)
    epoch_acc = 100. * correct / total
    epoch_f1 = sklearn.metrics.f1_score(ytrue, ypred, average='macro')
    return epoch_loss, epoch_acc, epoch_f1

In [109]:
# Define the test process

def test(epoch):
    """Evaluate ``net`` on ``testloader`` without gradient tracking.

    Uses the module-level ``net``, ``testloader``, ``device`` and ``criterion``.

    Args:
        epoch: Number of the current epoch; accepted for the caller's
            convenience but not used inside the function.

    Returns:
        A tuple ``(loss, accuracy, f1)`` for the whole test set: the loss
        averaged over ``len(testloader.dataset)`` samples, the accuracy in
        percent, and the macro-averaged F1 score.
    """
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    ypred = []
    ytrue = []
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # The criterion returns the batch mean; weight it by the batch size
            # to obtain a per-sample average over the whole set.
            test_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # Collect labels for the F1 score. Copy to host memory first so
            # this also works when `device` is not the CPU.
            ypred.extend(predicted.cpu().tolist())
            ytrue.extend(targets.cpu().tolist())

    epoch_loss = test_loss / len(testloader.dataset)
    epoch_acc = 100. * correct / total
    epoch_f1 = sklearn.metrics.f1_score(ytrue, ypred, average='macro')
    return epoch_loss, epoch_acc, epoch_f1


In [110]:
# Early-stopping state and patience criterion: training stops once the macro F1
# score on the test data has stayed below the best value seen so far for
# `epoch_patience` (here 10) epochs in a row.

f1score_max = 0.0  # best test F1 score seen so far
epoch_patience = 10  # number of consecutive non-improving epochs that ends training
epochs = 0  # current count of consecutive epochs below the best F1 score

In [111]:
# Helpers used to build the file names of the interim models saved during training

def _element_text(item):
    """Return the text for one element, independent of the installed NumPy version."""
    if isinstance(item, np.inexact):
        # str() of a NumPy float/complex scalar is the plain number, whereas
        # repr() is 'np.float64(...)' from NumPy 2 on.
        return str(item)
    if isinstance(item, np.generic):
        # Other NumPy scalars: format the equivalent Python object.
        return repr(item.item())
    return repr(item)


def to_str(var):
    """Render the elements of ``var`` as one comma-separated string.

    ``var`` may be a scalar or any (nested) sequence accepted by
    ``np.asarray``; it is flattened first. Used to build checkpoint file names.

    Args:
        var: Scalar or array-like value.

    Returns:
        The elements joined by ``', '`` (an empty string for empty input),
        e.g. ``'0.904'`` for ``0.904`` and ``'1, 2, 3'`` for ``[1, 2, 3]``.
    """
    flat = np.asarray(var).reshape(-1)
    return ', '.join(_element_text(item) for item in flat)

def truncate(n, decimals=0):
    """Cut ``n`` off after ``decimals`` decimal places, without rounding.

    The value is scaled by ``10 ** decimals``, its fractional part is dropped
    with ``int()`` (i.e. toward zero), and the result is scaled back.
    """
    factor = 10 ** decimals
    whole_part = int(n * factor)
    return whole_part / factor

In [112]:
# Train the model for at most 1000 epochs, recording the per-epoch metrics in
# `hist`. Whenever the test F1 score exceeds the best value so far, the weights
# are saved; training stops early after `epoch_patience` consecutive epochs
# whose test F1 score is below the best value.
# NOTE(review): `scheduler` is never stepped in this loop, so the learning rate
# stays constant - confirm whether that is intended.

hist = {"loss":[], "acc":[], "testloss":[], "test_acc":[], "f1_train":[], "f1_test":[]}
for epoch in range(1, 1001):
    train_loss, train_acc, f1score_train = train(epoch)
    test_loss, test_acc, f1score_test = test(epoch)

    hist["loss"].append(train_loss)
    hist["testloss"].append(test_loss)
    hist["acc"].append(train_acc)
    hist["test_acc"].append(test_acc)
    hist["f1_train"].append(f1score_train)
    hist["f1_test"].append(f1score_test)

    # Report before the early-stopping check so the last epoch is printed too.
    print(f'Epoch: {epoch}, Train loss: {train_loss:.3}, Test loss: {test_loss:.3}, Train acc.: {train_acc:.3}, Test acc.: {test_acc:.3}, f1_train:{f1score_train:.3}, f1_test:{f1score_test:.3}')

    if f1score_test > f1score_max:
        # File name starts with the F1 score truncated to three decimals.
        checkpoint_name = to_str(truncate(f1score_test, decimals=3)) + 'DLA_CIFAR10.pt'
        # os.path.join inserts the separator that plain concatenation with
        # os.getcwd() would omit, so the file is written inside `data_path`.
        torch.save(net.state_dict(), os.path.join(data_path, checkpoint_name))
        f1score_max = f1score_test

    if f1score_test < f1score_max:
        epochs += 1
        if epochs == epoch_patience:
            break
    else:
        # New best (or a tie with the best): restart the patience counter.
        epochs = 0

Epoch: 1, Train loss: 1.99, Test loss: 1.76, Train acc.: 23.0, Test acc.: 33.5, f1_train:0.204, f1_test:0.306
Epoch: 2, Train loss: 1.62, Test loss: 1.52, Train acc.: 38.5, Test acc.: 42.7, f1_train:0.376, f1_test:0.409
Epoch: 3, Train loss: 1.37, Test loss: 1.29, Train acc.: 49.5, Test acc.: 53.7, f1_train:0.485, f1_test:0.525
Epoch: 4, Train loss: 1.16, Test loss: 1.21, Train acc.: 58.3, Test acc.: 57.2, f1_train:0.578, f1_test:0.562
Epoch: 5, Train loss: 1.01, Test loss: 1.02, Train acc.: 64.0, Test acc.: 64.2, f1_train:0.638, f1_test:0.633
Epoch: 6, Train loss: 0.895, Test loss: 0.877, Train acc.: 68.4, Test acc.: 69.1, f1_train:0.682, f1_test:0.69
Epoch: 7, Train loss: 0.803, Test loss: 0.805, Train acc.: 71.7, Test acc.: 71.5, f1_train:0.716, f1_test:0.718
Epoch: 8, Train loss: 0.717, Test loss: 0.783, Train acc.: 75.0, Test acc.: 73.2, f1_train:0.749, f1_test:0.73
Epoch: 9, Train loss: 0.649, Test loss: 0.743, Train acc.: 77.4, Test acc.: 74.3, f1_train:0.774, f1_test:0.742
Epoc

In [113]:
# Check the best macro F1 score reached on the test data (not the accuracy); the model saved at this score is the final version used for creating the embeddings
f1score_max

0.9043093347866646