In [3]:
import os
import torch
from torch import nn
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torchvision import transforms
from collections import OrderedDict
from accelerate import Accelerator

In [263]:
import time

In [4]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

# Config

In [257]:
def get_global_configuration():
    """ Retrieve configuration of the training process. """

    global_config = {
      "num_layers_to_add": 5,
    }

    return global_config

In [301]:
def get_model_configuration():
    """ Retrieve configuration for the model. """

    model_config = {
      "width": 32,
      "height": 32,
      "channels": 3,
      "num_classes": 10,
      "batch_size": 250,
      "loss_function": nn.CrossEntropyLoss,
      "optimizer": torch.optim.Adam,
      "num_epochs": 3,
      "hidden_layer_dim": 256,
    }

    return model_config

# NN Models

In [247]:
class LayerConfigurableNN(nn.Module):
    '''
    Layer-wise configurable NN
    '''
    def __init__(self, added_layers = 0):
        super().__init__()

        # Retrieve model configuration
        self.config = get_model_configuration()
        self.width, self.height, self.channels = config.get("width"), config.get("height"), config.get("channels")
        self.flatten_shape = config.get("width") * config.get("height") * config.get("channels")
        self.layer_dim = config.get("layer_dim")
        self.num_classes = config.get("num_classes")

        # Create layer structure
        layers = self.init_layers()
        
        for i in range(added_layers):
            self.add_layer()

        # Create output layers
        for layer in self.get_output_layers():
            layers.append((str(len(layers)), layer))

        # Initialize the Sequential structure
        self.layers = nn.Sequential(OrderedDict(layers))
    
    def init_layers(self):
        raise NotImplementedError
        
    def get_intermediate_layers(self):
        raise NotImplementedError 
        
    def get_output_layer(self):
        raise NotImplementedError

    def forward(self, x):
        '''Forward pass'''
        return self.layers(x)

    def set_structure(self, layers):
        self.layers = nn.Sequential(OrderedDict(layers))
        
    def num_weights(self):
        return sum(p.numel() for p in self.parameters())

    def num_trainable_weights(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
    
    def add_layer(self):
        """ Add a new layer to a model, setting all others to nontrainable. """
        config = get_model_configuration()

        # Retrieve current layers
        layers = self.layers
        print("="*50)
        print("Old structure:")
        print(layers)

        # Save last layer for adding later
        last_layer = layers[-1]

        # Define new structure
        new_structure = []

        # Iterate over all except last layer
        for layer_index in range(len(layers) - 1):

            # For old layer, set all parameters to nontrainable
            old_layer = layers[layer_index]
            for param in old_layer.parameters():
                param.requires_grad = False

            # Append old layer to new structure
            new_structure.append((str(layer_index), old_layer))

        # Append new layer to the final intermediate layer
        new_layers = self.get_intermediate_layers()
        for layer in new_layers:
            new_structure.append((str(len(new_structure)), layer))

        # Re-add last layer
        new_structure.append((str(len(new_structure)), last_layer))

        # Change the model structure
        self.set_structure(new_structure)

        # Return the model
        print("="*50)
        print("New structure:")
        print(self.layers)

In [222]:
# The images in CIFAR-10 are of size 3x32x32

In [250]:
class LayerConfigurableCNN(LayerConfigurableNN):
    '''
    Layer-wise configurable CNN.
    '''
    def __init__(self, added_layers = 0):
        self.out_channels = 6
        self.init_kernel_size = 8
        self.hidden_kernel_size = 4
        self.mp_layers = 1 # max pool layers

        super().__init__()
        
    def init_layers(self):
        return [(str(0), nn.Conv2d(self.channels, self.out_channels, self.init_kernel_size)), 
                (str(1), nn.MaxPool2d(2)), 
                (str(2), nn.ReLU())]
        
    def get_intermediate_layers(self):
        return [nn.Conv2d(self.out_channels, self.out_channels, self.hidden_kernel_size), nn.ReLU()]
    
    def get_output_layers(self):
        self.flatten_out_shape = int(self.out_channels * (self.width / (2 * self.mp_layers) - self.hidden_kernel_size
            ) * (self.height / (2 * self.mp_layers) - self.hidden_kernel_size))
        
        return [nn.Flatten(), nn.Linear(self.flatten_out_shape, self.num_classes)]



In [251]:
LayerConfigurableCNN()

LayerConfigurableCNN(
  (layers): Sequential(
    (0): Conv2d(3, 6, kernel_size=(8, 8), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=864, out_features=10, bias=True)
  )
)

In [224]:
class LayerConfigurableMLP(LayerConfigurableNN):
    '''
    Layer-wise configurable Multilayer Perceptron.
    '''
    def __init__(self, added_layers = 0):
        super().__init__()

    def init_layers(self):
        return [
          (str(0), nn.Flatten()),
          (str(1), nn.Linear(self.flatten_shape, self.layer_dim)),
          (str(2), nn.ReLU())
        ]

        
    def get_intermediate_layers(self):
        return [nn.Linear(self.layer_dim, self.layer_dim), nn.ReLU()]
    
    def get_output_layers(self):
        return [nn.Linear(self.layer_dim, self.num_classes)]

In [186]:
dataset = CIFAR10(os.getcwd(), download=True, transform=transforms.ToTensor())

Files already downloaded and verified


In [187]:
def get_dataset(train=True, invariant=False):
    """ Load and convert dataset into inputs and targets """
    config = get_model_configuration()
    if invariant:
        T = transforms.Compose([
            transforms.RandomChoice([transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip()]),
            transforms.RandomRotation((0, 360)),
            transforms.ToTensor(),
        ])
    else:
        T = transforms.ToTensor()
    dataset = CIFAR10(os.getcwd(), train=train, download=True, transform=T)
    trainloader = torch.utils.data.DataLoader(dataset, batch_size=config.get("batch_size"), shuffle=True, num_workers=1)

    return trainloader

# Script

In [137]:
output_data = []
targets_data = []
config = get_model_configuration()
model = LayerConfigurableMLP()
loss_function = config.get("loss_function")()

for i, data in enumerate(testloader):
    inputs, targets = data
    
    # Perform forward pass
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    current_loss += loss.item()
    output_data.extend(outputs.detach().numpy())
    targets_data.extend(targets.detach().numpy())
    

# Testing

In [225]:
m1 = LayerConfigurableCNN()
m2 = LayerConfigurableMLP()

In [229]:
m1.add_layer()

Old structure:
Sequential(
  (0): Conv2d(3, 6, kernel_size=(8, 8), stride=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): ReLU()
  (3): Flatten(start_dim=1, end_dim=-1)
  (4): Conv2d(6, 6, kernel_size=(4, 4), stride=(1, 1))
  (5): ReLU()
  (6): Linear(in_features=96, out_features=10, bias=True)
)
New structure:
Sequential(
  (0): Conv2d(3, 6, kernel_size=(8, 8), stride=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): ReLU()
  (3): Flatten(start_dim=1, end_dim=-1)
  (4): Conv2d(6, 6, kernel_size=(4, 4), stride=(1, 1))
  (5): ReLU()
  (6): Conv2d(6, 6, kernel_size=(4, 4), stride=(1, 1))
  (7): ReLU()
  (8): Linear(in_features=96, out_features=10, bias=True)
)


In [230]:
m1.num_trainable_weights()

1552

In [232]:
m2.add_layer()

Old structure:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)
New structure:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=10, bias=True)
)


In [233]:
m2.num_trainable_weights()

68362

In [217]:
m2.num_weights()
m2.add_layer()

Old structure:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=10, bias=True)
)
New structure:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=256, bias=True)
  (6): ReLU()
  (7): Linear(in_features=256, out_features=10, bias=True)
)


In [219]:
m2.num_trainable_weights()

68362

# Training

In [302]:
def get_top1_pos(outputs, targets):
    pred = np.argmax(outputs, axis=1)
    assert(len(pred) == len(targets))
    
    return np.sum(np.where(pred == targets, 1, 0))

def get_top5_pos(outputs, targets):
    sm = 0
    for i in range(len(targets)):
        top_5 = np.argpartition(outputs[i], -5)[-5:]
        sm += 1 if targets[i] in set(top_5) else 0 
    
    return sm

def test_model(model, loss_function):
    testloader = get_dataset(train=False)
    
    output_data = []
    targets_data = []
    current_loss = 0
    
    for i, data in enumerate(testloader):
        inputs, targets = data

        # Perform forward pass
        outputs = model(inputs)
        
        output_data.extend(outputs.detach().numpy())
        targets_data.extend(targets.detach().numpy())

        loss = loss_function(outputs, targets)
        current_loss += loss.item()
        
    N = len(targets_data)
    top1_acc = get_top1_pos(output_data, targets_data) / N
    top5_acc = get_top5_pos(output_data, targets_data) / N
    
    return current_loss, top1_acc, top5_acc

def train_model(model, epochs=None, debug=False):
    """ Train a model. """
    config = get_model_configuration()
    loss_function = config.get("loss_function")()
    optimizer = config.get("optimizer")(model.parameters(), lr=1e-4)
    trainloader = get_dataset()
    accelerator = Accelerator()  

    # Accelerate model
    model, optimizer, trainloader = accelerator.prepare(model, optimizer, trainloader)

    # Iterate over the number of epochs
    entries = []
    
    if epochs is None:
        epochs = config.get("num_epochs")
    
    for epoch in range(epochs):
        # Print epoch
        print(f'Starting epoch {epoch+1}')

        # Set current loss value
        current_loss = 0.0
        
        # Positive / Accuracy Rate
        top_1_positives = 0 
        top_5_positives = 0
        n = 0
        
        output_data = []
        targets_data = []
 
        # Iterate over the DataLoader for training data
        st_time = time.time()
        for i, data in enumerate(trainloader, 0):
#             print(i)

            # Get inputs
            inputs, targets = data

            # Zero the gradients
            optimizer.zero_grad()

            # Perform forward pass
            outputs = model(inputs)

            # Compute loss
#             print(outputs)
#             print(outputs.shape)
#             print(targets)
#             print(targets.shape)
            loss = loss_function(outputs, targets)
            
#             o, t = outputs.detach().numpy(), targets.detach().numpy()
#             top_1_positives += get_top1_pos(o, t)
#             top_5_positives += get_top5_pos(o, t)
#             n += len(targets)

            output_data.extend(outputs.cpu().detach().numpy())
            targets_data.extend(targets.cpu().detach().numpy())
            current_loss += loss.item()
            
            # Perform backward pass
            accelerator.backward(loss)

            # Perform optimization
            optimizer.step()
            
            # Print statistics
            if debug:
                print('Loss after mini-batch %5d: %.3f' %
                    (i + 1, current_loss / 500))
#             end_loss = current_loss / 500
#             current_loss = 0.0

        end_time = time.time()
    
        top1_acc = get_top1_pos(output_data, targets_data) / len(targets_data)
        top5_acc = get_top5_pos(output_data, targets_data) / len(targets_data)
        
        train_entry = {'type': 'train', 'epoch': epoch, 'top1': top1_acc, 'top5': top5_acc,
                       'loss': current_loss, 'time': round(end_time - st_time, 1)}
        
        test_st_time = time.time()
        test_loss, test_top1_acc, test_top5_acc = test_model(model, loss_function)
        test_end_time = time.time()
        
        print(f'Loss: {current_loss}')
        print(f'Train Acc: {top1_acc}')
        print(f'Test Acc: {test_top1_acc}')
        
        test_entry = {'type': 'test', 'epoch': epoch, 'top1': test_top1_acc, 'top5': test_top5_acc,
                      'loss': test_loss, 'time': round(test_st_time - test_end_time, 1)}
        
        entries.extend([train_entry, test_entry])

    print(n)
    print(top_1_positives)
    print(top_5_positives)

    # Return trained model
    return model, pd.DataFrame(entries), current_loss

In [304]:
mlp = LayerConfigurableMLP()
mlp, mlp_df = train_model(mlp)

Files already downloaded and verified
Starting epoch 1
Files already downloaded and verified
Loss: 399.7074911594391
Train Acc: 0.29368
Test Acc: 0.3426
Starting epoch 2
Files already downloaded and verified
Loss: 369.48126745224
Train Acc: 0.35666
Test Acc: 0.3582
Starting epoch 3
Files already downloaded and verified
Loss: 357.95565962791443
Train Acc: 0.37596
Test Acc: 0.3896
0
0
0


ValueError: too many values to unpack (expected 2)

In [None]:
# cnn = LayerConfigurableCNN()
# cnn, cnn_df = train_model(model)

In [None]:
def greedy_layerwise_training(model):
    """ Perform greedy layer-wise training. """
    
    print("NEW!")
    global_config = get_global_configuration()
    torch.manual_seed(42)

    # Loss comparison
    loss_comparable = float('inf')

    # Iterate over the number of layers to add
    training_losses = []
    top5_accs = []
    top1_accs = []
    
    dfs = []
    for num_layers in range(global_config.get("num_layers_to_add")):
        # Print which model is trained
        print("="*100)
        if num_layers > 0:
            print(f">>> TRAINING THE MODEL WITH {num_layers} ADDITIONAL LAYERS:")
        else:
            print(f">>> TRAINING THE BASE MODEL:")

        # Train the model
        model, df, end_loss = train_model(model)
        df['layer'] = num_layers
        dfs.append(df)

        # Compare loss
        if num_layers > 0 and end_loss < loss_comparable:
            print("="*50)
            print(f">>> RESULTS: Adding this layer has improved the model loss from {loss_comparable} to {end_loss}")
            loss_comparable = end_loss
        elif num_layers > 0:
            print("="*50)
            print(f">>> RESULTS: Adding this layer did not improve the model loss from {loss_comparable} to {end_loss}")
        elif num_layers == 0:
            loss_comparable = end_loss

        # Add layer to model
        model.add_layer()

    # Process is complete
    print("Training process has finished.")
    
    return model, pd.concat(dfs)

In [293]:
results_df[results_df['epoch'] == 2]

Unnamed: 0,type,epoch,top1,top5,loss,time,layer
4,train,2,0.37734,0.84736,357.612984,,0
5,test,2,0.3828,0.8565,70.79449,13.3,0
4,train,2,0.38296,0.86356,348.311132,,1
5,test,2,0.3891,0.8648,69.314775,12.9,1
4,train,2,0.38516,0.86536,345.427758,,2
5,test,2,0.388,0.8664,68.870548,12.7,2
4,train,2,0.38208,0.86618,344.685656,,3
5,test,2,0.3795,0.8683,68.841312,12.7,3
4,train,2,0.38164,0.8663,344.831558,,4
5,test,2,0.3777,0.8662,68.858117,13.4,4


In [295]:
mlp_model, results_df = greedy_layerwise_training(LayerConfigurableMLP())

NEW!
>>> TRAINING THE BASE MODEL:
Files already downloaded and verified
Starting epoch 1
Files already downloaded and verified
Loss: 399.98213946819305
Train Acc: 0.29576
Test Acc: 0.3287
Starting epoch 2
Files already downloaded and verified
Loss: 369.30824625492096
Train Acc: 0.35532
Test Acc: 0.3654
Starting epoch 3
Files already downloaded and verified
Loss: 357.61298418045044
Train Acc: 0.37734
Test Acc: 0.3828
Starting epoch 4
Files already downloaded and verified
Loss: 349.37613356113434
Train Acc: 0.38926
Test Acc: 0.3958
Starting epoch 5
Files already downloaded and verified
Loss: 342.0980815887451
Train Acc: 0.4056
Test Acc: 0.4066
Starting epoch 6
Files already downloaded and verified
Loss: 335.85399198532104
Train Acc: 0.41578
Test Acc: 0.409
Starting epoch 7
Files already downloaded and verified
Loss: 330.08105981349945
Train Acc: 0.42612
Test Acc: 0.4292
Starting epoch 8
Files already downloaded and verified
Loss: 325.2761056423187
Train Acc: 0.43522
Test Acc: 0.429
Start

KeyboardInterrupt: 

In [None]:
cnn_model, results_df = greedy_layerwise_training(LayerConfigurableCNN())