# Computational Intelligence Coursework - Ali

## Imports, Functions, Model, Data Loading

### Imports & Seed

In [2]:
# general imports
import numpy as np

# torch & data manipulation imports
import torch
from torch.utils.data import ConcatDataset, Subset, DataLoader
import torchvision
import torchvision.transforms as transforms

# model-related imports
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit

# seed for reproducibility
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)



<torch._C.Generator at 0x17eafe9d0>

### Define Custom Preprocessing Functions

In [3]:
from torch.utils.data import Dataset

# DATA PREPROCESSING
class CustomDataset(Dataset):
    def __init__(self, dataset, indices, transform=None):
        self.dataset = dataset
        self.indices = indices
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        image, label = self.dataset[self.indices[idx]]
        if self.transform:
            image = self.transform(image)
        return image, label

# calculate mean & standard deviation based on dataset
def calc_mean_std(dataset):
    dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=2)
    mean_sum = 0.
    var_sum = 0.
    total_images_count = 0
    for images, _ in dataloader:
        batch_samples = images.size(0)
        images = images.view(batch_samples, images.size(1), -1)
        mean_sum += images.mean(2).sum(0)
        var_sum += images.var(2).sum(0)
        total_images_count += batch_samples

    mean = mean_sum / total_images_count
    var = var_sum / total_images_count
    std = np.sqrt(var)

    return mean, std

# define transformations for data augmentation
def train_transform(data, mean, std):
  data = normalize(data, mean, std)
  transform = transforms.Compose([
                                transforms.RandomHorizontalFlip(0.25),
                                transforms.RandomVerticalFlip(0.25),
                                transforms.RandomGrayscale(0.25),
                                transforms.RandomCrop(32, padding=4)
                                 ])
  return transform(data)

# define normalisation
def normalize(data, mean, std):
  transform = transforms.Compose([
                                transforms.Normalize(mean, std)
                                ])
  return transform(data)


### Define Model Architecture

In [4]:
class Net(nn.Module):
    def __init__(self, dropout_prob):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 64, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 32, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(32 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 32 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

    def freeze_all_but_last():
        for name, param in self.named_parameters():
            if 'fc2' not in name:
                param.requires_grad = False

    # Extract weights from the last layer
    def extract_weights():
        return [p.data.numpy() for p in self.fc2.parameters()]

### Checkpointing

In [5]:
def save_checkpoint(state, filename="checkpoint.pth.tar"):
    torch.save(state, filename)

### Data Loading & Preparation

In [6]:
# load CIFAR-10 dataset & convert to tensor
train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True,
                                         transform=transforms.ToTensor())

test_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True,
                                        transform=transforms.ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [7]:
# dataset hyperparameters
num_folds = 10
test_size = 0.20

# combine train and test datasets for stratified splitting
combined_set = ConcatDataset([train_set, test_set])

# STRATIFIED SPLIT
# collect the labels
labels = [y for _, y in combined_set]

# stratified split subset indices
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
train_idx, test_idx = next(sss.split(np.zeros(len(labels)), labels))

# subset using indices & collect associated labels
stratified_train_set, stratified_train_labels = Subset(combined_set, train_idx).dataset, [labels[i] for i in range(len(labels)) if i in train_idx]
stratified_test_set, stratified_test_labels = Subset(combined_set, test_idx).dataset, [labels[i] for i in range(len(labels)) if i in test_idx]

# create StratifiedKFold object for train set only
skf = StratifiedKFold(n_splits=num_folds)

## Baselines

### Gradient Based - Adam

In [8]:
device = 'cuda'

# function for training and evaluating the model
def adam_train_and_validate(model, train_loader, test_loader, criterion, optimizer, mean, std, epochs=30):
    model.to(device)
    model.train()

    # early stopping parameters
    early_stopping_patience = 3  # number of epochs to wait for improvement before stopping
    early_stopping_counter = 0    # counter for epochs without improvement
    best_accuracy = 0             # track the best accuracy

    # train
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        i = 0

        for inputs, train_load_labels in train_loader:
            inputs = train_transform(inputs, mean, std)
            inputs, train_load_labels = inputs.to(device), train_load_labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, train_load_labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += train_load_labels.size(0)
            correct += (predicted == train_load_labels).sum().item()
            i += 1
            if i % 20 == 0:
              batch_accuracy = 100 * correct / total
              print(f'{i}th Batch Loss: {loss.item():.4f} Batch Accuracy: {batch_accuracy:.4f}')

        epoch_loss = running_loss / total
        epoch_accuracy = 100 * correct / total
        print(f'Epoch [{epoch + 1}/{epochs}] Loss: {loss.item():.4f} Epoch Accuracy: {epoch_accuracy:.4f}')

        model.eval()
        correct = 0
        total = 0

        # validate
        with torch.no_grad():
            for inputs, test_load_labels in test_loader:
                inputs = normalize(inputs, mean, std)
                inputs, test_load_labels = inputs.to(device), test_load_labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += test_load_labels.size(0)
                correct += (predicted == test_load_labels).sum().item()

        validation_accuracy = 100 * correct / total

        # check if the current validation accuracy is better than the best recorded accuracy
        if validation_accuracy > best_accuracy:
            best_accuracy = validation_accuracy
            early_stopping_counter = 0  # Reset the counter
            # save the model checkpoint
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, filename=f"best_model_epoch_{epoch+1}.pth.tar")
        else:
            early_stopping_counter += 1

        print(early_stopping_counter)
        # check if early stopping should be triggered
        if early_stopping_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break

        print(f'Validation Accuracy: {validation_accuracy:.2f}%')
    return model

# function for testing the model
def test(model, test_loader, mean, std):

    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, test_load_labels in test_loader:
            inputs = normalize(inputs, mean, std)
            inputs, test_load_labels = inputs.to(device), test_load_labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += test_load_labels.size(0)
            correct += (predicted == test_load_labels).sum().item()


    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return model

In [9]:
# Model hyperparameters
dropout_prob = 0.30
num_epochs = 10
batch_size = 64

model = None

# call function
# Main loop for k-fold cross-validation
for fold, (train_fold_indices, val_fold_indices) in enumerate(skf.split(train_idx, stratified_train_labels)):
    print(f'Fold {fold + 1}/{num_folds}')
    mean, std = calc_mean_std(Subset(stratified_train_set, train_fold_indices))

    train_sampler = torch.utils.data.SubsetRandomSampler(train_fold_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_fold_indices)


    train_loader = torch.utils.data.DataLoader(
        dataset=stratified_train_set,
        batch_size=batch_size,
        sampler=train_sampler,
        worker_init_fn=seed_worker,
        generator=g)

    val_loader = torch.utils.data.DataLoader(
        dataset=stratified_train_set,
        batch_size=batch_size,
        sampler=val_sampler,
        worker_init_fn=seed_worker,
        generator=g)

    model = Net(dropout_prob).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    adam_train_and_validate(model, train_loader, val_loader, criterion, optimizer, mean, std, epochs=num_epochs)

Fold 1/10




KeyboardInterrupt: 

In [None]:
# test model
test_loader = torch.utils.data.DataLoader(
    dataset=stratified_test_set,
    batch_size=batch_size,
    worker_init_fn=seed_worker,
    generator=g)

test(model, test_loader, mean, std)

RuntimeError: ignored

## Population Based - Genetic Algorithm

## Proposed - Adaptive Baldwinian-Lamarckian Memetic Algorithm
## Self-regularizing Adam-guided Adaptive-SL-PSO

### Imports, Preprocessing & Definitions

In [None]:
import operator
import random
from matplotlib import pyplot as plt
import math
from deap import base
from deap import benchmarks
from deap import creator
from deap import tools
from numba import jit, cuda
from numpy import genfromtxt

Collecting deap
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.4.1


In [None]:
def neighbours(pop, individual, neighbours=10):
    distances = {}
    for part in pop:
        distance = np.linalg.norm(part-individual)
        if len(distances) < neighbours:
            distances[part] = distance
        else:
            copy = distances.copy()
            for e in copy:
                if distances[e] > distance:
                    distances[part] = distance
                    del distances[e]
                    break
    return sort_population(distances.keys, potential=True)

# Ali's functions for question 3 ----------------------------------------------------------------
def sort_population(population, potential=False):
    if potential:
        population.sort(key=lambda x: x.potential, reverse=True)
    else:
        population.sort(key=lambda x: x.fitness.values, reverse=True)

In [None]:
# Function to freeze all but the last layer
def freeze_all_but_last(model):
    for name, param in model.named_parameters():
        if 'fc2' not in name:
            param.requires_grad = False

# Extract weights from the last layer
def extract_weights_biases(layer):
    return [p.data.numpy() for p in layer.parameters()]

def generate_particle(dimension):
    part = creator.Particle([random.uniform(-1, 1) for _ in range(dimension)])
    part.speed = [random.uniform(-1, 1) for _ in range(dimension)]
    part.smin = -1
    part.smax = 1
    return part

# social learning in a neighbourhood of size
def behaviour_learning(gamma, gbest, part, pop, epsilon, mu, neighbours=10):
    i = pop.index(part)
    neighbour_pop = neighbours(pop, part, neighbours)

    k = math.floor(random.randrange(0, i))
    demonstrator = neighbour_pop[k]

    r1 = (random.uniform(0, 1) for _ in range(len(part)))
    r2 = (random.uniform(0, 1) for _ in range(len(part)))
    r3 = (random.uniform(0, 1) for _ in range(len(part)))
    ones = [1] * len(part)
    one_minus_gamma = list(operator.sub, ones, gamma)

    v_r0 = list(map(operator.mul, r1, part.speed))
    v_r1 = list(map(operator.mul, r2, map(operator.sub, demonstrator, part))) # local best
    v_r2 = list(map(operator.mul,r3, map(operator.mul, [epsilon*x for x in mu], part))) # global best

    sl_speed = list(map(operator.add, v_r1, v_r2))
    exploitation_speed = list(map(operator.mul, gamma, gbest))
    exploration_speed = list(map(operator.mul, one_minus_gamma, sl_speed))

    part.speed = list(map(operator.add, v_r0 , map(operator.add, exploitation_speed, exploration_speed)))
    part[:] = list(map(operator.add, part, part.speed))

# Define the fitness function
def evaluate_particle(model, particle, inputs, labels, potential=True, lamarckian=False):
    weights = np.asarray(particle)


    new_weights = torch.from_numpy(weights[:weights_len].reshape(weights_dim).T).float().to(device)
    new_biases = torch.from_numpy(weights[weights_len:bias_len]).float().to(device)
    model.fc2.weight = torch.nn.Parameter(new_weights)
    model.fc2.bias = torch.nn.Parameter(new_biases)

    inputs, labels = torch.FloatTensor(inputs), torch.Tensor(labels)
    inputs, labels, model = inputs.to(device), labels.to(device), model.to(device)
    outputs = model(inputs)  # input and predict based on images
    loss = criterion(outputs, labels)
    optimizer.zero_grad()  # clear gradients for next train
    if potential or lamarckian:
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients
        outputs = model(inputs)  # input and predict based on images
        loss = criterion(outputs, labels)
        optimizer.zero_grad
        if lamarckian:
           return loss, np.concatenate(model.fc2.weight.data.cpu().numpy().ravel(), model.fc2.bias.data.cpu().numpy().ravel())
    return loss,

In [None]:
def pso_optimize(model, toolbox, pop, inputs, labels, g):
    interval        = 10
    iterations      = 100
    neighbours = 10
    beta = 0.01
    alpha = 0.5

    m = populationSize + math.floor(dimension/10)
    epsilon = beta * (dimension/populationSize)


    gbest = None

    for part in pop:
        part.fitness.values = toolbox.evaluate(model, part, inputs, labels, potential=False, lamarckian=False) #actually only one fitness value

    # Begin the evolution
    #for g in range(iterations):


    # A new Search
    #print("-- Search %i --" % g)

    # find the global best - lamarckian search party lead - gradient descent
    sort_population(pop, potential=False)
    gbest = pop[0]

    for part in pop:
        part.potential = toolbox.evaluate(model, part, inputs, labels, potential=True, lamarckian=False) #actually only one fitness value

    # sort the rest of the baldwinian search participants
    sort_population(pop[1:], potential=True)

    # parameter setting - variable
    mu = [sum(np.asarray(pop)[:,x])/populationSize for x in range(dimension)]
    gamma = 1/(1+math.exp(3 - 6*(min(1-abs(gbest.potential/gbest.fitness.values),1))))
    i = 0


    for part in pop[1:]:
        i = i + 1
        learn_prob = (1 - (i-1)/m)**(alpha*math.log(math.ceil(dimension/m)))
        if random.random() < learn_prob:
            toolbox.learn(gamma, gbest, part, pop[1:], epsilon, mu, neighbours)


        #update global best
        if (not gbest) or gbest.fitness < part.fitness:
            gbest = creator.Particle(part)
            gbest.fitness.values = part.fitness.values

    # set weights to best individual
    weights = np.asarray(gbest)
    model.fc2.weight = torch.nn.Parameter(torch.from_numpy(weights[:weights_len].reshape(weights_dim).T))
    model.fc2.bias = torch.nn.Parameter(torch.from_numpy(weights[weights_len:bias_len]))

    # Gather all the fitnesses in one list and print the stats
    # print every interval
    fits.append(gbest.fitness.values)
    if g%interval==0: # interval
        logbook.record(gen=g, evals=len(pop), **stats.compile(pop))
        print(logbook.stream)
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5


        print("  Min %s" % min(fits))
        print("  Max %s" % max(fits))
        print("  Avg %s" % mean)
        print("  Std %s" % std)
        plt.plot(fits)

    return gbest.fitness.values, gbest

In [None]:
device = 'cuda'

# function for training and evaluating the model
def memetic_train_and_validate(model, toolbox, pop, train_loader, test_loader, criterion, optimizer, mean, std, epochs=30):
    model.to(device)
    model.train()

    # early stopping parameters
    early_stopping_patience = 3  # number of epochs to wait for improvement before stopping
    early_stopping_counter = 0    # counter for epochs without improvement
    best_accuracy = 0             # track the best accuracy

    # train
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        i = 0

        for inputs, train_load_labels in train_loader:
            inputs = train_transform(inputs, mean, std)
            #inputs, train_load_labels = inputs.to(device), train_load_labels.to(device)
            i += 1
            loss, new_weights = pso_optimize(model, toolbox, pop, inputs, train_load_labels, i)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += train_load_labels.size(0)
            correct += (predicted == train_load_labels).sum().item()
            if i % 20 == 0:
              batch_accuracy = 100 * correct / total
              print(f'{i}th Batch Loss: {loss.item():.4f} Batch Accuracy: {batch_accuracy:.4f}')

        epoch_loss = running_loss / total
        epoch_accuracy = 100 * correct / total
        print(f'Epoch [{epoch + 1}/{epochs}] Loss: {loss.item():.4f} Epoch Accuracy: {epoch_accuracy:.4f}')

        model.eval()
        correct = 0
        total = 0

        # validate
        with torch.no_grad():
            for inputs, test_load_labels in test_loader:
                #inputs = normalize(inputs, mean, std)
                inputs, test_load_labels = inputs.to(device), test_load_labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += test_load_labels.size(0)
                correct += (predicted == test_load_labels).sum().item()

        validation_accuracy = 100 * correct / total

        # check if the current validation accuracy is better than the best recorded accuracy
        if validation_accuracy > best_accuracy:
            best_accuracy = validation_accuracy
            early_stopping_counter = 0  # Reset the counter
            # save the model checkpoint
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, filename=f"best_model_epoch_{epoch+1}.pth.tar")
        else:
            early_stopping_counter += 1

        print(early_stopping_counter)
        # check if early stopping should be triggered
        if early_stopping_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break

        print(f'Validation Accuracy: {validation_accuracy:.2f}%')
    return model

In [None]:
fc2_weights = model.fc2.weight.data
weights_dim = fc2_weights.shape
weights_len = len(fc2_weights.reshape(-1))
fc2_bias = model.fc2.bias.data
bias_dim = fc2_bias.shape
bias_len = len(fc2_bias.reshape(-1))
populationSize  = 100
dimension = weights_len + bias_len

# Freeze all layers except the last
freeze_all_but_last(model)

# DEAP inits
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Particle", list, fitness=creator.FitnessMin, speed=list,
                smin=None, smax=None, best=None, potential=None)

toolbox = base.Toolbox()
toolbox.register("particle", generate_particle, dimension)
toolbox.register("population", tools.initRepeat, list, toolbox.particle)
toolbox.register("evaluate", evaluate_particle)
toolbox.register("learn", behaviour_learning)

# Model hyperparameters
dropout_prob = 0.30
num_epochs = 10
batch_size = 64

# call function
# Main loop for k-fold cross-validation
for fold, (train_fold_indices, val_fold_indices) in enumerate(skf.split(train_idx, stratified_train_labels)):
    print(f'Fold {fold + 1}/{num_folds}')
    mean, std = calc_mean_std(Subset(stratified_train_set, train_fold_indices))

    train_sampler = torch.utils.data.SubsetRandomSampler(train_fold_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_fold_indices)

    # create an initial population of individuals
    pop = toolbox.population(n=populationSize)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)


    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    fits = []


    train_loader = torch.utils.data.DataLoader(
        dataset=stratified_train_set,
        batch_size=batch_size,
        sampler=train_sampler,
        worker_init_fn=seed_worker,
        generator=g)

    val_loader = torch.utils.data.DataLoader(
        dataset=stratified_train_set,
        batch_size=batch_size,
        sampler=val_sampler,
        worker_init_fn=seed_worker,
        generator=g)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    memetic_train_and_validate(model, toolbox, pop, train_loader, val_loader, criterion, optimizer, mean, std, epochs=num_epochs)

Fold 1/10




RuntimeError: ignored