In [3]:
from random import random, randint, seed
from statistics import mean
from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import torch.optim as optim

import numpy as np
import math
import copy


import torchvision
import torchvision.transforms as transforms
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import PIL.Image as Image
from torchvision import datasets,models

In [2]:
# dowload the dataset
# prepare a preprocessing pipeline 
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# We download the train and the test dataset in the given root and applying the given transforms
trainset = torchvision.datasets.MNIST(root='./data', train=True,  download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,  download=True, transform=transform)

batch_size=4

# dataloaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,  shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,  shuffle=False, num_workers=2)

In [71]:
class ConvNet(nn.Module):
    def __init__(self, genotype):
        super().__init__()
        self.layerlist = []
        self.input_channels = 1
        self.output_channels = 6
        self.output_size = 10
        self.kernel_size = 5
        self.input_size = 28

        self.build_layers(genotype)
        self.layers = torch.nn.Sequential(*self.layerlist)        

    def forward(self, x):
        out = self.layers(x)
        return out
    
    def build_layers(self, genotype):
        for i, gene in enumerate(genotype): 
            # add final layer
            if gene == 0:
                self.layerlist.append(torch.nn.Flatten())
                size = self.output_channels*(self.input_size)**2
                self.layerlist.append(torch.nn.Linear(size, 120))
                self.layerlist.append(torch.nn.ReLU())
                self.layerlist.append(torch.nn.Linear(120, 84))
                self.layerlist.append(torch.nn.ReLU())
                self.layerlist.append(torch.nn.Linear(84, self.output_size))

            # add another intermediate layer
            elif gene == 1:
                output_channels = np.random.randint(1, 10)
                if genotype[i+1] == 2:
                    output_channels = 4
                kernel_size = np.random.randint(2, 5)
                self.layerlist.append(torch.nn.Conv2d(in_channels=self.input_channels, out_channels=output_channels, kernel_size=kernel_size, stride=1, padding=2))
                self.layerlist.append(torch.nn.ReLU())
                self.layerlist.append(torch.nn.MaxPool2d(2, 2))
                self.input_size = int((self.input_size - kernel_size + 1 + 4)/2)
                self.output_channels = output_channels
                self.input_channels = output_channels
                self.kernel_size = kernel_size

            # add non-parametrizable block
            elif gene == 2:
                output_channels = 6
                kernel_size = 3
                self.layerlist.append(torch.nn.Conv2d(in_channels=4, out_channels=output_channels, kernel_size=kernel_size,stride=1, padding=2))
                self.layerlist.append(torch.nn.ReLU())
                self.layerlist.append(torch.nn.MaxPool2d(2, 2))
                self.input_size = int((self.input_size - kernel_size + 1 + 4)/2)
                self.output_channels = output_channels
                self.input_channels = output_channels
                self.kernel_size = kernel_size

    # mate function combines two networks by randomly selecting weights from one of the parents
    def mate(self, other, mutate=True):
        child = copy.deepcopy(self)
        for i in range(len(child.layers)):
          # change only convolutional and linear layers
          if isinstance(child.layers[i], nn.Linear) or isinstance(child.layers[i], nn.Conv2d):
            pass_on = np.random.rand(1) < 0.5
            child.layers[i] = self.layers[i] if pass_on else other.layers[i]
        if mutate:
            child.mutate(stdev=np.random.rand(1)/10)
        return child

    # the mutation step is realized as the addition of Gaussian noise to each weight in the network
    def mutate(self, stdev=0.03):
        for i in range(len(self.layers)):
            if isinstance(self.layers[i], nn.Conv2d) or isinstance(self.layers[i], nn.Linear):
              with torch.no_grad():
                self.layers[i].weight +=  torch.tensor(np.random.normal(0,stdev, list(self.layers[i].weight.shape)))
                self.layers[i].bias +=  torch.tensor(np.random.normal(0, stdev, list(self.layers[i].bias.shape)))

In [72]:
torch.manual_seed(123456)

model=ConvNet([1,2,1,0])
# feed data into the model
#x = trainloader.dataset.data[0].unsqueeze(0).float()
#y = model.forward(x)

dataloader_iterator = iter(trainloader)
inputs, labels = next(dataloader_iterator)
#y = model(inputs)
print(inputs.shape)

summary(model, input_size=(1, 1, 28, 28))

torch.Size([4, 1, 28, 28])
Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       40
|    └─ReLU: 2-2                         --
|    └─MaxPool2d: 2-3                    --
|    └─Conv2d: 2-4                       222
|    └─ReLU: 2-5                         --
|    └─MaxPool2d: 2-6                    --
|    └─Conv2d: 2-7                       679
|    └─ReLU: 2-8                         --
|    └─MaxPool2d: 2-9                    --
|    └─Flatten: 2-10                     --
|    └─Linear: 2-11                      13,560
|    └─ReLU: 2-12                        --
|    └─Linear: 2-13                      10,164
|    └─ReLU: 2-14                        --
|    └─Linear: 2-15                      850
Total params: 25,515
Trainable params: 25,515
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       40
|    └─ReLU: 2-2                         --
|    └─MaxPool2d: 2-3                    --
|    └─Conv2d: 2-4                       222
|    └─ReLU: 2-5                         --
|    └─MaxPool2d: 2-6                    --
|    └─Conv2d: 2-7                       679
|    └─ReLU: 2-8                         --
|    └─MaxPool2d: 2-9                    --
|    └─Flatten: 2-10                     --
|    └─Linear: 2-11                      13,560
|    └─ReLU: 2-12                        --
|    └─Linear: 2-13                      10,164
|    └─ReLU: 2-14                        --
|    └─Linear: 2-15                      850
Total params: 25,515
Trainable params: 25,515
Non-trainable params: 0

In [74]:
# train the network using SGD but without using all the training data at once
def train(model, trainloader):
    inspected = 10000
    iterations = int(inspected / 4)

    dataloader_iterator = iter(trainloader)

    # define the loss function and the optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for i in range(iterations):
        try:
            inputs, labels = next(dataloader_iterator)

            # zero the parameter gradients
            optimizer.zero_grad()

            # calculate outputs by running images through the network
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        except StopIteration:
            print("StopIteration, not enough data")
    
    return model
        

In [75]:
# look at global performances on the testset
def eval(model):
  correct = 0
  total = 0
  # since we're not training, we don't need to calculate the gradients for our outputs
  with torch.no_grad():
      for data in testloader:
          images, labels = data
          # calculate outputs by running images through the network
          outputs = model.forward(images)
          # the class with the highest energy is what we choose as prediction
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  accuracy = 100 * correct // total
  print(f'Accuracy of the network on the 10000 test images: {accuracy} %')

In [76]:
train(model, trainloader)
eval(model)

Accuracy of the network on the 10000 test images: 84 %


In [77]:
class evolution():
    def __init__(self, fun, population_size=10, holdout=1, mating=True):
        """
        initial function fun is a function to produce nets, used for the original population
        scoring_function must be a function which accepts a net as input and returns a float
        """
        self.population_size = population_size
        self.population = [fun([1,2,1,0]) for _ in range(population_size)]
        self.best_organism = self.population[-1]
        self.best_score = self.scoring_function(self.best_organism)

        self.holdout = max(1, int(holdout * population_size))

        self.mating = True
        

    def generation(self):
        self.population = [train(self.population[0], trainloader) for x in self.population]
        scores = [self.scoring_function(x) for x in self.population]
        self.population = [self.population[x] for x in np.argsort(scores)[::-1]]

        # update best organism and respective accuracy
        self.best_organism = copy.deepcopy(self.population[0])
        self.best_score = sorted(scores)[-1]
        
        new_population = [self.best_organism] # Ensure best organism survives
        for i in range(self.population_size - 1):
            parent_1_idx = i % self.holdout
            if self.mating:
                parent_2_idx = min(self.population_size - 1, int(np.random.exponential(self.holdout)))
            else:
                parent_2_idx = parent_1_idx
            offspring = self.population[parent_1_idx].mate(self.population[parent_2_idx]) # to build new generation functions mate and mutate are called as well
            new_population.append(offspring)
        
        self.population = new_population

    def get_best_organism(self, repeats=1):        
        return self.best_organism, self.best_score

    def scoring_function(self, model):
        correct = 0
        total = 0
        # take each time a certain number of random images on which evaluating the network
        inspected = 10000
        iterations = int(inspected / 4)

        dataloader_iterator = iter(testloader)

        with torch.no_grad():
            for i in range(iterations):
                try:
                    images, labels = next(dataloader_iterator)

                    # calculate outputs by running images through the network
                    outputs = model.forward(images)
                    # the class with the highest energy is what we choose as prediction
                    _, predicted = torch.max(outputs.data, 1)
                    correct += (predicted == labels).sum().item()
                    total += labels.size(0)
                except StopIteration:
                    print("StopIteration, not enough data")
                

        accuracy = 100 * correct // total
        return(accuracy)

In [78]:
# The function to create the initial population
net_creator = lambda g : ConvNet(g)

# initialize the class which will handle evolution of the NNs
curr_env = evolution(net_creator, population_size=4, holdout=0.6, mating=True)

# get current most suitable network (organism)
best_net, score = curr_env.get_best_organism()
acc = [score]
best_nets = [best_net]

In [79]:
acc

[11]

In [80]:
curr_env.population[0]
dataloader_iterator = iter(trainloader)
inputs, labels = next(dataloader_iterator)
y = curr_env.population[0](inputs)
print(inputs.shape)
#summary(curr_env.population[0])

torch.Size([4, 1, 28, 28])


In [64]:
train(curr_env.population[0], trainloader)

ConvNet(
  (layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=150, out_features=120, bias=True)
    (11): ReLU()
    (12): Linear(in_features=120, out_features=84, bias=True)
    (13): ReLU()
    (14): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [81]:
generations = 3
for i in range(generations):
    curr_env.generation()
    this_generation_best, score = curr_env.get_best_organism()
    best_nets.append(this_generation_best)
    acc.append(score)
    #if i%5==0:
    print("Generation ", i , "'s best network accuracy: ", score, "%")

Generation  0 's best network accuracy:  90 %
Generation  1 's best network accuracy:  97 %
Generation  2 's best network accuracy:  96 %
