In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import PIL.Image as Image
from torchvision import datasets,models
import matplotlib.pyplot as plt
import copy

In [8]:
# Preprocessing applied to every image before it is fed to the model:
#   * ToTensor()  - turns the image into a tensor with values scaled to [0, 1]
#   * Normalize() - standardises that tensor with the given mean and std
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Mini-batch size shared by the train and test loaders.
batch_size = 4

# MNIST train / test splits, downloaded into ./data when missing, with the
# preprocessing pipeline above attached to each of them.
trainset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Loaders: the training loader reshuffles on every pass, the test loader keeps
# the dataset order.
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [3]:
# Shape of the raw image tensor stored by the dataset behind the training loader.
trainloader.dataset.data.shape 

torch.Size([60000, 28, 28])

In [4]:
# One of the simplest convolutional networks: a LeNet-5 variant, simplified in
# order to have fewer parameters.

class LeNet(nn.Module):
    """Simplified LeNet-style classifier with genetic-algorithm helpers.

    The network is a single ``nn.Sequential`` stack (conv -> ReLU -> max-pool ->
    flatten -> linear -> ReLU -> linear) producing 10 outputs per sample.
    ``mate`` and ``mutate`` implement crossover and mutation on the
    convolutional and linear layers only.
    """

    def __init__(self):
        super().__init__()

        # Shape comments assume batches of 4 single-channel 28x28 images.
        self.layers = torch.nn.Sequential(
            nn.Conv2d(1, 6, 5),           # [4, 6, 24, 24]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),           # [4, 6, 12, 12]
            nn.Flatten(),                 # [4, 864]
            nn.Linear(6 * 12 * 12, 120),  # [4, 120]
            nn.ReLU(),
            nn.Linear(120, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw scores."""
        return self.layers(x)

    def mate(self, other, mutate=True):
        """Create a child by layer-wise crossover between ``self`` and ``other``.

        Each convolutional / linear layer is inherited from ``self`` or from
        ``other`` with probability 0.5 each. The child owns independent copies
        of every layer, so mutating it never alters either parent.

        Args:
            other: the second parent; must have the same layer layout.
                Passing ``self`` yields a clone (before mutation).
            mutate: when true, the child is mutated with a standard deviation
                drawn uniformly from [0, 0.1).

        Returns:
            A new ``LeNet`` instance.
        """
        # Deep copy: the child starts with private copies of all of self's layers.
        child = copy.deepcopy(self)
        for i in range(len(child.layers)):
            # change only convolutional and linear layers
            if isinstance(child.layers[i], (nn.Linear, nn.Conv2d)):
                pass_on = np.random.rand() < 0.5
                if not pass_on:
                    # Copy the layer instead of sharing the module object;
                    # sharing would let the child's mutation leak into `other`.
                    child.layers[i] = copy.deepcopy(other.layers[i])
        if mutate:
            child.mutate(stdev=np.random.rand() / 10)
        return child

    def mutate(self, stdev=0.03):
        """Add zero-mean Gaussian noise, in place, to conv / linear parameters.

        Args:
            stdev: standard deviation of the noise added to every weight and
                bias entry.
        """
        with torch.no_grad():
            for layer in self.layers:
                if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                    continue
                for param in (layer.weight, layer.bias):
                    if param is None:  # layer built with bias=False
                        continue
                    noise = np.random.normal(0, stdev, size=tuple(param.shape))
                    # NumPy produces float64 on the CPU; match the parameter's
                    # dtype and device before the in-place update.
                    param += torch.as_tensor(noise, dtype=param.dtype, device=param.device)

net = LeNet()

In [5]:
# define loss function and optimizer
import torch.optim as optim

# Move the model to its device *before* building the optimizer, as the
# torch.optim documentation recommends: the optimizer should be constructed
# from the parameters in their final location.
net.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [6]:
# train the network
def train(net, trainloader, epochs=2, loss_fn=None, opt=None):
    """Train ``net`` with mini-batch gradient descent.

    Args:
        net: the model to optimise.
        trainloader: iterable yielding ``(inputs, labels)`` batches.
        epochs: number of full passes over ``trainloader``.
        loss_fn: loss callable taking ``(outputs, labels)``. Defaults to the
            notebook-level ``criterion``.
        opt: optimizer to step. Defaults to the notebook-level ``optimizer``;
            note that an optimizer only updates the parameters it was
            constructed with, so pass a matching one when training a model
            other than the one that optimizer was built for.

    Prints 'Finished Training' when done; returns nothing.
    """
    if loss_fn is None:
        loss_fn = criterion
    if opt is None:
        opt = optimizer

    # Batches must live on the same device as the model's parameters.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(epochs):  # loop over the dataset multiple times
        for inputs, labels in trainloader:
            # Tensor.to() returns a new tensor, so the result must be re-bound.
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            opt.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            opt.step()

    print('Finished Training')

In [9]:
# Fit `net` on the MNIST training loader using train()'s default settings.
train(net, trainloader)

Finished Training


In [11]:
# save network
PATH = './trained_net/mnist_lenet.pth'
# torch.save does not create missing parent folders, so make sure the target
# directory exists before writing the checkpoint.
os.makedirs(os.path.dirname(PATH), exist_ok=True)
torch.save(net.state_dict(), PATH)

In [12]:
# look at global performances on the dataset
def eval(model, loader=None):
    """Print the classification accuracy of ``model`` over ``loader``.

    NOTE: this function shadows Python's builtin ``eval``; the name is kept
    because the rest of the notebook calls it.

    Args:
        model: network mapping a batch of images to per-class scores.
        loader: iterable yielding ``(images, labels)`` batches. Defaults to
            the notebook-level ``testloader``.

    Raises:
        ValueError: if ``loader`` yields no samples.

    The accuracy is an integer percentage (floored) and is printed, not
    returned.
    """
    if loader is None:
        loader = testloader

    # Batches must live on the same device as the model's parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for images, labels in loader:
            # Tensor.to() returns a new tensor, so the result must be re-bound.
            images = images.to(device)
            labels = labels.to(device)
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("cannot compute accuracy: the loader produced no samples")

    accuracy = 100 * correct // total
    print(f'Accuracy of the network on the {total} test images: {accuracy} %')

In [13]:
# Report the test-set accuracy of the network trained above.
eval(net)

Accuracy of the network on the 10000 test images: 98 %


In [8]:
class evolution():
    """Simple genetic algorithm over a population of networks.

    Every generation the population is scored and sorted best-first, the best
    organism is kept unchanged, and the remaining slots are filled with
    offspring produced by the organisms' own ``mate`` method.
    """

    def __init__(self, fun, population_size=10, holdout=1, mating=True):
        """
        Args:
            fun: zero-argument callable producing one organism; it is called
                ``population_size`` times to build the initial population.
            population_size: number of organisms kept in every generation.
            holdout: fraction of the population (taken from the top of the
                ranking) that is used as first parents; at least one organism
                is always held out.
            mating: when true, the second parent is picked at random with an
                exponential distribution whose scale is the holdout count, so
                high-ranked organisms are favoured. When false, each first
                parent mates with itself, so the child is a (possibly
                mutated) clone.

        Raises:
            ValueError: if ``population_size`` is smaller than 1.
        """
        if population_size < 1:
            raise ValueError("population_size must be at least 1")

        self.population_size = population_size
        self.population = [fun() for _ in range(population_size)]
        # Nothing is ranked yet, so start from an arbitrary member.
        self.best_organism = self.population[-1]
        self.best_score = self.scoring_function(self.best_organism)

        self.holdout = max(1, int(holdout * population_size))

        # Honour the caller's choice (previously this was always True).
        self.mating = mating

    def generation(self):
        """Score the population and replace it with the next generation."""
        scores = [self.scoring_function(x) for x in self.population]
        # Sort organisms from the highest to the lowest score.
        self.population = [self.population[x] for x in np.argsort(scores)[::-1]]

        # update best organism and respective accuracy
        self.best_organism = copy.deepcopy(self.population[0])
        self.best_score = max(scores)

        new_population = [self.best_organism]  # Ensure best organism survives
        for i in range(self.population_size - 1):
            # First parents cycle through the held-out top of the ranking.
            parent_1_idx = i % self.holdout
            if self.mating:
                parent_2_idx = min(self.population_size - 1, int(np.random.exponential(self.holdout)))
            else:
                parent_2_idx = parent_1_idx
            # mate() performs the crossover and, by default, the mutation too.
            offspring = self.population[parent_1_idx].mate(self.population[parent_2_idx])
            new_population.append(offspring)

        self.population = new_population

    def get_best_organism(self, repeats=1):
        """Return ``(best_organism, best_score)`` as last recorded.

        ``repeats`` is accepted for compatibility and currently unused.
        """
        return self.best_organism, self.best_score

    def scoring_function(self, model):
        """Return the integer accuracy (in percent, floored) of ``model``.

        The model is evaluated on batches drawn from the notebook-level
        ``trainloader`` until at least ``inspected`` samples have been seen or
        the loader is exhausted, whichever happens first.

        Raises:
            ValueError: if the loader yields no samples.
        """
        # number of samples on which each network is evaluated
        inspected = 10000

        # Batches must live on the same device as the model's parameters.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in trainloader:
                images = images.to(device)
                labels = labels.to(device)
                # calculate outputs by running images through the network
                outputs = model(images)
                # the class with the highest energy is what we choose as prediction
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
                # Counting samples (not batches) keeps this independent of
                # the loader's batch size.
                if total >= inspected:
                    break
            else:
                # The loader ran out before `inspected` samples were seen.
                print("StopIteration, not enough data")

        if total == 0:
            raise ValueError("cannot score a model: the loader produced no samples")

        accuracy = 100 * correct // total
        return accuracy

In [9]:
# Factory used to build every member of the initial population.
def net_creator():
    """Return a freshly initialised LeNet."""
    return LeNet()

In [10]:
# Build the environment that drives the evolution of the networks.
curr_env = evolution(
    fun=net_creator,
    population_size=20,
    holdout=0.4,
    mating=True,
)

# Start the history lists with the organism (and its score) that the
# environment currently reports as best.
best_net, score = curr_env.get_best_organism()
best_nets, acc = [best_net], [score]

In [11]:
# Test-set accuracy of the best organism reported before any generation has run.
eval(best_nets[-1])

Accuracy of the network on the 10000 test images: 7 %


In [12]:
generations = 40
for i in range(generations):
    # Advance the population by one generation, then log its champion.
    curr_env.generation()
    this_generation_best, score = curr_env.get_best_organism()
    acc.append(score)
    best_nets.append(this_generation_best)
    # Only report every fifth generation to keep the output short.
    if i % 5 != 0:
        continue
    print("Generation ", i, "'s best network accuracy: ", score, "%")

Generation  0 's best network accuracy:  17 %
Generation  5 's best network accuracy:  17 %
Generation  10 's best network accuracy:  19 %
Generation  15 's best network accuracy:  23 %
Generation  20 's best network accuracy:  27 %
Generation  25 's best network accuracy:  27 %
Generation  30 's best network accuracy:  28 %
Generation  35 's best network accuracy:  27 %


In [13]:
# Test-set accuracy of the best organism recorded after the last generation.
eval(best_nets[-1])

Accuracy of the network on the 10000 test images: 28 %
