# DENSER
My implementation of DENSER, a method for architecture selection in neural networks. The paper can be found [here](https://arxiv.org/abs/2004.11002).


## TO DO:
- [ ] Implement crossover in GA class
- [ ] Implement crossover in DSGE class
- [ ] Implement mutation in GA class
- [ ] Implement genetic algorithm

In [113]:
import torch
import torchvision
import torchvision.transforms as transforms
import os
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from enum import Enum
from scripts import utils

%load_ext autoreload
%autoreload 2

## Build the encoder from the DENSER paper 

Fristly we define the lower lavel of the grammatic: a DSGE gene. This class encode a single layer from all the possible one (conv, pool, activation). 
In addition, the class compute the input channels and the output channels of the layer, which depends on the kernel size and the padding.

In [293]:
class DSGE_types(Enum):
    "Layer types for DSGE."
    POOLING = 1
    CONV = 2
    ACTIVATION = 3
    LINEAR = 4

class DSGE_pooling(Enum):
    "Pooling types for DSGE."
    MAX = 1
    AVG = 2

class DSGE_activation(Enum):
    "Activation types for DSGE."
    RELU = 1
    SIGMOID = 2
    TANH = 3

class DSGE_genes:
    "DSGE_encoding class. The DSGE_encoding is composed of a list of genes."
    def __init__(self, type=None, c_in=None, c_out=None, param=None):
        if type is None: # Random init, no type specified (could be pooling, conv, activation, linear)
            self.random_init()
        else:
            self.init_form_encoding(type, param)
        
        if self.type == DSGE_types.CONV:    #if conv, compute the number of channels knowing the input or output channels
            if(c_in is not None):
                self.c_out = utils.compute_output_conv2d(c_in, self.param['kernel_size'], self.param['stride'], self.param['padding'], 1)
                self.c_in = c_in
            elif(c_out is not None):
                self.c_in = utils.compute_input_conv2d(c_out, self.param['kernel_size'], self.param['stride'], self.param['padding'], 1)
                self.c_out = c_out
        else:                       #if not conv, the number of channels is the same as the input
            if(c_out is None):      
                self.c_out = c_in
                self.c_in = c_in
            elif(c_in is None):
                self.c_in = c_out
                self.c_out = c_out
            else:
                self.c_in = c_in
                self.c_out = c_out
                
        ######################################################################
        # FORSE DA AGGIUSTARE PER IL POOLING ########################3#########
        #######################################################################

        self.channels = {'in': self.c_in, 'out': self.c_out}
        
    def random_init(self):
        self.type = DSGE_types(np.random.randint(1, 5))  #randomly choose a type
        self.random_init_param()                  #randomly choose the parameters of the type

    def random_init_param(self):
        if self.type == DSGE_types.POOLING:           #randomly choose a pooling type
            self.param = {"pool_type" : DSGE_pooling(np.random.randint(1, 3)), "kernel_size": np.random.randint(2, 5), "stride": np.random.randint(1, 3), "padding": np.random.randint(0, 2)}
        elif self.type == DSGE_types.CONV:         #randomly choose a kernel size, stride and padding
            self.param = {'kernel_size': np.random.randint(1, 4), 'stride': np.random.randint(1, 4), 'padding': np.random.randint(1, 4)}
        elif self.type == DSGE_types.ACTIVATION:   #randomly choose an activation type
            self.param = DSGE_activation(np.random.randint(1, 4))
        elif self.type == DSGE_types.LINEAR:     #linear layer has no parameters
            self.param = None
    
    def init_form_encoding(self, type, param=None):
        self.type = type   #set the type
        if param is None:   #if no parameters are specified, randomly choose them
            self.random_init_param()
        
    def get(self):  #return the gene
        return self.type, self.param, self.channels



Then we are ready to encode the outer levels, the GA types, which describe a single block of the network (a sequence of DSGE genes). A block could be a features block (conv, pool, activation) or a classifier block (linear, activation). The GA types are encoded in the class GA_type.

In [294]:
class GA_types(Enum):
    "Layer types for GA."
    FEATURES = 1
    CLASSIFICATION = 2

class GA_genes:
    "GA_encoding class. The GA_encoding is composed of a list of genes."
    def __init__(self, GA_type, c_in=None, c_out=None):
        self.GA_type = GA_type #set the type
        self.dsge_encoding = []
        self.dsge_len = 3 if GA_type == GA_types.FEATURES else 2

        if( c_in is not None):  #if the input channels are specified, set them
            if self.GA_type == GA_types.FEATURES: #if the type is features we have a conv then activation and then pooling layer
                self.dsge_encoding.append(DSGE_genes(DSGE_types.CONV, c_in=c_in))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.ACTIVATION, c_in=self.dsge_encoding[-1].channels['out']))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.POOLING, c_in=self.dsge_encoding[-1].channels['out'] ) )
            if self.GA_type == GA_types.CLASSIFICATION: #if the type is classification we have a linear layer and then an activation layer
                self.dsge_encoding.append(DSGE_genes(DSGE_types.LINEAR, c_in=c_in, c_out=c_out))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.ACTIVATION, c_in=c_in, c_out=c_out))
            c_out = self.dsge_encoding[-1].channels['out']
        elif( c_out is not None): #if the output channels are specified, set them
            if self.GA_type == GA_types.FEATURES:
                self.dsge_encoding.append(DSGE_genes(DSGE_types.CONV, c_out=c_out))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.ACTIVATION, c_out=c_out))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.POOLING, c_out=c_out ) )
            if self.GA_type == GA_types.CLASSIFICATION:
                self.dsge_encoding.append(DSGE_genes(DSGE_types.LINEAR, c_in=c_in, c_out=c_out))
                self.dsge_encoding.append(DSGE_genes(DSGE_types.ACTIVATION, c_in=c_in, c_out=c_out))
            c_in = self.dsge_encoding[-1].channels['in']

        self.param = {'input_channels': c_in, 'output_channels': c_out}

    def get(self):
        return self.GA_type, self.dsge_encoding, self.param

    def print(self): #print the GA_encoding
        print( self.GA_type)
        for i in range(len(self.dsge_encoding)):
            print( self.dsge_encoding[i].get())
        print("param: ", self.param)
            

Finally, we encode the whole network, the GA, which is a sequence of GA types:

In [295]:
class Net_encoding:
    "Describe the encoding of a network."
    def __init__(self, num_features, num_classification, c_in, c_out):
        self.len = num_features + num_classification #the length of the encoding is the number of features block and classification block
        self.num_features = num_features 
        self.num_classification = num_classification
        self.GA_encoding = []
        self.param = {'input_channels': c_in, 'output_channels': c_out} 
        tmp1 = c_in
        for i in range(self.num_features):
            self.GA_encoding.append(GA_genes(GA_types.FEATURES, c_in=tmp1))
            tmp1 = self.GA_encoding[-1].param['output_channels']
        for i in range(self.num_classification-1):
            self.GA_encoding.append(GA_genes(GA_types.CLASSIFICATION, c_in=tmp1, c_out=np.random.randint(3, 20)))
            tmp1 = self.GA_encoding[-1].param['output_channels']
        self.GA_encoding.append(GA_genes(GA_types.CLASSIFICATION, c_in=tmp1, c_out=c_out))
        
    def get(self):
        return self.GA_encoding
        
    def print(self):
        print("Net encoding len:", self.len)
        for i in range(self.len):
            print( self.GA_encoding[i].print())

Let's try to build a network with 4 features blocks and 2 classifier blocks, with a 3 channel input (a standard RGB image) and a 10 channel output (class of CIFAR-10):

In [312]:
Net = Net_encoding( 4, 2, 3, 10)
Net.print()


Net encoding len: 6
GA_types.FEATURES
(<DSGE_types.CONV: 2>, {'kernel_size': 3, 'stride': 2, 'padding': 2}, {'in': 3, 'out': 3})
(<DSGE_types.ACTIVATION: 3>, <DSGE_activation.TANH: 3>, {'in': 3, 'out': 3})
(<DSGE_types.POOLING: 1>, {'pool_type': <DSGE_pooling.AVG: 2>, 'kernel_size': 2, 'stride': 1, 'padding': 1}, {'in': 3, 'out': 3})
param:  {'input_channels': 3, 'output_channels': 3}
None
GA_types.FEATURES
(<DSGE_types.CONV: 2>, {'kernel_size': 1, 'stride': 3, 'padding': 1}, {'in': 3, 'out': 2})
(<DSGE_types.ACTIVATION: 3>, <DSGE_activation.SIGMOID: 2>, {'in': 2, 'out': 2})
(<DSGE_types.POOLING: 1>, {'pool_type': <DSGE_pooling.MAX: 1>, 'kernel_size': 3, 'stride': 1, 'padding': 0}, {'in': 2, 'out': 2})
param:  {'input_channels': 3, 'output_channels': 2}
None
GA_types.FEATURES
(<DSGE_types.CONV: 2>, {'kernel_size': 2, 'stride': 2, 'padding': 3}, {'in': 2, 'out': 4})
(<DSGE_types.ACTIVATION: 3>, <DSGE_activation.SIGMOID: 2>, {'in': 4, 'out': 4})
(<DSGE_types.POOLING: 1>, {'pool_type': <D

## From the encoding to the neural network
Now we can build the neural network module from the encoding. We have only to select the correct layer from the DSGE gene and to connect them in the correct order. All the information are contained in the Net_encoding class.

In [308]:
class _Net(nn.Module):
    def __init__(self, Net_encod):
        super(_Net, self).__init__()
        self.features_layer = nn.Sequential()
        for i in range(Net_encod.num_features):
            for j in range(Net_encod.GA_encoding[i].dsge_len):
                self.features_layer.add_module('flayer%d_%d' % (i+1, j+1), self.make_layer(Net_encod.GA_encoding[i].dsge_encoding[j]))

        self.classification_layer = nn.Sequential()
        for i in range(Net_encod.num_classification):
            for j in range(Net_encod.GA_encoding[j].dsge_len):
                self.classification_layer.add_module('clayer%d_%d' % (i+1, j+1), self.make_layer(Net_encod.GA_encoding[j].dsge_encoding[j]))
    
    def make_layer(self, dsge_encod):
            if dsge_encod.type == DSGE_types.CONV:
                return nn.Conv2d(dsge_encod.channels['in'], dsge_encod.channels['out'], dsge_encod.param['kernel_size'], dsge_encod.param['stride'], dsge_encod.param['padding'])
            if dsge_encod.type == DSGE_types.LINEAR:
                return nn.Linear(dsge_encod.channels['in'], dsge_encod.channels['out'])
            if dsge_encod.type == DSGE_types.ACTIVATION:
                if dsge_encod.param == DSGE_activation.RELU:
                    return nn.ReLU()
                if dsge_encod.param == DSGE_activation.SIGMOID:
                    return nn.Sigmoid()
                if dsge_encod.param == DSGE_activation.TANH:
                    return nn.Tanh()
            if dsge_encod.type == DSGE_types.POOLING:
                if dsge_encod.param["pool_type"] == DSGE_pooling.MAX:
                    return nn.MaxPool2d(dsge_encod.param['kernel_size'], dsge_encod.param['stride'], dsge_encod.param['padding'])
                if dsge_encod.param["pool_type"] == DSGE_pooling.AVG:
                    return nn.AvgPool2d(dsge_encod.param['kernel_size'], dsge_encod.param['stride'], dsge_encod.param['padding'])
    def forward(self, x):
        x = self.features_layer(x)
        x= torch.flatten(x, 1)
        x = self.classification_layer(x)
        return x


Let's build the network from the encoding:

In [313]:
model = _Net(Net)
model

_Net(
  (features_layer): Sequential(
    (flayer1_1): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
    (flayer1_2): Tanh()
    (flayer1_3): AvgPool2d(kernel_size=2, stride=1, padding=1)
    (flayer2_1): Conv2d(3, 2, kernel_size=(1, 1), stride=(3, 3), padding=(1, 1))
    (flayer2_2): Sigmoid()
    (flayer2_3): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
    (flayer3_1): Conv2d(2, 4, kernel_size=(2, 2), stride=(2, 2), padding=(3, 3))
    (flayer3_2): Sigmoid()
    (flayer3_3): AvgPool2d(kernel_size=4, stride=1, padding=0)
    (flayer4_1): Conv2d(4, 6, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (flayer4_2): Tanh()
    (flayer4_3): MaxPool2d(kernel_size=2, stride=1, padding=1, dilation=1, ceil_mode=False)
  )
  (classification_layer): Sequential(
    (clayer1_1): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
    (clayer1_2): Sigmoid()
    (clayer1_3): AvgPool2d(kernel_size=4, stride=1, padding=0)
    (clayer2_