# Search Space

In [1]:
class MLPSearchSpace(object):
    """Vocabulary of MLP layer configurations addressed by integer ids.

    Ids start at 1. Hidden-layer entries are ``(units, activation)`` tuples,
    followed by one ``'dropout'`` entry and one output-layer entry whose
    form depends on ``target_classes``.
    """

    def __init__(self, target_classes):
        """Store the class count and build the vocabulary once.

        Args:
            target_classes: number of target classes; decides the output
                layer entry appended by ``vocab_dict``.
        """
        self.target_classes = target_classes
        self.vocab = self.vocab_dict()

    def vocab_dict(self):
        """Build and return a fresh ``{id: configuration}`` dictionary.

        Returns:
            dict: ids 1..28 map to ``(units, activation)`` for every
            combination of the allowed widths and activations (widths vary
            slowest), the next id maps to the string ``'dropout'``, and the
            last id maps to ``(1, 'sigmoid')`` when ``target_classes == 2``
            or ``(target_classes, 'softmax')`` otherwise.
        """
        # allowed layer widths and activation functions
        widths = [8, 16, 32, 64, 128, 256, 512]
        activations = ['sigmoid', 'tanh', 'relu', 'elu']

        # number the (width, activation) pairs consecutively, starting at 1
        vocabulary = {}
        next_id = 1
        for width in widths:
            for activation in activations:
                vocabulary[next_id] = (width, activation)
                next_id += 1

        # dropout gets its own entry in the vocabulary (a plain string)
        vocabulary[next_id] = 'dropout'
        next_id += 1

        # the final entry is the output layer: one sigmoid unit for binary
        # problems, otherwise a softmax over all classes
        if self.target_classes == 2:
            output_layer = (self.target_classes - 1, 'sigmoid')
        else:
            output_layer = (self.target_classes, 'softmax')
        vocabulary[next_id] = output_layer

        return vocabulary

    def encode_sequence(self, sequence):
        """Translate configurations into their vocabulary ids.

        Args:
            sequence: iterable of configurations as stored in ``self.vocab``.

        Returns:
            list: the id of each configuration, in input order.

        Raises:
            ValueError: if a configuration is not in the vocabulary.
        """
        layer_ids = list(self.vocab)
        configurations = list(self.vocab.values())
        return [layer_ids[configurations.index(item)] for item in sequence]

    def decode_sequence(self, sequence):
        """Translate vocabulary ids back into configurations.

        Args:
            sequence: iterable of vocabulary ids.

        Returns:
            list: the configuration of each id, in input order.

        Raises:
            ValueError: if an id is not in the vocabulary.
        """
        layer_ids = list(self.vocab)
        configurations = list(self.vocab.values())
        return [configurations[layer_ids.index(item)] for item in sequence]


In [2]:
# Build a search space for a 10-class problem and list every vocabulary entry.
mlp = MLPSearchSpace(10)

x = mlp.vocab_dict()
for layer_id, layer_config in x.items():
    print(layer_id, layer_config)

1 (8, 'sigmoid')
2 (8, 'tanh')
3 (8, 'relu')
4 (8, 'elu')
5 (16, 'sigmoid')
6 (16, 'tanh')
7 (16, 'relu')
8 (16, 'elu')
9 (32, 'sigmoid')
10 (32, 'tanh')
11 (32, 'relu')
12 (32, 'elu')
13 (64, 'sigmoid')
14 (64, 'tanh')
15 (64, 'relu')
16 (64, 'elu')
17 (128, 'sigmoid')
18 (128, 'tanh')
19 (128, 'relu')
20 (128, 'elu')
21 (256, 'sigmoid')
22 (256, 'tanh')
23 (256, 'relu')
24 (256, 'elu')
25 (512, 'sigmoid')
26 (512, 'tanh')
27 (512, 'relu')
28 (512, 'elu')
29 dropout
30 (10, 'softmax')


# Constants

In [3]:
########################################################
#                   NAS PARAMETERS                     #
########################################################
# NOTE(review): none of the NAS constants below are referenced by the code
# visible in this notebook section; presumably the controller / search loop
# consumes them -- confirm against that code.
CONTROLLER_SAMPLING_EPOCHS = 10
SAMPLES_PER_CONTROLLER_EPOCH = 10
CONTROLLER_TRAINING_EPOCHS = 10
ARCHITECTURE_TRAINING_EPOCHS = 10
CONTROLLER_LOSS_ALPHA = 0.9

########################################################
#               CONTROLLER PARAMETERS                  #
########################################################
# NOTE(review): not referenced by the code visible in this section either.
CONTROLLER_LSTM_DIM = 100
CONTROLLER_OPTIMIZER = 'Adam'
CONTROLLER_LEARNING_RATE = 0.01
CONTROLLER_DECAY = 0.1
CONTROLLER_MOMENTUM = 0.0
CONTROLLER_USE_PREDICTOR = True

########################################################
#                   MLP PARAMETERS                     #
########################################################
# NOTE(review): not referenced by the code visible in this section.
MAX_ARCHITECTURE_LENGTH = 3
# Optimizer name read by MLPGenerator.compile_model: an SGD setting selects
# torch.optim.SGD, any other value is looked up by name on torch.optim.
MLP_OPTIMIZER = 'Adam'
# Passed to the optimizer as `lr`.
MLP_LEARNING_RATE = 0.01
# Passed to the optimizer as `weight_decay`.
MLP_DECAY = 0.0
# Passed to the optimizer as `momentum`, on the SGD branch only.
MLP_MOMENTUM = 0.0
# Argument given to every Dropout layer MLPGenerator creates.
MLP_DROPOUT = 0.2
# Stored by MLPGenerator as `mlp_loss_func`.
# NOTE(review): this is a Keras-style loss name, while the generator imports
# torch -- confirm it is what the training code expects.
MLP_LOSS_FUNCTION = 'categorical_crossentropy'
# When True, MLPGenerator keeps a table of shared layer weights and
# transfers / updates them around each training run.
MLP_ONE_SHOT = True

########################################################
#                   DATA PARAMETERS                    #
########################################################
# Handed to MLPSearchSpace by MLPGenerator: 2 yields a single sigmoid output
# unit, any other value yields a softmax layer with that many units.
TARGET_CLASSES = 3

########################################################
#                  OUTPUT PARAMETERS                   #
########################################################
# NOTE(review): not referenced by the code visible in this section.
TOP_N = 5

# MLP Generator

In [4]:
import os
import warnings
from types import SimpleNamespace

import pandas as pd
import torch
from torch import nn
from torch import optim as optimizers
from torch.nn import Sequential, Linear, Dropout, Flatten

In [5]:
class MLPGenerator(MLPSearchSpace):
    """Builds, trains and weight-shares PyTorch MLPs decoded from the search space.

    The original body mixed Keras calls (``Linear(units=...)``,
    ``model.layers``, ``get_weights``, ``model.fit``) with torch imports and
    could not run; this version implements the same public methods on top
    of ``torch.nn`` only.

    With ``MLP_ONE_SHOT`` enabled, trained ``Linear`` weights are kept in
    ``self.shared_weights`` (a DataFrame with columns ``bigram_id`` and
    ``weights``) and pickled to ``self.weights_file`` after every training
    run. An already existing weights file is not read back; it is simply
    overwritten by the next ``update_weights`` call.
    """

    # Search-space activation names mapped to the torch.nn module class
    # implementing them.
    _ACTIVATIONS = {
        'sigmoid': nn.Sigmoid,
        'tanh': nn.Tanh,
        'relu': nn.ReLU,
        'elu': nn.ELU,
        'softmax': nn.Softmax,
    }

    def __init__(self):
        """Copy the module-level MLP settings and prepare the weight store."""
        self.target_classes = TARGET_CLASSES
        self.mlp_optimizer = MLP_OPTIMIZER
        self.mlp_lr = MLP_LEARNING_RATE
        self.mlp_decay = MLP_DECAY
        self.mlp_momentum = MLP_MOMENTUM
        self.mlp_dropout = MLP_DROPOUT
        self.mlp_loss_func = MLP_LOSS_FUNCTION
        self.mlp_one_shot = MLP_ONE_SHOT
        self.metrics = ['accuracy']

        super().__init__(TARGET_CLASSES)

        if self.mlp_one_shot:
            self.weights_file = 'LOGS/shared_weights.pkl'
            self.shared_weights = pd.DataFrame({'bigram_id': [], 'weights': []})
            if not os.path.exists(self.weights_file):
                print("Initializing shared weights dictionary...")
                # to_pickle does not create missing directories
                os.makedirs(os.path.dirname(self.weights_file), exist_ok=True)
                self.shared_weights.to_pickle(self.weights_file)

    def create_model(self, sequence, mlp_input_shape):
        """Build a ``torch.nn.Sequential`` from an encoded architecture.

        Args:
            sequence: vocabulary ids, decoded with ``decode_sequence``.
            mlp_input_shape: shape of ONE sample (without the batch axis).
                Inputs with more than one axis get a leading ``Flatten``.

        Returns:
            Sequential: one ``Linear`` plus activation module per
            ``(units, activation)`` entry and one ``Dropout`` per
            ``'dropout'`` entry, each registered under a unique name.

        Raises:
            ValueError: for an id outside the vocabulary or an activation
                name without a torch counterpart.
        """
        layer_configs = self.decode_sequence(sequence)
        model = Sequential()

        # width of the flattened sample = in_features of the first Linear
        in_features = 1
        for dim in mlp_input_shape:
            in_features *= int(dim)

        if len(mlp_input_shape) > 1:
            # Flatten() keeps the batch axis and merges all remaining axes
            model.add_module('flatten', Flatten())

        for index, layer_conf in enumerate(layer_configs):
            # module names must be unique: add_module replaces a module that
            # is registered again under an existing name
            if layer_conf == 'dropout':
                model.add_module('dropout_{}'.format(index), Dropout(self.mlp_dropout))
            else:
                units, activation = layer_conf
                model.add_module('linear_{}'.format(index), Linear(in_features, units))
                model.add_module('{}_{}'.format(activation, index), self._make_activation(activation))
                in_features = units

        return model

    def compile_model(self, model):
        """Create the optimizer for ``model`` from the MLP settings.

        Args:
            model: module whose parameters are optimised.

        Returns:
            torch.optim.Optimizer: SGD (with momentum) when the configured
            name is ``'sgd'`` in any letter case, otherwise the optimizer
            class of that name from ``torch.optim``.
        """
        if self.mlp_optimizer.lower() == 'sgd':
            return optimizers.SGD(model.parameters(), lr=self.mlp_lr,
                                  weight_decay=self.mlp_decay, momentum=self.mlp_momentum)
        optimizer_class = getattr(optimizers, self.mlp_optimizer)
        return optimizer_class(model.parameters(), lr=self.mlp_lr, weight_decay=self.mlp_decay)

    def update_weights(self, model):
        """Store the weights of every ``Linear`` layer and pickle the table.

        Each layer is keyed by its bigram ``(previous config, own config)``;
        an existing row with the same bigram is overwritten, otherwise a row
        is appended.

        Args:
            model: a model built by ``create_model``.
        """
        bigram_ids = list(self.shared_weights['bigram_id'])
        weights = list(self.shared_weights['weights'])

        for bigram_id, layer in self._bigram_layers(model):
            # detached CPU copies, so later training cannot modify the stored values
            state = {name: tensor.detach().cpu().clone()
                     for name, tensor in layer.state_dict().items()}
            if bigram_id in bigram_ids:
                weights[bigram_ids.index(bigram_id)] = state
            else:
                bigram_ids.append(bigram_id)
                weights.append(state)

        # DataFrame.append no longer exists in pandas 2.x, so the table is
        # rebuilt from the two columns; dtype=object keeps each tuple / dict
        # as a single cell
        self.shared_weights = pd.DataFrame({
            'bigram_id': pd.Series(bigram_ids, dtype=object),
            'weights': pd.Series(weights, dtype=object),
        })
        self.shared_weights.to_pickle(self.weights_file)

    def set_model_weights(self, model):
        """Load stored weights into every ``Linear`` layer with a known bigram.

        Stored weights whose tensor shapes differ from the layer's are
        skipped, since the bigram key does not include the input width.

        Args:
            model: a model built by ``create_model``.
        """
        bigram_ids = list(self.shared_weights['bigram_id'])
        weights = list(self.shared_weights['weights'])

        for bigram_id, layer in self._bigram_layers(model):
            if bigram_id not in bigram_ids:
                continue
            stored = weights[bigram_ids.index(bigram_id)]
            current = layer.state_dict()
            same_layout = set(stored) == set(current) and all(
                stored[name].shape == current[name].shape for name in current)
            if not same_layout:
                continue
            print("Transferring weights for layer:", bigram_id)
            layer.load_state_dict(stored)

    def train_model(self, model, x_data, y_data, nb_epochs, validation_split=0.1, callbacks=None):
        """Train ``model`` and return its per-epoch metrics.

        With one-shot weight sharing enabled, stored weights are loaded
        before training and the trained weights are stored afterwards.

        Args:
            model: a model built by ``create_model``.
            x_data: samples, anything ``torch.as_tensor`` accepts.
            y_data: targets in the same layout as the model output (one-hot
                rows for a softmax output); 1-D targets are treated as a
                single column.
            nb_epochs: number of passes over the training part.
            validation_split: fraction taken from the END of the data for
                validation; the data is not shuffled before the split.
            callbacks: accepted for signature compatibility only; a warning
                is issued when a non-empty value is passed.

        Returns:
            SimpleNamespace: ``history`` is a dict of per-epoch lists under
            ``'loss'`` and ``'accuracy'`` (plus ``'val_loss'`` and
            ``'val_accuracy'`` when a validation part exists), and ``epoch``
            lists the epoch indices. This mirrors the attribute names of the
            object the previous ``model.fit`` call returned.

        Raises:
            ValueError: on mismatching sample counts, an empty training
                part, mismatching target/output shapes or an unsupported
                loss name.
        """
        if callbacks:
            warnings.warn("callbacks are not supported by the PyTorch training loop and are ignored")

        if self.mlp_one_shot:
            self.set_model_weights(model)
        history = self._fit(model, x_data, y_data, nb_epochs, validation_split)
        if self.mlp_one_shot:
            self.update_weights(model)

        return history

    def _make_activation(self, name):
        """Instantiate the torch module for a search-space activation name."""
        if name not in self._ACTIVATIONS:
            raise ValueError("unsupported activation: {!r}".format(name))
        if name == 'softmax':
            # normalise over the class axis
            return nn.Softmax(dim=-1)
        return self._ACTIVATIONS[name]()

    def _activation_name(self, module):
        """Return the search-space name of an activation module, or 'linear' if it is not one."""
        for name, module_type in self._ACTIVATIONS.items():
            if isinstance(module, module_type):
                return name
        return 'linear'

    def _bigram_layers(self, model):
        """Pair every ``Linear`` child of ``model`` with its bigram id.

        The bigram is ``(previous config, own config)``: a config is
        ``'input'``, ``'flatten'`` or ``(out_features, activation name)``,
        and dropout layers are skipped.
        """
        children = list(model.children())
        previous = 'input'
        pairs = []
        for position, layer in enumerate(children):
            if isinstance(layer, Flatten):
                previous = 'flatten'
            elif isinstance(layer, Linear):
                # create_model registers the activation right after its Linear
                follower = children[position + 1] if position + 1 < len(children) else None
                config = (layer.out_features, self._activation_name(follower))
                pairs.append(((previous, config), layer))
                previous = config
        return pairs

    def _loss(self, outputs, targets):
        """Compute the configured loss on probability outputs."""
        if outputs.shape != targets.shape:
            raise ValueError("target shape {} does not match model output shape {}".format(
                tuple(targets.shape), tuple(outputs.shape)))
        # keep log() finite when a probability saturates at 0 or 1
        probs = outputs.clamp(min=1e-7, max=1.0 - 1e-7)
        if self.mlp_loss_func == 'categorical_crossentropy':
            return -(targets * probs.log()).sum(dim=-1).mean()
        if self.mlp_loss_func == 'binary_crossentropy':
            return -(targets * probs.log() + (1.0 - targets) * (1.0 - probs).log()).mean()
        raise ValueError("unsupported loss function: {!r} (supported: "
                         "'categorical_crossentropy', 'binary_crossentropy')".format(self.mlp_loss_func))

    def _count_correct(self, outputs, targets):
        """Count correctly classified samples in a batch."""
        if outputs.shape[-1] == 1:
            # single sigmoid unit: threshold at 0.5
            matches = (outputs >= 0.5) == (targets >= 0.5)
        else:
            matches = outputs.argmax(dim=-1) == targets.argmax(dim=-1)
        return int(matches.sum().item())

    def _fit(self, model, x_data, y_data, nb_epochs, validation_split, batch_size=32):
        """Run the mini-batch training loop and collect per-epoch metrics."""
        inputs = torch.as_tensor(x_data, dtype=torch.float32)
        targets = torch.as_tensor(y_data, dtype=torch.float32)
        if targets.dim() == 1:
            targets = targets.unsqueeze(1)
        if inputs.shape[0] != targets.shape[0]:
            raise ValueError("x_data and y_data hold a different number of samples")

        # the validation part is the tail of the data
        n_samples = inputs.shape[0]
        n_train = int(n_samples * (1.0 - (validation_split or 0.0)))
        if n_train <= 0:
            raise ValueError("no training samples left after the validation split")
        train_x, val_x = inputs[:n_train], inputs[n_train:]
        train_y, val_y = targets[:n_train], targets[n_train:]
        n_val = val_x.shape[0]

        optimizer = self.compile_model(model)
        logs = {'loss': [], 'accuracy': []}
        if n_val:
            logs['val_loss'] = []
            logs['val_accuracy'] = []

        for _ in range(nb_epochs):
            model.train()
            order = torch.randperm(n_train)
            loss_sum = 0.0
            hits = 0
            for start in range(0, n_train, batch_size):
                batch = order[start:start + batch_size]
                optimizer.zero_grad()
                outputs = model(train_x[batch])
                loss = self._loss(outputs, train_y[batch])
                loss.backward()
                optimizer.step()
                # weight by batch size so the epoch value is a per-sample mean
                loss_sum += loss.item() * len(batch)
                hits += self._count_correct(outputs, train_y[batch])
            logs['loss'].append(loss_sum / n_train)
            logs['accuracy'].append(hits / n_train)

            if n_val:
                model.eval()
                with torch.no_grad():
                    outputs = model(val_x)
                    logs['val_loss'].append(self._loss(outputs, val_y).item())
                    logs['val_accuracy'].append(self._count_correct(outputs, val_y) / n_val)

        # leave the model in inference mode so dropout is inactive afterwards
        model.eval()
        return SimpleNamespace(history=logs, epoch=list(range(nb_epochs)))

In [6]:
# Instantiating the generator writes an empty shared-weights table to
# LOGS/shared_weights.pkl when that file does not exist yet, and prints a
# message when it does so.
a = MLPGenerator()

Initializing shared weights dictionary...
