In [1]:
import os 
import numpy as np
import pandas as pd
import warnings
import pickle
import matplotlib.pyplot as plt
from itertools import groupby
import tensorflow as tf
from keras import Input, Model, Sequential
from keras.layers import Activation, Dense, Flatten, Dropout, LSTM
import keras.utils
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.backend import log

%matplotlib inline

Populating the interactive namespace from numpy and matplotlib


In [4]:
BASE_PATH = 'undemocracy/notebooks/nas/'

In [141]:
########################################################
#                   NAS PARAMETERS                     #
########################################################
CONTROLLER_SAMPLING_EPOCHS = 10
SAMPLES_PER_CONTROLLER_EPOCH = 10
CONTROLLER_TRAINING_EPOCHS = 10
ARCHITECTURE_TRAINING_EPOCHS = 10
CONTROLLER_LOSS_ALPHA = 0.9
REINFORCE_BASELINE = 0.8

########################################################
#               CONTROLLER PARAMETERS                  #
########################################################
CONTROLLER_LSTM_DIM = 100
CONTROLLER_OPTIMIZER = 'Adam'
CONTROLLER_LEARNING_RATE = 0.01
CONTROLLER_DECAY = 0.1
CONTROLLER_MOMENTUM = 0.0
CONTROLLER_USE_PREDICTOR = False

########################################################
#                   MLP PARAMETERS                     #
########################################################
MAX_ARCHITECTURE_LENGTH = 3
MLP_OPTIMIZER = 'Adam'
MLP_LEARNING_RATE = 0.01
MLP_DECAY = 0.0
MLP_MOMENTUM = 0.0
MLP_DROPOUT = 0.2
MLP_LOSS_FUNCTION = 'binary_crossentropy'
MLP_ONE_SHOT = True

########################################################
#                   DATA PARAMETERS                    #
########################################################
TARGET_CLASSES = 2

########################################################
#                  OUTPUT PARAMETERS                   #
########################################################
TOP_N = 5

In [142]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = numpy.random.permutation(len(a))
    return a[p], b[p]

## MLP Search Space Class

In [143]:
class MLPSearchSpace(object):

    def __init__(self, target_classes):

        self.target_classes = target_classes
        self.vocab = self.vocab_dict()

    def vocab_dict(self):
    	# define the allowed nodes and activation functions
        nodes = [8, 12, 16, 24, 32, 48] #consider making these numbers larger
        act_funcs = ['sigmoid', 'tanh', 'relu', 'elu']
        
        # initialize lists for keys and values of the vocabulary
        layer_params = []
        layer_id = []
        
        # for all activation functions for each node
        for i in range(len(nodes)):
            for j in range(len(act_funcs)):
            	
                # create an id and a configuration tuple (node, activation)
                layer_params.append((nodes[i], act_funcs[j]))
                layer_id.append(len(act_funcs) * i + j + 1)
        
        # zip the id and configurations into a dictionary
        vocab = dict(zip(layer_id, layer_params))
        
        # add dropout in the volcabulary
        vocab[len(vocab) + 1] = (('dropout'))
        
        # add the final softmax/sigmoid layer in the vocabulary
        if self.target_classes == 2:
            vocab[len(vocab) + 1] = (self.target_classes - 1, 'sigmoid')
        else:
            vocab[len(vocab) + 1] = (self.target_classes, 'softmax')
        return vocab


	# function to encode a sequence of configuration tuples
    def encode_sequence(self, sequence):
        keys = list(self.vocab.keys())
        values = list(self.vocab.values())
        encoded_sequence = []
        for value in sequence:
            encoded_sequence.append(keys[values.index(value)])
        return encoded_sequence


	# function to decode a sequence back to configuration tuples
    def decode_sequence(self, sequence):
        keys = list(self.vocab.keys())
        values = list(self.vocab.values())
        decoded_sequence = []
        for key in sequence:
            decoded_sequence.append(values[keys.index(key)])
        return decoded_sequence

## MLP Generator

In [144]:
class MLPGenerator(MLPSearchSpace):

    def __init__(self):

        self.target_classes = TARGET_CLASSES
        self.mlp_optimizer = MLP_OPTIMIZER
        self.mlp_lr = MLP_LEARNING_RATE
        self.mlp_decay = MLP_DECAY
        self.mlp_momentum = MLP_MOMENTUM
        self.mlp_dropout = MLP_DROPOUT
        self.mlp_loss_func = MLP_LOSS_FUNCTION
        self.mlp_one_shot = MLP_ONE_SHOT
        self.metrics = ['accuracy']

        super().__init__(TARGET_CLASSES)

        if self.mlp_one_shot:
	
            # path to shared weights file 
            self.weights_file = BASE_PATH + 'LOGS/shared_weights.pkl'
    
            # open an empty dataframe with columns for bigrams IDs and weights
            self.shared_weights = pd.DataFrame({'bigram_id': [], 'weights': []})
        
            # pickle the dataframe
            if not os.path.exists(self.weights_file):
                print("Initializing shared weights dictionary...")
                self.shared_weights.to_pickle(self.weights_file)


    # function to create a keras model given a sequence and input data shape
    def create_model(self, sequence, mlp_input_shape):

        # decode sequence to get nodes and activations of each layer
        layer_configs = self.decode_sequence(sequence)

        # create a sequential model
        model = Sequential()

        # add a flatten layer if the input is 3 or higher dimensional
        if len(mlp_input_shape) > 1:
            model.add(Flatten(name='flatten', input_shape=mlp_input_shape))

            # for each element in the decoded sequence
            for i, layer_conf in enumerate(layer_configs):

                # add a model layer (Dense or Dropout)
                if layer_conf is 'dropout':
                    model.add(Dropout(self.mlp_dropout, name='dropout'))
                else:
                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))

        else:
            # for 2D inputs
            for i, layer_conf in enumerate(layer_configs):

                # add the first layer (requires the input shape parameter)
                if i == 0:
                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1], input_shape=mlp_input_shape))

                # add subsequent layers (Dense or Dropout)
                elif layer_conf is 'dropout':
                    model.add(Dropout(self.mlp_dropout, name='dropout'))
                else:
                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))

        # return the keras model
        return model

    # function to compile the model with the appropriate optimizer and loss function
    def compile_model(self, model):

        # get optimizer
        if self.mlp_optimizer == 'sgd':
            optim = tf.optimizers.SGD(learning_rate=self.mlp_lr, decay=self.mlp_decay, momentum=self.mlp_momentum)
        elif self.mlp_optimizer == 'Adam':
            optim = tf.optimizers.Adam(learning_rate=self.mlp_lr, decay=self.mlp_decay)
        else:
            optim = getattr(tf.optimizers, self.mlp_optimizer)(learning_rate=self.mlp_lr, decay=self.mlp_decay)

        # compile model 
        model.compile(loss=self.mlp_loss_func, optimizer=optim, metrics=self.metrics)

        # return the compiled keras model
        return model

    # --------------  ONESHOT STUFF --------------

    def set_model_weights(self, model):
    
        # get nodes and activations for each layer    
        layer_configs = ['input']
        for layer in model.layers:
            
            # add flatten since it affects the size of the weights
            if 'flatten' in layer.name:
                layer_configs.append(('flatten'))
            
            # don't add dropout since it doesn't affect weight sizes or activations
            elif 'dropout' not in layer.name:
                layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
        
        # get bigrams of relevant layers for weights transfer
        config_ids = []
        for i in range(1, len(layer_configs)):
            config_ids.append((layer_configs[i - 1], layer_configs[i]))
        
        # for all layers
        j = 0
        for i, layer in enumerate(model.layers):
            if 'dropout' not in layer.name:
                warnings.simplefilter(action='ignore', category=FutureWarning)
                
                # get all bigram values we already have weights for
                bigram_ids = self.shared_weights['bigram_id'].values
                
                # check if a bigram already exists in the dataframe
                search_index = []
                for i in range(len(bigram_ids)):
                    if config_ids[j] == bigram_ids[i]:
                        search_index.append(i)
                
                # set layer weights if there is a bigram match in the dataframe 
                if len(search_index) > 0:
                    print("Transferring weights for layer:", config_ids[j])
                    layer.set_weights(self.shared_weights['weights'].values[search_index[0]])
                j += 1
        
    def update_weights(self, model):

        # get nodes and activations for each layer
        layer_configs = ['input']
        for layer in model.layers:
            
            # add flatten since it affects the size of the weights
            if 'flatten' in layer.name:
                layer_configs.append(('flatten'))
            
            # don't add dropout since it doesn't affect weight sizes or activations
            elif 'dropout' not in layer.name:
                layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
        
        # get bigrams of relevant layers for weights transfer
        config_ids = []
        for i in range(1, len(layer_configs)):
            config_ids.append((layer_configs[i - 1], layer_configs[i]))
        
        # for all layers
        j = 0
        for i, layer in enumerate(model.layers):
            if 'dropout' not in layer.name:
                warnings.simplefilter(action='ignore', category=FutureWarning)
                
                #get all bigram values we already have weights for
                bigram_ids = self.shared_weights['bigram_id'].values
                
                # check if a bigram already exists in the dataframe
                search_index = []
                for i in range(len(bigram_ids)):
                    if config_ids[j] == bigram_ids[i]:
                        search_index.append(i)
                
                # add weights to df in a new row if weights aren't already available
                if len(search_index) == 0:
                    self.shared_weights = self.shared_weights.append({'bigram_id': config_ids[j],
                                                                    'weights': layer.get_weights()},
                                                                    ignore_index=True)
                # else update weights 
                else:
                    self.shared_weights.at[search_index[0], 'weights'] = layer.get_weights()
                j += 1
        self.shared_weights.to_pickle(self.weights_file)

    def train_model(self, model, x_data, y_data, nb_epochs, validation_split=0.1, callbacks=None):
        if self.mlp_one_shot:
            self.set_model_weights(model)
            history = model.fit(x_data,
                                y_data,
                                epochs=nb_epochs,
                                validation_split=validation_split,
                                callbacks=callbacks,
                                verbose=0)
            self.update_weights(model)
        else:
            history = model.fit(x_data,
                                y_data,
                                epochs=nb_epochs,
                                validation_split=validation_split,
                                callbacks=callbacks,
                                verbose=0)
        return history

## LSTM RNN Controller

In [145]:
class Controller(MLPSearchSpace):

    def __init__(self):
		
        # defining training and sequence creation related parameters
        self.max_len = MAX_ARCHITECTURE_LENGTH
        self.controller_lstm_dim = CONTROLLER_LSTM_DIM
        self.controller_optimizer = CONTROLLER_OPTIMIZER
        self.controller_lr = CONTROLLER_LEARNING_RATE
        self.controller_decay = CONTROLLER_DECAY
        self.controller_momentum = CONTROLLER_MOMENTUM
        self.use_predictor = CONTROLLER_USE_PREDICTOR
        
        # file path of controller weights to be stored at
        self.controller_weights = BASE_PATH + 'LOGS/controller_weights.h5'

        # initializing a list for all the sequences created
        self.seq_data = []

        # inheriting from the search space
        super().__init__(TARGET_CLASSES)

        # number of classes for the controller (+ 1 for padding)
        self.controller_classes = len(self.vocab) + 1

    def control_model(self, controller_input_shape, controller_batch_size):
        #main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
        main_input = Input(shape=controller_input_shape,name='main_input')
        x = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x)
        model = Model(inputs=[main_input], outputs=[main_output])
        return model

    # HYBRID LSTM CONTROLLER: 
    #  - main output is the encoded sequence to be passed to the MPL Generator
    #  - accuracy predictor output adds adversarial element to LSTM 
   
    def hybrid_control_model(self, controller_input_shape, controller_batch_size):
        # input layer initialized with input shape and batch size
        main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
        
        # LSTM layer
        x1 = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
        # output for the sequence generator network
        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x1)

        # LSTM layer
        x2 = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
        # single neuron sigmoid layer for accuracy prediction
        predictor_output = Dense(1, activation='sigmoid', name='predictor_output')(x2)
        
        # finally the Keras Model class is used to create a multi-output model
        model = Model(inputs=[main_input], outputs=[main_output, predictor_output])
        return model
    
    def train_control_model(self, model, x_data, y_data, loss_func, controller_batch_size, nb_epochs):
        # get the optimizer required for training
        if self.controller_optimizer == 'sgd':
            optim = tf.optimizers.SGD(learning_rate=self.controller_lr,
                                decay=self.controller_decay,
                                momentum=self.controller_momentum)
        elif self.controller_optimizer == 'Adam':
            optim = tf.optimizers.Adam(learning_rate=self.controller_lr,
                                decay=self.controller_decay)
        else:
            optim = getattr(tf.optimizers, self.controller_optimizer)(learning_rate=self.controller_lr, 
                                                    decay=self.controller_decay)
                                                    
        # compile model depending on loss function and optimizer provided
        model.compile(optimizer=optim, loss={'main_output': loss_func})
        
        # load controller weights
        if os.path.exists(self.controller_weights):
            model.load_weights(self.controller_weights)
            
        # train the controller
        print("TRAINING CONTROLLER...")
        model.fit({'main_input': x_data},
                {'main_output': y_data.reshape(len(y_data), 1, self.controller_classes)},
                epochs=nb_epochs,
                batch_size=controller_batch_size,
                verbose=0)
        
        # save controller weights
        model.save_weights(self.controller_weights)
    
    def train_hybrid_model(self, model, x_data, y_data, loss_func, controller_batch_size, nb_epochs):
        print('Hybrid model not supported.')
        # # get the optimizer required for training
        # if self.controller_optimizer == 'sgd':
        #     optim = optimizers.SGD(learning_rate=self.controller_lr,
        #                         decay=self.controller_decay,
        #                         momentum=self.controller_momentum)
        # else:
        #     optim = getattr(optimizers, self.controller_optimizer)(learning_rate=self.controller_lr, 
        #                                             decay=self.controller_decay)
                                                    
        # # compile model depending on loss function and optimizer provided
        # model.compile(optimizer=optim,
        #             loss={'main_output': loss_func, 'predictor_output': 'mse'},
        #             loss_weights={'main_output': 1, 'predictor_output': 1})

        # # load controller weights
        # if os.path.exists(self.controller_weights):
        #     model.load_weights(self.controller_weights)
            
        # # train the controller
        # print("TRAINING CONTROLLER...")
        # model.fit({'main_input': x_data},
        #         {'main_output': y_data.reshape(len(y_data), 1, self.controller_classes),
        #         'predictor_output': np.array(pred_target).reshape(len(pred_target), 1, 1)},
        #         epochs=nb_epochs,
        #         batch_size=controller_batch_size,
        #         verbose=0)
        
        # # save controller weights
        # model.save_weights(self.controller_weights)
    
    def sample_architecture_sequences(self, model, number_of_samples):
        # define values needed for sampling 
        final_layer_id = len(self.vocab)
        dropout_id = final_layer_id - 1
        vocab_idx = [0] + list(self.vocab.keys())
        
        # initialize list for architecture samples
        samples = []
        print("GENERATING ARCHITECTURE SAMPLES...")
        print('------------------------------------------------------')
        
        # while number of architectures sampled is less than required
        while len(samples) < number_of_samples:
            
            # initialise the empty list for architecture sequence
            seed = []
            
            # while len of generated sequence is less than maximum architecture length
            while len(seed) < self.max_len:
                
                # pad sequence for correctly shaped input for controller
                sequence = pad_sequences([seed], maxlen=self.max_len - 1, padding='post')
                sequence = sequence.reshape(1, 1, self.max_len - 1)
                
                # given the previous elements, get softmax distribution for the next element
                if self.use_predictor:
                    (probab, _) = model.predict(sequence)
                else:
                    probab = model.predict(sequence)
                probab = probab[0][0]
                
                # sample the next element randomly given the probability of next elements (the softmax distribution)
                next = np.random.choice(vocab_idx, size=1, p=probab)[0]
                
                # first layer isn't dropout
                if next == dropout_id and len(seed) == 0:
                    continue
                # first layer is not final layer
                if next == final_layer_id and len(seed) == 0:
                    continue
                # if final layer, break out of inner loop
                if next == final_layer_id:
                    seed.append(next)
                    break
                # if sequence length is 1 less than maximum, add final
                # layer and break out of inner loop
                if len(seed) == self.max_len - 1:
                    seed.append(final_layer_id)
                    break
                # ignore padding
                if not next == 0:
                    seed.append(next)
            
            # check if the generated sequence has been generated before.
            # if not, add it to the sequence data. 
            if seed not in self.seq_data:
                samples.append(seed)
                self.seq_data.append(seed)
        return samples
    
    def get_predicted_accuracies_hybrid_model(self, model, seqs):
        pred_accuracies = []        
        for seq in seqs:
            # pad each sequence
            control_sequences = pad_sequences([seq], maxlen=self.max_len, padding='post')
            xc = control_sequences[:, :-1].reshape(len(control_sequences), 1, self.max_len - 1)
            # get predicted accuracies
            (_, pred_accuracy) = [x[0][0] for x in model.predict(xc)]
            pred_accuracies.append(pred_accuracy[0])
        return pred_accuracies    

## Combined MLPNAS System

In [146]:
class MLPNAS(Controller):

    def __init__(self, x, y):

        self.x = x
        self.y = y
        self.target_classes = TARGET_CLASSES
        self.controller_sampling_epochs = CONTROLLER_SAMPLING_EPOCHS
        self.samples_per_controller_epoch = SAMPLES_PER_CONTROLLER_EPOCH
        self.controller_train_epochs = CONTROLLER_TRAINING_EPOCHS
        self.architecture_train_epochs = ARCHITECTURE_TRAINING_EPOCHS
        self.controller_loss_alpha = CONTROLLER_LOSS_ALPHA

        self.data = []
        self.nas_data_log = BASE_PATH + 'LOGS/nas_data.pkl'

        super().__init__()

        self.model_generator = MLPGenerator()

        self.controller_batch_size = len(self.data)
        self.controller_input_shape = (1, MAX_ARCHITECTURE_LENGTH - 1)
        if self.use_predictor:
            self.controller_model = self.hybrid_control_model(self.controller_input_shape, self.controller_batch_size)
        else:
            self.controller_model = self.control_model(self.controller_input_shape, self.controller_batch_size)

    
    # create architectures using encoded sequences we got from the controller
    def create_architecture(self, sequence):
        # create the model using the model generator
        model = self.model_generator.create_model(sequence, np.shape(self.x[0]))
        
        # compile said model
        model = self.model_generator.compile_model(model)
        return model

    # train the generated architecture
    def train_architecture(self, model):
        
        # shuffle the x and y data
        x, y = unison_shuffled_copies(self.x, self.y)
        
        # train the model
        history = self.model_generator.train_model(model, x, y, self.architecture_train_epochs)
        return history
    
    def append_model_metrics(self, sequence, history, pred_accuracy=None):
		
        # if the MLP models are trained only for a single epoch
        if len(history.history['val_accuracy']) == 1:
        	
            # if an accuracy predictor is used
            if pred_accuracy:
                self.data.append([sequence,
                                  history.history['val_accuracy'][0],
                                  pred_accuracy])
            
            # if no accuracy predictor data available
            else:
                self.data.append([sequence,
                                  history.history['val_accuracy'][0]])
            print('validation accuracy: ', history.history['val_accuracy'][0])
        
        # if the MLP models are trained for more than one epoch
        else:
        	
            # take a moving average of validation accuracy across epochs
            val_acc = np.ma.average(history.history['val_accuracy'],
                                    weights=np.arange(1, len(history.history['val_accuracy']) + 1),
                                    axis=-1)
            
            # add predicted accuracies if available else don't
            if pred_accuracy:
                self.data.append([sequence,
                                  val_acc,
                                  pred_accuracy])
            else:
                self.data.append([sequence,
                                  val_acc])
            print('validation accuracy: ', val_acc)
    

    def prepare_controller_data(self, sequences):
        
        # pad generated sequences to maximum length
        controller_sequences = pad_sequences(sequences, maxlen=self.max_len, padding='post')
        
        # split into inputs and labels for LSTM controller
        xc = controller_sequences[:, :-1].reshape(len(controller_sequences), 1, self.max_len - 1)
        yc = to_categorical(controller_sequences[:, -1], self.controller_classes)
        
        # get validation accuracies for each for reward function
        val_acc_target = [item[1] for item in self.data]
        return xc, yc, val_acc_target

    def get_discounted_reward(self, rewards):
        
        # initialise discounted reward array
        discounted_r = np.zeros_like(rewards, dtype=np.float32)
        
        # every element in the discounted reward array
        for t in range(len(rewards)):
            running_add = 0.
            exp = 0.
            
            # will need us to iterate over all rewards from t to T
            for r in rewards[t:]:
                running_add += self.controller_loss_alpha**exp * r
                exp += 1
            
            # add values to the discounted reward array
            discounted_r[t] = running_add
        
        # normalize discounted reward array
        discounted_r = (discounted_r - discounted_r.mean()) / discounted_r.std()
        return discounted_r

	# loss function based on discounted reward for policy gradients
    def custom_loss(self, target, output):
        
        # define baseline for rewards and subtract it from all validation accuracies to get reward. 
        baseline = REINFORCE_BASELINE
        reward = np.array([item[1] - baseline for item in self.data[-self.samples_per_controller_epoch:]]).reshape(
            self.samples_per_controller_epoch, 1)
        
        # get discounted reward
        discounted_reward = self.get_discounted_reward(reward)
        
        # multiply discounted reward by log likelihood of actions to get loss function
        loss = - log(output) * discounted_reward[:, None]
        return loss
    
    # calls controller training function from above
    def train_controller(self, model, x, y, pred_accuracy=None):
        if self.use_predictor:
            self.train_hybrid_model(model,
                                    x,
                                    y,
                                    pred_accuracy,
                                    self.custom_loss,
                                    len(self.data),
                                    self.controller_train_epochs)
        else:
            self.train_control_model(model,
                                     x,
                                     y,
                                     self.custom_loss,
                                     len(self.data),
                                     self.controller_train_epochs)
    
    # Main NAS Loop 

    def search(self):
    
        # for every controller epoch
        for controller_epoch in range(self.controller_sampling_epochs):
            
            # generate sequences
            sequences = self.sample_architecture_sequences(self.controller_model, self.samples_per_controller_epoch)
            
            # if using a predictor, predict their accuracies
            if self.use_predictor:
                pred_accuracies = self.get_predicted_accuracies_hybrid_model(self.controller_model, sequences)
            
            # for each sequence generated in a controller epoch
            for i, sequence in enumerate(sequences):
                
                # create an MLP model 
                model = self.create_architecture(sequence)
                
                # train said MLP model
                history = self.train_architecture(model)
                
                # log the model metrics
                if self.use_predictor:
                    self.append_model_metrics(sequence, history, pred_accuracies[i])
                else:
                    self.append_model_metrics(sequence, history)
                            
            # prepare data for the controller
            xc, yc, val_acc_target = self.prepare_controller_data(sequences)
            
            # train the controller
            self.train_controller(self.controller_model,
                                  xc,
                                  yc,
                                  val_acc_target[-self.samples_per_controller_epoch:])
        
        # save all the NAS logs in a pickle file
        with open(self.nas_data_log, 'wb') as f:
            pickle.dump(self.data, f)
        
        return self.data


In [153]:
# NAS Eval Code
    
# def get_latest_event_id():
#     all_subdirs = [BASE_PATH + 'LOGS/' + d for d in os.listdir('LOGS') if os.path.isdir('LOGS/' + d)]
#     latest_subdir = max(all_subdirs, key=os.path.getmtime)
#     return int(latest_subdir.replace('LOGS/event', ''))

def load_nas_data():
    # event = get_latest_event_id()
    data_file = BASE_PATH + 'LOGS/nas_data.pkl'
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    return data


def sort_search_data(nas_data):
    val_accs = [item[1] for item in nas_data]
    sorted_idx = np.argsort(val_accs)[::-1]
    nas_data = [nas_data[x] for x in sorted_idx]
    return nas_data

def get_top_n_architectures(n):
    data = load_nas_data()
    data = sort_search_data(data)
    search_space = MLPSearchSpace(TARGET_CLASSES)
    print('Top {} Architectures:'.format(n))
    for seq_data in data[:n]:
        print('Architecture', search_space.decode_sequence(seq_data[0]))
        print('Validation Accuracy:', seq_data[1])

def get_nas_accuracy_plot():
    data = load_nas_data()
    accuracies = [x[1] for x in data]
    plt.plot(np.arange(len(data)), accuracies)
    plt.show()


def get_accuracy_distribution():
    event = get_latest_event_id()
    data = load_nas_data()
    accuracies = [x[1]*100. for x in data]
    accuracies = [int(x) for x in accuracies]
    sorted_accs = np.sort(accuracies)
    count_dict = {k: len(list(v)) for k, v in groupby(sorted_accs)}
    plt.bar(list(count_dict.keys()), list(count_dict.values()))
    plt.show()

In [148]:
new_data = pd.read_csv(BASE_PATH + 'new_data.csv')
dataset = new_data[['Median Age', 'White', 'Black', 'AI/AN', 'PI', 'Other', 'Two Plus', 'Emp-LF Ratio', 'High School', 'Bachelor', 'Advanced', '% households w/ seniors', '% poverty', 'party_democrat', 'party_republican', 'inc_democrat', 'inc_republican', 'winner']]

In [149]:
X = dataset.iloc[:,0:16].values
Y = dataset.iloc[:,17].values

In [150]:
nas_object = MLPNAS(X, Y)
data = nas_object.search()

GENERATING ARCHITECTURE SAMPLES...
------------------------------------------------------
validation accuracy:  0.7911344929174944
validation accuracy:  0.912847492911599
validation accuracy:  0.9206611470742659
validation accuracy:  0.9298272013664246
validation accuracy:  0.9304282578555021
validation accuracy:  0.9143501043319702
validation accuracy:  0.9250187982212413
Transferring weights for layer: ('input', (32, 'tanh'))
validation accuracy:  0.9035311915657737
validation accuracy:  0.9087903954766013
Transferring weights for layer: ((32, 'elu'), (1, 'sigmoid'))
validation accuracy:  0.8879038409753279
TRAINING CONTROLLER...
GENERATING ARCHITECTURE SAMPLES...
------------------------------------------------------
Transferring weights for layer: ('input', (12, 'elu'))
validation accuracy:  0.8949662035161798
validation accuracy:  0.8802404241128401
Transferring weights for layer: ((12, 'elu'), (1, 'sigmoid'))
validation accuracy:  0.9056348638101057
Transferring weights for layer

In [154]:
get_top_n_architectures(TOP_N)

Top 5 Architectures:
Architecture [(12, 'tanh'), (8, 'relu'), (1, 'sigmoid')]
Validation Accuracy: 0.972351610660553
Architecture [(12, 'sigmoid'), (24, 'relu'), (1, 'sigmoid')]
Validation Accuracy: 0.9636363571340387
Architecture [(24, 'sigmoid'), (12, 'tanh'), (1, 'sigmoid')]
Validation Accuracy: 0.9603305697441101
Architecture [(32, 'elu'), (32, 'elu'), (1, 'sigmoid')]
Validation Accuracy: 0.9541698076508262
Architecture [(8, 'sigmoid'), (8, 'elu'), (1, 'sigmoid')]
Validation Accuracy: 0.952366647937081
