### Automatic model selection

Test notebook for automatic model selection.

In [1]:
# Notebook-wide imports, followed by the Ray start-up used by the experiment cells.
import datetime
import logging
import sys
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Machine-specific absolute path added to the module search path; presumably this is
# what makes the project-local packages imported below importable. Adjust it (or
# install the packages) when running on another machine.
#sys.path.append('/Users/davidlaredorazo/Documents/University_of_California/Research/Projects')
sys.path.append('/media/controlslab/DATA/Projects')

# Evolutionary search entry points used by the experiment cells
# (run_experiment, print_best, Configuration, Layers).
import automatic_model_selection
from automatic_model_selection import Configuration
from ann_encoding_rules import Layers
from CMAPSAuxFunctions import TrainValTensorBoard

#Tunable model
from ann_framework.tunable_model.tunable_model import SequenceTunableModelRegression
#from tunable_model import SequenceTunableModelRegression

#Data handlers
from ann_framework.data_handlers.data_handler_CMAPS import CMAPSDataHandler
from ann_framework.data_handlers.data_handler_MNIST import MNISTDataHandler
#from data_handler_MNIST import MNISTDataHandler
#from data_handler_CMAPS import CMAPSDataHandler

import ray
from ray_logger import LoggingActor

# Start Ray for this kernel with 4 CPUs and without the web UI.
# ignore_reinit_error=True lets this cell be re-run without raising when Ray is
# already initialised.
ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)

Using TensorFlow backend.
Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-01-30_17-52-37_8003/logs.
Waiting for redis server at 127.0.0.1:61766 to respond...
Waiting for redis server at 127.0.0.1:28987 to respond...
Starting Redis shard with 10.0 GB max memory.
Starting the Plasma object store with 3.435973836 GB memory using /tmp.


{'node_ip_address': None,
 'object_store_address': '/tmp/ray/session_2019-01-30_17-52-37_8003/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-01-30_17-52-37_8003/sockets/raylet',
 'redis_address': '10.34.5.154:61766',
 'webui_url': None}

### Load CMAPSS data handler

In [2]:
def cmaps_dhandler(data_folder='../CMAPSSData', window_size=25, window_stride=1, max_rul=130):
    """Build the CMAPSS data handler together with the flattened input shape.

    All parameters default to the values that were previously hard-coded, so
    calling ``cmaps_dhandler()`` behaves exactly as before.

    Parameters
    ----------
    data_folder : str
        Folder handed to ``CMAPSDataHandler`` as its first argument.
    window_size : int
        Window size handed to the handler; also multiplies the number of
        selected features to obtain the input length.
    window_stride : int
        Window stride handed to the handler.
    max_rul : int
        Maximum RUL value handed to the handler.

    Returns
    -------
    tuple
        ``(handler, input_shape)`` where ``input_shape`` is the one-element
        tuple ``(len(selected_features) * window_size,)``.
    """

    #Selected as per CNN paper
    features = ['T2', 'T24', 'T30', 'T50', 'P2', 'P15', 'P30', 'Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 'BPR',
                'farB', 'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32']

    # 1-based positions into `features`; shifted by one when indexing below.
    selected_indices = (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
    selected_features = [features[position - 1] for position in selected_indices]

    # NOTE(review): the literal 1 is presumably the CMAPSS sub-dataset number -
    # confirm against CMAPSDataHandler before parameterising it.
    dHandler_cmaps = CMAPSDataHandler(data_folder, 1, selected_features, max_rul, window_size, window_stride)

    # One input value per selected feature and per time step of the window.
    input_shape = (len(selected_features)*window_size, )

    return dHandler_cmaps, input_shape

### Function to save top models

In [3]:
def save_best_models(best_models_list, global_best_model_index, saveto, train_epochs=100, metrics=None):
    """Save the architecture of every best model, then retrain and save the global best.

    Parameters
    ----------
    best_models_list : sequence
        Individuals exposing a ``tModel`` attribute whose ``model`` provides
        ``to_json()`` and ``save_weights()``.
    global_best_model_index : int
        Position in ``best_models_list`` of the overall best individual. Its
        architecture file is named ``bestModel_global.json`` instead of being
        numbered.
    saveto : str
        Prefix prepended verbatim to every output file name (include the
        trailing path separator when it is a directory).
    train_epochs : int
        Number of epochs assigned to the global best model before retraining.
    metrics : list, optional
        Metrics forwarded to ``tModel.evaluate_model``. The original code read
        an undefined name ``metrics`` here and raised ``NameError``; an empty
        list is forwarded when this is left as ``None``.

    Returns
    -------
    None
    """

    if metrics is None:
        metrics = []

    #Save each model's architecture
    for i, ind_model in enumerate(best_models_list):

        model_json = ind_model.tModel.model.to_json()

        string_append = str(i) if i != global_best_model_index else 'global'
        with open(saveto+"bestModel_"+string_append+".json", "w") as json_file:
            json_file.write(model_json)

    #Train the global best
    tModel = best_models_list[global_best_model_index].tModel
    print(tModel.data_handler.data_scaler)

    tModel.load_data(unroll=True, verbose=1, cross_validation_ratio=0)
    tModel.print_data()
    tModel.epochs = train_epochs

    tModel.train_model(verbose=1)
    # NOTE(review): `round` resolves to the builtin function here unless the
    # notebook defines a global of that name - confirm what evaluate_model
    # expects for this argument.
    tModel.evaluate_model(metrics, round=round)

    # Read the model only after training, as the original code did.
    kmodel = tModel.model

    # serialize weights to HDF5
    kmodel.save_weights(saveto+"bestModel_global.h5")

    print("Saved models for dataset 1 to disk")
        

### Test on MNIST

In [4]:
"""Input can be of 3 types, ANN (1), CNN (2) or RNN (3)"""
# MNIST run of the automatic model selection: search settings, logging set-up,
# data handler, Configuration, and the experiment loop.
architecture_type = Layers.FullyConnected
problem_type = 2  #1 for regression, 2 for classification
output_shape = 10 #Number of classes when classification applies (10 here)
input_shape = (784,)  #Flat input vector; presumably 28*28 MNIST images - confirm against MNISTDataHandler
pop_size = 5  #Population size
tournament_size = 3  #Tournament size
max_similar = 3
total_experiments = 1  #How many times the loop below launches run_experiment
count_experiments = 0  #Experiments completed so far
unroll = True  #Forwarded to run_experiment as `unroll`

# These three are only used by the disabled bookkeeping/reporting code that is
# kept inside string literals further down.
global_best_list = []
global_best = None
global_best_index = 0

# Scaler forwarded to run_experiment as `tModel_scaler`.
min_max_scaler = MinMaxScaler(feature_range=(-1, 1))

t = datetime.datetime.now()

# Log to a timestamped file under logs/. The directory must already exist:
# logging.basicConfig does not create missing directories.
logging.basicConfig(filename='logs/nn_evolution_mnist_' + t.strftime('%m%d%Y%H%M%S') + '.log', level=logging.INFO, 
                        format='%(levelname)s:%(threadName)s:%(message)s', datefmt='%m/%d/%Y %H:%M:%S')

# Ray actor handle; its get_logs method is queried after each experiment below.
raylogger = LoggingActor.remote()

#cmaps datahandler
#dhandler_cmaps, input_shape = cmaps_dhandler()

#mnist datahandler
dHandler_mnist = MNISTDataHandler()

# Search configuration shared by every experiment launched in the loop.
config = Configuration(architecture_type, problem_type, input_shape, output_shape, pop_size, tournament_size, max_similar, epochs=5, cross_val=0.2, size_scaler=0.4,
                       max_generations=10, binary_selection=True, mutation_ratio=0.4, similarity_threshold=0.2, more_layers_prob=0.8)

# Launch the experiments one after another; experiment ids passed to
# run_experiment are 1-based (count_experiments + 1).
while count_experiments < total_experiments:
    print("Launching experiment {}".format(count_experiments+1))
    logging.info("Launching experiment {}".format(count_experiments+1))
    best = automatic_model_selection.run_experiment(config, dHandler_mnist, count_experiments + 1, 
                                                    unroll=unroll, verbose_data=0, tModel_scaler=min_max_scaler,
                                                   logging_actor=raylogger)

    # The string literal below is disabled bookkeeping of the global best model;
    # as a bare string expression it has no effect when the loop runs.
    """
    global_best_list.append(best)

    if global_best == None:
        global_best = best
    else:
        if best.fitness < global_best.fitness:
            global_best = best
            global_best_index = count_experiments

    """
    # Fetch whatever the logging actor collected and show it in the cell output.
    logs = ray.get(raylogger.get_logs.remote())
    print(logs)
    
    print("Exiting run experiment")
    count_experiments =  count_experiments + 1
    print("Executed experiments " +str(count_experiments))

# Disabled reporting/saving code kept as a string literal. Because it is the
# last expression of the cell, the notebook echoes the string as the cell output.
"""
print("Global best list\n")
logging.info("Global best list\n")
automatic_model_selection.print_best(global_best_list)

print("Global best is\n")
print(global_best)
logging.info("Global best is\n")
logging.info(global_best)

save_best_models(global_best_list, global_best_index, 'best_models/mnist/', train_epochs=200)
"""

Launching experiment 1
Starting model optimization: Problem type 2, Architecture type Layers.FullyConnected
Parameters:
Input shape: (784,), Output shape: 10, cross_val ratio: 0.2, Generations: 10, Population size: 5, Tournament size: 3, Binary selection: True, Mutation ratio: 0.4, Size scaler: 0.4


Generation 1
similar = 0
{(0, 1): 0.8275832922793668, (0, 2): 1.0, (0, 3): 0.6551822966972979, (0, 4): 0.7400162056555, (1, 2): 0.7895031001996741, (1, 3): 0.9697768243330709, (1, 4): 0.7082451770739364, (2, 3): 0.4844122741981392, (2, 4): 0.5411669686496219, (3, 4): 0.6312913878979918}


Launching experiment 1

Generation 1
Ray fetching to keras and evaluating population
[ObjectID(010000007e31931bd8df795b38f04aca07c4f240), ObjectID(010000001d96e834d3fa41e5a1be7e30a07a6f61), ObjectID(010000008f55eff6d13ccc12abf0143706b45e6f), ObjectID(01000000f936b301111034c8a64a09af8b180c51), ObjectID(01000000286231d090f2427533d2b77580688718)]
[1, 1, 1, 1, 1]
{1: ['scaler', MinMaxScaler(copy=True, feature_range=(-1, 1))], 5: ['scaler', MinMaxScaler(copy=True, feature_range=(-1, 1))], 2: ['scaler', MinMaxScaler(copy=True, feature_range=(-1, 1))], 3: ['scaler', MinMaxScaler(copy=True, feature_range=(-1, 1))], 4: ['scaler', MinMaxScaler(copy=True, feature_range=(-1, 1))]}
Exiting run experiment
Executed experiments 1


'\nprint("Global best list\n")\nlogging.info("Global best list\n")\nautomatic_model_selection.print_best(global_best_list)\n\nprint("Global best is\n")\nprint(global_best)\nlogging.info("Global best is\n")\nlogging.info(global_best)\n\nsave_best_models(global_best_list, global_best_index, \'best_models/mnist/\', train_epochs=200)\n'

### Test on CMAPSS

In [5]:
"""Input can be of 3 types, ANN (1), CNN (2) or RNN (3)"""

"""
architecture_type = Layers.FullyConnected
problem_type = 1  #1 for regression, 2 for classification
output_shape = 1 #If regression applies, number of classes
input_shape = (784,)
pop_size = 5
tournament_size = 3
max_similar = 3
total_experiments = 5
count_experiments = 0
unroll = True

global_best_list = []
global_best = None
global_best_index = 0

min_max_scaler = MinMaxScaler(feature_range=(-1, 1))

t = datetime.datetime.now()

logging.basicConfig(filename='logs/nn_evolution_cmaps_' + t.strftime('%m%d%Y%H%M%S') + '.log', level=logging.INFO, 
                        format='%(levelname)s:%(threadName)s:%(message)s', datefmt='%m/%d/%Y %H:%M:%S')

#cmaps datahandler
dhandler_cmaps, input_shape = cmaps_dhandler()
print(input_shape)

#mnist datahandler
#dHandler_mnist = MNISTDataHandler()

config = Configuration(architecture_type, problem_type, input_shape, output_shape, pop_size, tournament_size, max_similar, epochs=5, cross_val=0.2, size_scaler=0.4,
                       max_generations=10, binary_selection=True, mutation_ratio=0.4, similarity_threshold=0.2, more_layers_prob=0.8)

while count_experiments < total_experiments:
    print("Launching experiment {}".format(count_experiments+1))
    logging.info("Launching experiment {}".format(count_experiments+1))
    best = automatic_model_selection.run_experiment(config, dhandler_cmaps, count_experiments + 1, 
                                                    unroll=unroll, verbose_data=0, tModel_scaler=min_max_scaler)

    global_best_list.append(best)

    if global_best == None:
        global_best = best
    else:
        if best.fitness < global_best.fitness:
            global_best = best
            global_best_index = count_experiments

    count_experiments =  count_experiments + 1

print("Global best list\n")
logging.info("Global best list\n")
automatic_model_selection.print_best(global_best_list)

print("Global best is\n")
print(global_best)
logging.info("Global best is\n")
logging.info(global_best)

save_best_models(global_best_list, global_best_index, 'best_models/cmapss/', train_epochs=200)
"""

'\narchitecture_type = Layers.FullyConnected\nproblem_type = 1  #1 for regression, 2 for classification\noutput_shape = 1 #If regression applies, number of classes\ninput_shape = (784,)\npop_size = 5\ntournament_size = 3\nmax_similar = 3\ntotal_experiments = 5\ncount_experiments = 0\nunroll = True\n\nglobal_best_list = []\nglobal_best = None\nglobal_best_index = 0\n\nmin_max_scaler = MinMaxScaler(feature_range=(-1, 1))\n\nt = datetime.datetime.now()\n\nlogging.basicConfig(filename=\'logs/nn_evolution_cmaps_\' + t.strftime(\'%m%d%Y%H%M%S\') + \'.log\', level=logging.INFO, \n                        format=\'%(levelname)s:%(threadName)s:%(message)s\', datefmt=\'%m/%d/%Y %H:%M:%S\')\n\n#cmaps datahandler\ndhandler_cmaps, input_shape = cmaps_dhandler()\nprint(input_shape)\n\n#mnist datahandler\n#dHandler_mnist = MNISTDataHandler()\n\nconfig = Configuration(architecture_type, problem_type, input_shape, output_shape, pop_size, tournament_size, max_similar, epochs=5, cross_val=0.2, size_scaler

In [6]:
#save_best_models(global_best_list, global_best_index, 'best_models/mnist/', train_epochs=200)