In [1]:
import datetime
import logging
import sys
import numpy as np
import time
import math

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.callbacks import LearningRateScheduler
from keras.optimizers import Adam
import keras.backend as K

#sys.path.append('/Users/davidlaredorazo/Documents/University_of_California/Research/Projects')
sys.path.append('/media/controlslab/DATA/Projects')

import ann_framework.aux_functions as aux_functions

import automatic_model_selection
from automatic_model_selection import Configuration
from automatic_model_selection import run_experiment
from ann_encoding_rules import Layers
import fetch_to_keras

#Tunable model
from ann_framework.tunable_model.tunable_model import SequenceTunableModelRegression, SequenceTunableModelClassification

#Data handlers
from ann_framework.data_handlers.data_handler_DAMADICS import DamadicsDataHandler

learningRate_scheduler = LearningRateScheduler(aux_functions.step_decay)

size_scaler = 0.5

#Use same configuration for all experiments, just change some of the parameters

#Define some random paramaters for the creation of the configuration, this will change for each test model
architecture_type = Layers.FullyConnected
problem_type = 2  #1 for regression, 2 for classification
output_shape = 2 #If regression applies, number of classes

features = ['externalControllerOutput', 'undisturbedMediumFlow', 'pressureValveInlet', 
            'pressureValveOutlet', 'mediumTemperature', 'rodDisplacement', 'disturbedMediumFlow', 
           'selectedFault', 'faultType', 'faultIntensity']

selected_indices = np.array([1,3,4,5,6,7])
selected_features = list(features[i] for i in selected_indices-1)

#Does not work for sequence sizes larger than 1 given the way I'm generating the test data. 
#Need to properly define what the test data is going to be like.
window_size = 2
window_stride = 1

start_date = datetime.datetime(2018, 2, 14, 18, 59, 20)
time_delta = datetime.timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=1, hours=0, weeks=0)
end_date = start_date + 250000*time_delta #get the first 300 instances

input_shape = (window_size * len(selected_features), )

config = Configuration(architecture_type, problem_type, input_shape, output_shape, pop_size=5, 
                       tournament_size=3, max_similar=3, epochs=5, cross_val=0.2, size_scaler=size_scaler,
                       max_generations=10, binary_selection=True, mutation_ratio=0.8, 
                       similarity_threshold=0.2, more_layers_prob=0.4, verbose_individuals=True, 
                       show_model=True, verbose_training=1)


Using TensorFlow backend.


### Given a model get the compiled model

In [2]:
def get_compiled_model(model, problem_type, optimizer_params=[]):
    """Obtain a keras compiled model"""
    
    #Shared parameters for the models
    optimizer = Adam(lr=0.01, beta_1=0.5)
    
    if problem_type == 1:
        lossFunction = "mean_squared_error"
        metrics = ["mse"]
    elif problem_type == 2:
        lossFunction = "categorical_crossentropy"
        metrics = ["accuracy"]
    elif problem_type == 3:
        lossFunction = "binary_crossentropy"
        metrics = ["accuracy"]
    else:
        print("Problem type not defined")
        model = None
        return
    
    #Create and compile the models
    model.compile(optimizer = optimizer, loss = lossFunction, metrics = metrics)
    
    return model


def create_tunable_model(model_genotype, problem_type, input_shape, data_handler, model_number):
    
    K.clear_session()
    
    model = fetch_to_keras.decode_genotype(model_genotype, problem_type, input_shape, 1)
    
    model = get_compiled_model(model, problem_type, optimizer_params=[])
    
    if problem_type == 1:
        tModel = SequenceTunableModelRegression('ModelReg_SN_'+str(model_number), model, lib_type='keras', data_handler=data_handler)
    else:
        tModel = SequenceTunableModelClassification('ModelClass_SN_'+str(model_number), model, lib_type='keras', data_handler=data_handler)
        
    return tModel

### Function to save top models

In [3]:
def save_best_models(best_models_list, global_best_model_index, saveto, input_shape, data_handler, 
                     problem_type=1, data_scaler=None, train_epochs=100, metrics=[], round=0):
    
    n_models = len(best_models_list)
    
    for ind_model, i in zip(best_models_list, range(n_models)):
        
        tModel = create_tunable_model(ind_model.stringModel, problem_type, input_shape, data_handler, i)
        kmodel = tModel.model
        model_json = kmodel.to_json()
        
        #Save model's architecture
        string_append = str(i) if i != global_best_model_index else 'global'
        with open(saveto+"bestModel_"+string_append+".json", "w") as json_file:
            json_file.write(model_json)
            
    #Train the global best, model has to be recompiled
    ind_model = best_models_list[global_best_model_index]
    tModel = create_tunable_model(ind_model.stringModel, problem_type, input_shape, data_handler, n_models)
    
    print(tModel.model.summary())
    #print(tModel.data_handler)
    
    if tModel.data_handler.data_scaler != None:
        print("Using data handler scaler")
    elif tModel.data_scaler != None:
        print("Using tModel scaler (Overriding data handler scaler)")
    else:
        print("No data scaling used")
    
    if data_scaler != None:
        tModel.data_handler.data_scaler = None
        tModel.data_scaler = data_scaler
        
    tModel.load_data(unroll=True, verbose=1, cross_validation_ratio=0.2,test_ratio=0.1)
    tModel.print_data()
    tModel.epochs = train_epochs

    tModel.train_model(verbose=1)
    
    tModel.evaluate_model(metrics, round=round)
    
    kmodel = tModel.model
            
    # serialize weights to HDF5
    kmodel.save_weights(saveto+"bestModel_global.h5")
    
    print("Saved models for dataset 1 to disk")

### Get global best model

In [4]:
def recompute_globals_fitness(best_models, size_scaler, problem_type):
    """It is necessary to recompute the fiteness of global models since they have differnt normalization factors"""

    #print("Before normalization")
    #automatic_model_selection.print_best(best_models)
    
    normalize_scores(best_models)
    
    #print("After normalization")
    #automatic_model_selection.print_best(best_models)
    
    global_best_index = compute_fitness(best_models, size_scaler, problem_type)
    
    print("Recomputed fitness")
    automatic_model_selection.print_best(best_models)
    print("Global best index")
    print(global_best_index)
    
    return global_best_index


def normalize_scores(best_models):
    
    pop_size = len(best_models)
    raw_scores = np.zeros((pop_size,))
    
    for i in range(pop_size):
        model = best_models[i]
        raw_scores[i] = model.raw_score
        
    normalization_factor = np.linalg.norm(raw_scores)
    normalized_scores = raw_scores/normalization_factor
    
    for i in range(pop_size):
        model = best_models[i]
        model.normalized_score = raw_scores[i]
    
    
def compute_fitness(best_models, size_scaler, problem_type):
    
    pop_size = len(best_models)
    
    global_best_index = 0
    
    for i in range(pop_size):
        
        round_up_to = 3

        #Round up to the first 3 digits before computing log                                                                                                                                                          
        rounding_scaler = 10**round_up_to
        trainable_count = round(best_models[i].raw_size/rounding_scaler)*rounding_scaler
        size_score = math.log10(trainable_count)

        scaled_score = best_models[i].normalized_score

        #For classification estimate the error which is between 0 and 1                                                                                                                   
        if problem_type == 2:
            metric_score = (1 - scaled_score)*10 #Multiply by 10 to have a better scaling. I still need to find an appropriate scaling 
        elif problem_type == 3:
            metric_score == (1 - scaled_score)*10
        else:
            metric_score = scaled_score*10 #Multiply by 10 to have a better scaling. I still need to find an appropiate scaling                                                       
    
        metric_scaler = 1-size_scaler
        print("metric_scaler %f"%metric_scaler)
        print("size scaler %f"%size_scaler)
    
        #Scalarization of multiobjective version of the fitness function                                                                                                                  
        best_models[i].fitness = metric_scaler*metric_score + size_scaler*size_score
        
        if best_models[i].fitness < best_models[global_best_index].fitness:
            global_best_index = i
            
    return global_best_index

### Test on DAMADICS

In [5]:
def damadics_test(damadics_dhandler, input_shape = 12, size_scaler=0.5, total_experiments=1):

    """Input can be of 3 types, ANN (1), CNN (2) or RNN (3)"""
    architecture_type = Layers.FullyConnected
    problem_type = 2  #1 for regression, 2 for classification
    output_shape = 2 #If regression applies, number of classes
    input_shape = (window_size * len(selected_features), )
    """
    pop_size = 5
    tournament_size = 3
    max_similar = 3
    """
    total_experiments = 1
    count_experiments = 0
    unroll = True

    global_best_list = []
    global_best = None
    global_best_index = 0
    
    experiment_times = np.zeros((total_experiments,1))
    
    scaler = None

    t = datetime.datetime.now()

    logging.basicConfig(filename='logs/nn_evolution_damadics_' + t.strftime('%m%d%Y%H%M%S') + '.log', level=logging.INFO, 
                            format='%(levelname)s:%(threadName)s:%(message)s', datefmt='%m/%d/%Y %H:%M:%S')
    

    
#     config = Configuration(architecture_type, problem_type, input_shape, output_shape, pop_size, tournament_size, max_similar, 
#                            epochs=5, cross_val=0.2, size_scaler=size_scaler, max_generations=10, binary_selection=True, 
#                            mutation_ratio=0.4, similarity_threshold=0.2, more_layers_prob=0.8)
    

    config.architecture_type = architecture_type
    config.problem_type = problem_type
    config.input_shape = input_shape
    config.output_shape = output_shape

    while count_experiments < total_experiments:
        print("Launching experiment {}".format(count_experiments+1))
        logging.info("Launching experiment {}".format(count_experiments+1))
        
        start = time.time()
        
        best = automatic_model_selection.run_experiment(config, damadics_dhandler, count_experiments + 1, unroll=unroll,
                                                        learningRate_scheduler=learningRate_scheduler, 
                                                        tModel_scaler=scaler)
        
        end = time.time()
        elapsed_time = (end-start)/60
        experiment_times[count_experiments] = elapsed_time
        print("Experiment time: {} minutes".format(elapsed_time))
        logging.info("Experiment time: {} minutes".format(elapsed_time))
        
        best.individual_label = count_experiments

        global_best_list.append(best)

        if global_best == None:
            global_best = best
        else:
            if best.fitness < global_best.fitness:
                global_best = best
                global_best_index = count_experiments

        count_experiments =  count_experiments + 1
        
    total_experiment_time = experiment_times.sum()

    print("Global best list\n")
    logging.info("Global best list\n")
    automatic_model_selection.print_best(global_best_list)

    print("Global best is\n")
    print(global_best)
    logging.info("Global best is\n")
    logging.info(global_best)
    
    return global_best_list, global_best_index, total_experiment_time

### Perform tests

In [6]:
def run_damadics_test(alphas):

    experiments = 1
    problem_type = 2

    global_best_list = []
    global_best_index = 0
    total_experiment_time = []
    total_experiment_time = 0
    avg_experiment_time = 0

    scaler = MinMaxScaler(feature_range=(-1, 1))

    #DamadicsDataHandler = DamadicsDataHandler()
    input_shape = (window_size * len(selected_features), )
    dhandler_damadics_for_best = DamadicsDataHandler(selected_features, window_size, window_stride, 
                                      start_date=start_date, end_date=end_date, 
                                                     binary_classes=True, one_hot_encode=True)
    dhandler_damadics_for_best.connect_to_db('readOnly', '_readOnly2019', '169.236.181.40', 'damadics')
    #dhandler_damadics_for_best.load_data(unroll=True, verbose=1, start_date=start_date, end_date=end_date)
    #dhandler_damadics_for_best.print_data(print_top=True)

    for size_scaler in alphas:

        print("Running for alpha={}".format(size_scaler))

        global_best_list, global_best_index, total_experiment_time = damadics_test(damadics_dhandler=dhandler_damadics_for_best, 
                                                                                input_shape=input_shape,
                                                                                size_scaler=size_scaler, 
                                                                                total_experiments=experiments)

        print(global_best_list)
        print(global_best_index)

        avg_experiment_time = total_experiment_time/experiments

        print("Total experiment time {}".format(total_experiment_time))
        print("Avg experiment time {}".format(avg_experiment_time))

        save_best_models(global_best_list, global_best_index, 
                         '/media/controlslab/DATA/Projects/ValveActuator-DAMADICS-/code/best_models/alpha{}/'.format(size_scaler), input_shape=input_shape, 
                         data_handler=dhandler_damadics_for_best, problem_type=problem_type, train_epochs=1 , 
                         data_scaler=scaler)
        
        return global_best_list, global_best_index

In [7]:
#alphas = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
alphas = [0.7]

global_best_list, global_best_index = run_damadics_test(alphas)

Connection to mysql+mysqldb://readOnly:_readOnly2019@169.236.181.40/damadics successfull
Running for alpha=0.7
Launching experiment 1

Generation 1
launch new
True
gen similar
False
Fetching model 0 to keras
Evaluating model 0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 968)               12584     
_________________________________________________________________
dense_1 (Dense)              (None, 856)               829464    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1714      
Total params: 843,762
Trainable params: 843,762
Non-trainable params: 0
_________________________________________________________________
None
Loading data for the first time
Reloading data due to parameter change
Loading data for DAMADICS with window_size of 2, stride of 1. Cros-Validation ratio 0.2
Loading d

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
raw size 843762.000000
rounding_scaler 1000.000000
round 844.000000
trainable_count 844000.000000
metric_scaler 0.500000
size scaler 0.500000
Individual 0 score/normalized score/size/fitness 0.6480000023841858/0.6480000023841858/843762/4.723171211391898
raw size 14642.000000
rounding_scaler 1000.000000
round 15.000000
trainable_count 15000.000000
metric_scaler 0.500000
size scaler 0.500000
Individual 1 score/normalized score/size/fitness 0.4160000002384186/0.4160000002384186/14642/5.008045628335748
raw size 137138.000000
rounding_scaler 1000.000000
round 137.000000
trainable_count 137000.000000
metric_scaler 0.500000
size scaler 0.500000
Individual 2 score/normalized score/size/fitness 0.6480000023841858/0.6480000023841858/137138/4.328360271657274
raw size 557946.000000
rounding_scaler 1000.000000
round 558.000000
trainable_count 558000.000000
metric_scaler 0.500000
size scaler 0.500000
Individual 3 score/normalized score/size/fitness 0.352000000

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 4 to keras
Evaluating model 4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 152)               1976      
_________________________________________________________________
dropout_1 (Dropout)          (None, 152)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 872)               133416    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1746      
Total params: 137,138
Trainable params: 137,138
Non-trainable params: 0
_________________________________________________________________
None
Using previously loaded data
training with cv
Train on 197128 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
raw size 358282.000000
rounding_s

Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 3 to keras
Evaluating model 3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 136)               1768      
_________________________________________________________________
dropout_1 (Dropout)          (None, 136)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 832)               113984    
_________________________________________________________________
dense_2 (Dense)              (None, 832)               693056    
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 1666      
Total params: 810,474
Trainable params: 810,474
Non-trainable params: 0
_________________________________________________________________
None
Using previously loaded data
training with cv
Train on 1

Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 2 to keras
Evaluating model 2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 320)               4160      
_________________________________________________________________
dense_1 (Dense)              (None, 872)               279912    
_________________________________________________________________
dropout_1 (Dropout)          (None, 872)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1746      
Total params: 285,818
Trainable params: 285,818
Non-trainable params: 0
_________________________________________________________________
None
Using previously loaded data
training with cv
Train on 197128 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 3 to keras
Evaluating model 

Train on 197128 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 2 to keras
Evaluating model 2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 872)               11336     
_________________________________________________________________
dense_1 (Dense)              (None, 328)               286344    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 658       
Total params: 298,338
Trainable params: 298,338
Non-trainable params: 0
_________________________________________________________________
None
Using previously loaded data
training with cv
Train on 197128 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 3 to keras
Evaluating model 3
_____________________________________________________________

Epoch 4/5
Epoch 5/5
Fetching model 1 to keras
Evaluating model 1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 152)               1976      
_________________________________________________________________
dropout_1 (Dropout)          (None, 152)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 306       
Total params: 2,282
Trainable params: 2,282
Non-trainable params: 0
_________________________________________________________________
None
Using previously loaded data
training with cv
Train on 197128 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fetching model 2 to keras
Evaluating model 2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)  

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
in (Dense)                   (None, 152)               1976      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 306       
Total params: 2,282
Trainable params: 2,282
Non-trainable params: 0
_________________________________________________________________
None
No data scaling used
Loading data for the first time
Reloading data due to parameter change
Loading data for DAMADICS with window_size of 2, stride of 1. Cros-Validation ratio 0.2
Loading data from memory
Data Splitting: 0:00:00.000070


IndexError: list index out of range

In [None]:
for ind in global_best_list:
    
    print(ind)

print(global_best_index)