In [1]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import *
from sklearn.model_selection import train_test_split

import numpy as np
import csv
import time
import math
from multiprocessing import Process, Manager

import skopt
from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence, plot_objective, plot_evaluations

In [2]:
def loss_min_max(loss):
    if math.isnan(loss):
        loss = 1e+5
    else:
        loss = min(float(loss), 1e+5)
        loss = max(float(loss), 1e-10)
    return loss

In [3]:
def load_steal_dataset():
    with open('../../data/chap03/faults.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = []
        for row in csvreader:
            rows.append(row)

    global data, input_cnt, output_cnt
    input_cnt, output_cnt = 27, 7
    data = np.asarray(rows, dtype='float32')
    x = data[:,:input_cnt]
    y = data[:,-output_cnt:]
    return data,x,y
    
def kfold_data(data):
    import sklearn
    from sklearn.model_selection import KFold
    kf = KFold(n_splits=10, shuffle=True)
    kf.get_n_splits(data)
    kfold_data = {}
    for index, (train_index, test_index) in enumerate(kf.split(data)):
        train_data = data[train_index]
        test_data = data[test_index]
        kfold_data[index] = {'train' : train_data, 'test' : test_data}
    return kfold_data

def kfold_return_train(n_fold, kfold_data):
    output_cnt = 7
    if n_fold not in range(10):
        print('{} is not in range(10)'.format(n_fold))
        raise NameError('Change n_fold')
    train_data = kfold_data[n_fold]['train']
    #test_data = kfold_data[n_fold]['test']
    train_data_input = train_data[:, :-output_cnt]
    train_data_output = train_data[:, -output_cnt:]
    return [train_data_input, train_data_output]
def kfold_return_test(n_fold, kfold_data):
    output_cnt = 7
    if n_fold not in range(10):
        print('{} is not in range(10)'.format(n_fold))
        raise NameError('Change n_fold')
    #train_data = kfold_data[n_fold]['train']
    test_data = kfold_data[n_fold]['test']
    test_data_input = test_data[:, :-output_cnt]
    test_data_output = test_data[:, -output_cnt:]
    return [test_data_input, test_data_output]

## Categorycal Cross-entropy  
$L(y,\hat{y}) = - \sum_{j=0}^M \sum_{i=0}^N (y_{i,j}*\log(\hat{y}_{i,j}))$  

In [4]:
class steel_model:
    def __init__(self,learning_rate = 0.001,n_fold = None):
        self.learning_rate = learning_rate
        self.input_shape = 27 #input_cnt
        self.output_shape = 7 #output_cnt
        
        self.model = self.define_model()
        
        
    def init_data(self, train_data, test_data):
        self.X_train = train_data[0]
        self.Y_train = train_data[1]
        self.X_test = test_data[0]
        self.Y_test = test_data[1]
        
    def define_model(self,verbose = 0):
        x = Input(shape=(self.input_shape))
        y = Dense(self.output_shape, activation='softmax')(x)
        __model = Model(x, y)
        if verbose is not 0: __model.summary()
        return __model
    
    def model_compile(self):
        optimizer = tf.keras.optimizers.SGD(learning_rate=self.learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=tf.keras.losses.CategoricalCrossentropy(),
                           metrics=['accuracy']
                          )
    
    def model_fit(self,verbose = 0):
        self.model.fit(x = self.X_train, y = self.Y_train,
                       #validation_split=0.05,
                       #shuffle=True,
                       validation_data = (self.X_test, self.Y_test),
                       batch_size = 64,
                       epochs = 30,
                       verbose = verbose
                      )
        
    def model_evaluate(self,verbose=0):
        output = self.model.predict(self.X_test,verbose=verbose)
        test_loss, test_acc = self.model.evaluate(self.X_test, self.Y_test,verbose=verbose)
        
        if verbose is not 0:
            print("acc: {:2.4f}, loss: {:2.4f}".format(test_acc, test_loss))
        
        result_dict = {'acc' : test_acc, 'loss' : test_loss}

        return result_dict
        
 





In [5]:
class steal_model_optimize:
    def __init__(self,learning_rate = 0.01):
        self.input_cnt, self.output_cnt = 27, 7
        self.learning_rate = learning_rate
        
        
        raw_data = self.load_steal_data()
        self._kfold_data = self.kfold_data(raw_data)
        
        
    def model_trainning(self, train_data, test_data, proc_num=None, return_dict = None):
        model_init = steel_model(learning_rate=self.learning_rate)
        model_init.init_data(train_data = train_data,
                             test_data = test_data)
        model_init.model_compile()
        model_init.model_fit(verbose=0)
        model_result_dict = model_init.model_evaluate(verbose=0)
        
        if proc_num is not None and return_dict is not None:
            return_dict[proc_num] = model_result_dict

        return model_result_dict
        
    def model_train_multiprocess(self,):
        kfold_data_set = []
        for index in range(10):
            train_data = self.kfold_return_train(n_fold=index, kfold_data=self._kfold_data)
            test_data = self.kfold_return_test(n_fold=index, kfold_data=self._kfold_data)
            kfold_data_set.append((train_data, test_data))
            
        manager = Manager()
        return_dict = manager.dict()
        
        procs = []
        for index, (train_data, test_data) in enumerate(kfold_data_set):
            #print(train_data[0].shape, test_data[0].shape)
            proc = Process(target=self.model_trainning,
                           args=(train_data, test_data,
                                 index, return_dict),
                           name='{}-fold'.format(index)
                          )
            procs.append(proc)
            proc.start()
            
        for proc in procs:
            proc.join()
            
        return return_dict
    
    
    def kfold_model_evaluate_result(self,return_dict, verbose = 0):
        accuracies = []
        precisions = []
        recalls = []
        f1s = []
        losses = []
        
        for index in range(10):
            acc     = return_dict[index]['acc']
            loss    = return_dict[index]['loss']

            accuracies.append(acc)
            losses.append(loss)
            
        mean_acc    = np.array(accuracies).mean()
        mean_loss = np.array(losses).mean()
        
        return_result_list = [mean_acc, mean_loss]
        #print(return_result_list)
        if verbose is not 0:
            print("acc: {:2.4f}, loss: {:2.4f}".format(*return_result_list))
        

        return return_result_list
        
        
    
    def load_steal_data(self,):
        data, x, y = load_steal_dataset()
        return data
    
    def kfold_data(self, data):
        import sklearn
        from sklearn.model_selection import KFold
        kf = KFold(n_splits=10, shuffle=True)
        kf.get_n_splits(data)
        kfold_data = {}
        for index, (train_index, test_index) in enumerate(kf.split(data)):
            train_data = data[train_index]
            test_data = data[test_index]
            kfold_data[index] = {'train' : train_data, 'test' : test_data}
        return kfold_data

    def kfold_return_train(self, n_fold, kfold_data):
        output_cnt = self.output_cnt
        if n_fold not in range(10):
            print('{} is not in range(10)'.format(n_fold))
            raise NameError('Change n_fold')
        train_data = kfold_data[n_fold]['train']
        #test_data = kfold_data[n_fold]['test']
        train_data_input = train_data[:, :-output_cnt]
        train_data_output = train_data[:, -output_cnt:]
        return [train_data_input, train_data_output]
    
    def kfold_return_test(self, n_fold, kfold_data):
        output_cnt = self.output_cnt
        if n_fold not in range(10):
            print('{} is not in range(10)'.format(n_fold))
            raise NameError('Change n_fold')
        #train_data = kfold_data[n_fold]['train']
        test_data = kfold_data[n_fold]['test']
        test_data_input = test_data[:, :-output_cnt]
        test_data_output = test_data[:, -output_cnt:]
        return [test_data_input, test_data_output]

In [6]:
hp_dict = {
    'learning_rate' : 0.001
}

default_HP = list(hp_dict.values())

def model_tunning(hp_list):
    HP_list2dict = {
    'LEARNING_RATE' : float(hp_list[0])
    }
    print("======================start trainning=======================")
    print(HP_list2dict.items())
    print('\n')
    
    tunning_model = steal_model_optimize(learning_rate = hp_list[0])
    result_dict = tunning_model.model_train_multiprocess()
    result = tunning_model.kfold_model_evaluate_result(result_dict, verbose=1)
    loss = -result[0]
    

    print('\n')
    print("======================end trainning=======================")
    return loss

#dim_RND_MEAN_nodes = Integer(low=-100, high=100, name='RND_MEAN')
#dim_RND_STD_nodes = Real(low=1e-8, high=1.0, prior='log-uniform', name='RND_STD')
dim_learning_rate_nodes = Real(low=1e-6, high=1.0, prior='log-uniform',name='LEARNING_RATE')

dimension_HP = [
                #dim_RND_MEAN_nodes  ,
                #dim_RND_STD_nodes  ,
                dim_learning_rate_nodes
                ]



n_cell = 20
n_random_starts = 10

gp_fitting = gp_minimize(func=model_tunning,
                        dimensions=dimension_HP,
                        n_calls=n_cell,
                        n_random_starts=n_random_starts,
                        acq_func='EI',
                        x0=default_HP
                        )

dict_items([('LEARNING_RATE', 0.001)])


acc: 0.3077, loss: 1815260640.9782


dict_items([('LEARNING_RATE', 2.29321845649845e-05)])


acc: 0.2917, loss: 28732818.3532


dict_items([('LEARNING_RATE', 0.027086650185853486)])


acc: 0.3256, loss: 50993490002.2719


dict_items([('LEARNING_RATE', 6.310234413705417e-06)])


acc: 0.2798, loss: 11121550.7399


dict_items([('LEARNING_RATE', 2.164278482151934e-06)])


acc: 0.3040, loss: 3254972.4486


dict_items([('LEARNING_RATE', 0.2133593719123133)])


acc: 0.3813, loss: 321961579654.8984


dict_items([('LEARNING_RATE', 0.7784617614896534)])


acc: 0.3663, loss: 1258873404725.2192


dict_items([('LEARNING_RATE', 0.01932340377789457)])


acc: 0.3256, loss: 34246919852.8727


dict_items([('LEARNING_RATE', 0.0016615322247186908)])


acc: 0.3163, loss: 2346437471.2304


dict_items([('LEARNING_RATE', 0.00017772611215676338)])


acc: 0.3535, loss: 293373100.0524


dict_items([('LEARNING_RATE', 3.6673726216860864e-05)])


acc: 0.3307, loss: 66291979.



acc: 0.3045, loss: 1568735.2102


dict_items([('LEARNING_RATE', 0.9994979204591179)])


acc: 0.3231, loss: 1597246238559.9280




In [7]:
opt_lr = gp_fitting.x[0]
opt_lr

0.2133593719123133

In [8]:
kfold_trainning = steal_model_optimize(learning_rate=opt_lr)
result_dict = kfold_trainning.model_train_multiprocess()
result =  kfold_trainning.kfold_model_evaluate_result(result_dict, verbose=1)

acc: 0.3550, loss: 313555067580.7592


In [9]:
kfold_trainning = steal_model_optimize(learning_rate=0.001)
result_dict = kfold_trainning.model_train_multiprocess()
result =  kfold_trainning.kfold_model_evaluate_result(result_dict, verbose=1)

acc: 0.3189, loss: 1675220135.4697


## Binary Cross-entropy  
$L(y,\hat{y})=-{1 \over N} \sum_{i=0}^N (y \log (\hat{y}_i) + (1-y) \log(1-\hat{y}_i))$

In [10]:
class steel_model_bi:
    def __init__(self,learning_rate = 0.001,n_fold = None):
        self.learning_rate = learning_rate
        self.input_shape = 27 #input_cnt
        self.output_shape = 7 #output_cnt
        
        self.model = self.define_model()
        
        
    def init_data(self, train_data, test_data):
        self.X_train = train_data[0]
        self.Y_train = train_data[1]
        self.X_test = test_data[0]
        self.Y_test = test_data[1]
        
    def define_model(self,verbose = 0):
        x = Input(shape=(self.input_shape))
        y = Dense(self.output_shape, activation='softmax')(x)
        __model = Model(x, y)
        if verbose is not 0: __model.summary()
        return __model
    
    def model_compile(self):
        optimizer = tf.keras.optimizers.SGD(learning_rate=self.learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=tf.keras.losses.BinaryCrossentropy(),
                           metrics=['accuracy']
                          )
    
    def model_fit(self,verbose = 0):
        self.model.fit(x = self.X_train, y = self.Y_train,
                       #validation_split=0.05,
                       #shuffle=True,
                       validation_data = (self.X_test, self.Y_test),
                       batch_size = 64,
                       epochs = 10,
                       verbose = verbose
                      )
        
    def model_evaluate(self,verbose=0):
        output = self.model.predict(self.X_test,verbose=verbose)
        test_loss, test_acc = self.model.evaluate(self.X_test, self.Y_test,verbose=verbose)
        
        if verbose is not 0:
            print("acc: {:2.4f}, loss: {:2.4f}".format(test_acc, test_loss))
        
        result_dict = {'acc' : test_acc, 'loss' : test_loss}

        return result_dict
        
 





In [11]:
class steal_model_optimize_bi:
    def __init__(self,learning_rate = 0.01):
        self.input_cnt, self.output_cnt = 27, 7
        self.learning_rate = learning_rate
        
        
        raw_data = self.load_steal_data()
        self._kfold_data = self.kfold_data(raw_data)
        
        
    def model_trainning(self, train_data, test_data, proc_num=None, return_dict = None):
        model_init = steel_model_bi(learning_rate=self.learning_rate)
        model_init.init_data(train_data = train_data,
                             test_data = test_data)
        model_init.model_compile()
        model_init.model_fit(verbose=0)
        model_result_dict = model_init.model_evaluate(verbose=0)
        
        if proc_num is not None and return_dict is not None:
            return_dict[proc_num] = model_result_dict

        return model_result_dict
        
    def model_train_multiprocess(self,):
        kfold_data_set = []
        for index in range(10):
            train_data = self.kfold_return_train(n_fold=index, kfold_data=self._kfold_data)
            test_data = self.kfold_return_test(n_fold=index, kfold_data=self._kfold_data)
            kfold_data_set.append((train_data, test_data))
            
        manager = Manager()
        return_dict = manager.dict()
        
        procs = []
        for index, (train_data, test_data) in enumerate(kfold_data_set):
            #print(train_data[0].shape, test_data[0].shape)
            proc = Process(target=self.model_trainning,
                           args=(train_data, test_data,
                                 index, return_dict),
                           name='{}-fold'.format(index)
                          )
            procs.append(proc)
            proc.start()
            
        for proc in procs:
            proc.join()
            
        return return_dict
    
    
    def kfold_model_evaluate_result(self,return_dict, verbose = 0):
        accuracies = []
        precisions = []
        recalls = []
        f1s = []
        losses = []
        
        for index in range(10):
            acc     = return_dict[index]['acc']
            loss    = return_dict[index]['loss']

            accuracies.append(acc)
            losses.append(loss)
            
        mean_acc    = np.array(accuracies).mean()
        mean_loss = np.array(losses).mean()
        
        return_result_list = [mean_acc, mean_loss]
        #print(return_result_list)
        if verbose is not 0:
            print("acc: {:2.4f}, loss: {:2.4f}".format(*return_result_list))
        

        return return_result_list
        
        
    
    def load_steal_data(self,):
        data, x, y = load_steal_dataset()
        return data
    
    def kfold_data(self, data):
        import sklearn
        from sklearn.model_selection import KFold
        kf = KFold(n_splits=10, shuffle=True)
        kf.get_n_splits(data)
        kfold_data = {}
        for index, (train_index, test_index) in enumerate(kf.split(data)):
            train_data = data[train_index]
            test_data = data[test_index]
            kfold_data[index] = {'train' : train_data, 'test' : test_data}
        return kfold_data

    def kfold_return_train(self, n_fold, kfold_data):
        output_cnt = self.output_cnt
        if n_fold not in range(10):
            print('{} is not in range(10)'.format(n_fold))
            raise NameError('Change n_fold')
        train_data = kfold_data[n_fold]['train']
        #test_data = kfold_data[n_fold]['test']
        train_data_input = train_data[:, :-output_cnt]
        train_data_output = train_data[:, -output_cnt:]
        return [train_data_input, train_data_output]
    
    def kfold_return_test(self, n_fold, kfold_data):
        output_cnt = self.output_cnt
        if n_fold not in range(10):
            print('{} is not in range(10)'.format(n_fold))
            raise NameError('Change n_fold')
        #train_data = kfold_data[n_fold]['train']
        test_data = kfold_data[n_fold]['test']
        test_data_input = test_data[:, :-output_cnt]
        test_data_output = test_data[:, -output_cnt:]
        return [test_data_input, test_data_output]

In [12]:
kfold_trainning = steal_model_optimize_bi(learning_rate=0.001)
result_dict = kfold_trainning.model_train_multiprocess()
result =  kfold_trainning.kfold_model_evaluate_result(result_dict, verbose=1)

acc: 0.7631, loss: 3.6328
