<a href="https://colab.research.google.com/github/heros-lab/colaboratory/blob/master/Model_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

!pip install optuna
import optuna
from optuna.integration import KerasPruningCallback

work_path = "/content/drive/My Drive/Colab Notebooks"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
class DatasetClass:
    """Load paired input/target CSV files stored under a single directory.

    For a dataset label ``<label>`` the inputs are read from
    ``<data_path>/<label>_nx.csv`` and the targets from
    ``<data_path>/<label>_ny.csv``. The first column of each file is used as
    the DataFrame index.
    """

    def __init__(self, data_path):
        """Remember the directory that holds the ``*_nx.csv`` / ``*_ny.csv`` files."""
        self.path = data_path

    def __reflect_index(self, data, index):
        """Return ``data[:, index]``, or ``data`` unchanged when ``index`` is None."""
        # Identity check: `index != None` is evaluated element-wise for NumPy
        # arrays and then fails inside `if` with an ambiguous truth value.
        if index is not None:
            data = data[:, index]
        return data

    def __load_df(self, data_label):
        """Read the x and y CSV files of one dataset label as DataFrames."""
        data_x = pd.read_csv(f"{self.path}/{data_label}_nx.csv", index_col=0)
        data_y = pd.read_csv(f"{self.path}/{data_label}_ny.csv", index_col=0)
        return data_x, data_y

    def __load_data(self, data_label, x_index, y_index):
        """Load one dataset as arrays, optionally restricted to the given columns."""
        data_x, data_y = self.__load_df(data_label)
        data_x = self.__reflect_index(data_x.values, x_index)
        data_y = self.__reflect_index(data_y.values, y_index)
        return data_x, data_y

    def __load_stack(self, dataset_list, x_index, y_index):
        """Load several datasets and stack them row-wise in the given order.

        Raises:
            ValueError: if ``dataset_list`` is empty.
        """
        loaded = [self.__load_data(label, x_index, y_index) for label in dataset_list]
        if not loaded:
            raise ValueError("dataset_list must contain at least one dataset label.")
        if len(loaded) == 1:
            # A single dataset is returned as loaded, without going through vstack.
            return loaded[0]
        # Stack once at the end. Deciding "first iteration" by looking the label
        # up in the list would restart the stack whenever the first label is
        # repeated later in the list.
        data_x = np.vstack([x for x, _ in loaded])
        data_y = np.vstack([y for _, y in loaded])
        return data_x, data_y

    def __load_dict(self, dataset_list, x_index, y_index):
        """Load several datasets into two dicts keyed by dataset label."""
        data_x, data_y = {}, {}
        for label in dataset_list:
            data_x[label], data_y[label] = self.__load_data(label, x_index, y_index)
        return data_x, data_y

    def get_data(self, dataset_label, x_index=None, y_index=None, dict_type:bool=False):
        """Load dataset(s) as arrays.

        Args:
            dataset_label: a single label (``str``) or an iterable of labels.
            x_index: column selector applied to the x array; None keeps all columns.
            y_index: column selector applied to the y array; None keeps all columns.
            dict_type: when True, ``dataset_label`` is iterated and two dicts
                keyed by label are returned instead of arrays.

        Returns:
            ``(data_x, data_y)``: arrays for a single label, row-stacked arrays
            for several labels, or dicts of arrays when ``dict_type`` is True.
        """
        if dict_type:
            return self.__load_dict(dataset_label, x_index, y_index)
        if isinstance(dataset_label, str):
            return self.__load_data(dataset_label, x_index, y_index)
        return self.__load_stack(dataset_label, x_index, y_index)

    def get_dataframe(self, dataset_list):
        """Load several datasets as DataFrames, returned as two dicts keyed by label."""
        data_x = {}
        data_y = {}
        for label in dataset_list:
            data_x[label], data_y[label] = self.__load_df(label)
        return data_x, data_y


def set_index(model_type):
    """Return the input and output column indices for a model type.

    Args:
        model_type: a tag such as ``"conv.1"`` or ``"prop.3"``. Any tag
            containing ``"conv."`` uses all seven input columns; ``"prop.N"``
            tags use a fixed subset. The last character must be a digit; it
            selects the output column (digit minus one).

    Returns:
        ``(x_index, y_index)`` as two lists of ints.

    Raises:
        ValueError: if ``model_type`` is not a known tag, or if its last
            character is not a digit.
    """
    prop_x_index = {
        "prop.1": [0, 3, 4, 6],
        "prop.2": [1, 2, 5, 6],
        # NOTE(review): prop.3 uses exactly the same columns as prop.2 —
        # confirm this is intended and not a copy-paste slip.
        "prop.3": [1, 2, 5, 6],
        "prop.4": [0, 3, 4, 5, 6],
    }
    if "conv." in model_type:
        x_index = list(range(7))
    elif model_type in prop_x_index:
        # Copy so callers can mutate the result without touching the table.
        x_index = list(prop_x_index[model_type])
    else:
        # Fail here with a clear message; previously the message was only
        # printed and the function crashed later on the unbound x_index.
        raise ValueError(f"<< {model_type} >> This model is not exist.")
    y_index = [int(model_type[-1]) - 1]
    return x_index, y_index


def set_dataset_label(model_type):
    """Return the (learn, test) dataset labels for a model type.

    The trailing digit of ``model_type`` (1-4) selects the pair.

    Args:
        model_type: a tag whose last character is a digit, e.g. ``"conv.1"``.

    Returns:
        ``(learn, test)`` as two dataset label strings.

    Raises:
        ValueError: if the last character is not a digit, or the digit is not
            one of 1-4.
    """
    labels_by_type = {
        1: ("ms1a", "ms2a"),
        2: ("ms3a", "ms1a"),
        3: ("ms2a", "ms3a"),
        4: ("ms3a", "ms1a"),
    }
    type_id = int(model_type[-1])
    if type_id not in labels_by_type:
        # Previously an unknown id fell through every branch and crashed on
        # the unbound `learn` at the return statement.
        raise ValueError(
            f"<< {model_type} >> has no dataset labels defined (type id {type_id}).")
    learn, test = labels_by_type[type_id]
    return learn, test


In [8]:
def objective(trial):
    """Optuna objective: mean test MSE of a two-hidden-layer tanh network.

    The trial suggests the unit counts of the two hidden layers. The network is
    then trained from scratch ``sample`` times on the notebook globals
    ``learn_x`` / ``learn_y`` and evaluated on ``test_x`` / ``test_y``; the
    mean of those scores is returned.

    Args:
        trial: the ``optuna`` trial supplying ``num_unit1`` and ``num_unit2``.

    Returns:
        Mean of the evaluation losses over all training repetitions.
    """
    max_unit = 100    # upper bound for the units of each hidden layer
    batch_size = 512
    epochs = 200
    sample = 31       # number of independent trainings averaged per trial

    num_unit1 = trial.suggest_int("num_unit1", 1, max_unit)
    # TODO: an earlier idea (left disabled) was to cap num_unit2 by a total
    # parameter budget `element` instead of by max_unit.
    num_unit2 = trial.suggest_int("num_unit2", 1, max_unit)
    num_units = [num_unit1, num_unit2]

    score_list = []
    for run in range(sample):
        # Drop the previous model's graph/state before building a new one.
        clear_session()
        print(f"\r#{trial.number:2} -- unit: {num_units}, sampling: {run+1}/{sample}", end="")

        model = Sequential()
        model.add(Dense(
                input_dim=learn_x.shape[1], units=num_units[0],
                activation="tanh", kernel_initializer="glorot_uniform"))
        # Only the first layer needs the input size; later layers infer it.
        # A dedicated loop variable avoids shadowing the repetition counter.
        for units in num_units[1:]:
            model.add(Dense(
                units=units,
                activation="tanh", kernel_initializer="glorot_uniform"))
        model.add(Dense(units=1))
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        model.fit(learn_x, learn_y, batch_size=batch_size, epochs=epochs, verbose=0)
        # Evaluate the whole test set in a single batch.
        score = model.evaluate(test_x, test_y, batch_size=test_x.shape[0], verbose=0)
        score_list.append(score)

    scores = pd.Series(score_list)
    mean, std = scores.mean(), scores.std()
    print(f"\r#{trial.number:2} -- unit: {num_units}, mean: {mean:.4e}, std: {std:.4e}")

    return mean

In [9]:
# Select the model variant and load its learning / test data.
# The names defined here (model_tag, learn_x, learn_y, test_x, test_y) are
# notebook globals read by `objective` and by the study-setup cell.
model_tag = "conv.1"
data_path = f"{work_path}/data/norms2"

# Column indices and dataset labels are both derived from the model tag.
x_index, y_index = set_index(model_tag)
learn_list, test_list  = set_dataset_label(model_tag)

dataset = DatasetClass(data_path)
learn_x, learn_y = dataset.get_data(learn_list, x_index, y_index)
test_x, test_y = dataset.get_data(test_list, x_index, y_index)

# Echo the selection so the run is traceable from the cell output.
print(f"x_index: {x_index}, y_index: [{y_index[0]}]")
print(f"learn list: {learn_list}, test_list: {test_list}")


x_index: [0, 1, 2, 3, 4, 5, 6], y_index: [0]
learn list: ms1a, test_list: ms2a


In [None]:
# Study configuration: one SQLite file per model tag, one study per
# (model tag, study label) pair.
study_label = "ver1.4"
STUDY_LOADING = False  # True: resume the stored study, False: create a new one

storage_path = f"sqlite:///optimize_{model_tag}.db"
study_name = model_tag + "_" + study_label

# NOTE: `objective` never calls trial.report() / trial.should_prune(), so the
# MedianPruner configured below has nothing to act on.
# study load or create
if STUDY_LOADING:
    # Keyword arguments: recent Optuna releases make study_name/storage
    # keyword-only in load_study.
    study = optuna.load_study(
        study_name=study_name,
        storage=storage_path,
        pruner=optuna.pruners.MedianPruner())
else:
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_path,
        direction="minimize",
        pruner=optuna.pruners.MedianPruner())


In [7]:
# Run 100 trials of `objective` on the study created/loaded above.
study.optimize(objective, n_trials=100)

# Blank line so the banner starts below the "\r"-overwritten progress line.
print("\n")
print("*** All Trial are finished!! ***")

# 0 -- unit: [50, 10], sampling: 31/31

[W 2020-07-28 17:18:11,460] Setting status of trial#0 as TrialState.FAIL because of the following error: NameError("name 'samples' is not defined",)
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/optuna/study.py", line 734, in _run_trial
    result = func(trial)
  File "<ipython-input-4-63aedc4dd569>", line 34, in objective
    print(f"\r#{trial.number:2} -- unit: {num_units}, samples: {samples}/101, mean: {mean:.4e}, std: {std:.4e}")
NameError: name 'samples' is not defined


NameError: ignored