<a href="https://colab.research.google.com/github/heros-lab/colaboratory/blob/master/Model_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Mount Google Drive into the Colab runtime so the notebook can read and write
# files stored on it.
from google.colab import drive
drive.mount('/content/drive')

# Base directory on the mounted drive. The dataset files, the CSV logs and the
# Optuna SQLite database used by this notebook are all addressed relative to it.
work_path = "/content/drive/My Drive/Colab Notebooks"


In [0]:
# Install Optuna into the running Colab environment via a shell escape.
# NOTE(review): the version is not pinned, so a re-run may pick up a newer
# release than the one that produced the saved outputs -- consider pinning.
!pip install optuna

import optuna
# NOTE(review): KerasPruningCallback is not referenced by any cell visible in
# this notebook -- confirm whether it is still needed.
from optuna.integration import KerasPruningCallback


In [0]:
# Column indices taken from the *_x arrays (model inputs). Exactly one of the
# alternatives below should be active; the trailing comment names the model
# variant each selection belongs to.
x_index = [0,1,2,3,4,5,6] # model_n1, n2
#x_index = [0,3,4,6] # model_s1
#x_index = [1,2,5,6] # model_s2
# Column index taken from the *_y arrays (regression target).
y_index = [2]

# File-name prefixes of the datasets used for fitting and for evaluation.
learn_name = "ms2a"
test_name  = "ms1a"

# Load the arrays and keep only the selected columns.
learn_x = np.load(f"{work_path}/learn/{learn_name}_x.npy")[:, x_index]
learn_y = np.load(f"{work_path}/learn/{learn_name}_y.npy")[:, y_index]

# NOTE(review): the test arrays are also read from the "learn" directory --
# confirm this is intentional.
test_x = np.load(f"{work_path}/learn/{test_name}_x.npy")[:, x_index]
test_y = np.load(f"{work_path}/learn/{test_name}_y.npy")[:, y_index]


In [0]:
class optimize_manager:
    """Optuna objective for a two-hidden-layer MLP, with CSV logging of the scores.

    For every trial the same architecture is trained repeatedly from scratch,
    each run is evaluated on the test set, and both the raw losses and their
    summary statistics are written to CSV files.

    Relies on the notebook-level globals ``work_path``, ``learn_x``,
    ``learn_y``, ``test_x`` and ``test_y``.
    """

    def __init__(self, study_name):
        """Derive the two CSV output paths from ``study_name``.

        Args:
            study_name: Label embedded in the output file names.
        """
        self.score_path  = f"{work_path}/score_{study_name}.csv"
        self.result_path = f"{work_path}/result_{study_name}.csv"

    def filtering_with_IQR(self, data_list):
        """Drop outliers using Tukey's fences (1.5 x IQR beyond the quartiles).

        Values lying exactly on a fence are kept. This also keeps every sample
        when the IQR is zero (all values identical), where a strict comparison
        would discard the whole list.

        Args:
            data_list: Iterable of numeric scores.

        Returns:
            A ``pandas.Series`` with the values inside
            ``[q1 - 1.5*iqr, q3 + 1.5*iqr]``, original positions preserved as
            the index. An empty input yields an empty Series.
        """
        pd_series = pd.Series(data_list)
        if pd_series.empty:
            return pd_series
        q1 = pd_series.quantile(.25)
        q3 = pd_series.quantile(.75)
        iqr = q3 - q1
        lim_upper = q3 + iqr*1.5
        lim_lower = q1 - iqr*1.5
        # `between` is inclusive on both ends by default; NaN scores compare
        # False and are therefore dropped.
        return pd_series[pd_series.between(lim_lower, lim_upper)]

    def save_scores(self, count, unit1, unit2, data_list):
        """Write one CSV row with the raw scores of a trial.

        The file is truncated when ``count == 0`` and appended to otherwise.

        Args:
            count: Trial number.
            unit1: Number of units in the first hidden layer.
            unit2: Number of units in the second hidden layer.
            data_list: Iterable of numeric scores, one per training run.
        """
        fields = [f"#{count}", f"{unit1}", f"{unit2}"]
        fields.extend(f"{data:.6e}" for data in data_list)
        with open(self.score_path, "w" if count == 0 else "a") as file:
            file.write(", ".join(fields) + "\n")

    def save_results(self, count, unit1, unit2, samples, mean, std, mean_f, std_f):
        """Write one CSV row with the summary statistics of a trial.

        When ``count == 0`` the file is truncated and a header line is written
        first; otherwise the row is appended.

        Args:
            count: Trial number.
            unit1: Number of units in the first hidden layer.
            unit2: Number of units in the second hidden layer.
            samples: Number of scores that survived the IQR filter.
            mean: Mean of all scores.
            std: Standard deviation of all scores.
            mean_f: Mean of the filtered scores.
            std_f: Standard deviation of the filtered scores.
        """
        header = "Trials, Unit-1, Unit-2, Samples(Full:101), Estimated loss(mean), Standard-deviation, Estimated loss(filtering-mean), Standard-deviation(filtering)\n"
        row = f"#{count}, {unit1}, {unit2}, {samples}, {mean:.6e}, {std:.6e}, {mean_f:.6e}, {std_f:.6e}\n"
        with open(self.result_path, "w" if count == 0 else "a") as file:
            if count == 0:
                file.write(header)
            file.write(row)

    def _build_model(self, num_unit1, num_unit2):
        """Build and compile a fresh tanh MLP with two hidden layers and one linear output."""
        # Only the first layer needs the input size; Sequential infers the rest.
        model = Sequential([
            Dense(input_dim=learn_x.shape[1], units=num_unit1, activation="tanh", kernel_initializer="glorot_uniform"),
            Dense(units=num_unit2, activation="tanh", kernel_initializer="glorot_uniform"),
            Dense(units=1)])
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that recent Keras releases no longer accept.
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model

    def objective(self, trial):
        """Optuna objective: mean test MSE over repeated trainings of one architecture.

        The two hidden-layer sizes are sampled from 1..200. The model is then
        trained ``num_sample`` times from scratch (200 epochs, batch size 256)
        and evaluated on the test set in a single batch. Raw scores and summary
        statistics are appended to the CSV files.

        Args:
            trial: The ``optuna`` trial supplying the hyper-parameters.

        Returns:
            The mean test loss over all runs (unfiltered).
        """
        epochs = 200
        num_batch = 256
        num_sample = 101
        num_unit1 = trial.suggest_int("num_unit1", 1, 200)
        num_unit2 = trial.suggest_int("num_unit2", 1, 200)

        score_list = []
        for i in range(num_sample):
            # Reset Keras' global state before each run.
            clear_session()
            # "\r" + end="" rewrites the same console line as a progress indicator.
            print(f"\r#{trial.number:2} -- unit: [{num_unit1}, {num_unit2}], sampling: {i+1}/{num_sample}", end="")

            model = self._build_model(num_unit1, num_unit2)
            model.fit(learn_x, learn_y, batch_size=num_batch, epochs=epochs, verbose=0)
            score = model.evaluate(test_x, test_y, batch_size=test_x.shape[0], verbose=0)
            score_list.append(score)

        # Remove outlier runs.
        score_list_flt = self.filtering_with_IQR(score_list)

        # Mean and standard deviation, with and without the outlier filter.
        mean, std = pd.Series(score_list).describe().loc[["mean","std"]]
        samples, mean_f, std_f = score_list_flt.describe().loc[["count","mean","std"]]

        # Persist the scores and print the trial summary.
        self.save_scores(trial.number, num_unit1, num_unit2, score_list)
        self.save_results(trial.number, num_unit1, num_unit2, samples, mean, std, mean_f, std_f)
        print(f"\r#{trial.number:2} -- unit: [{num_unit1}, {num_unit2}], samples: {samples}/{num_sample}, mean: {mean:.4e}, std: {std:.4e}")

        # NOTE(review): the filtered mean is logged, but the unfiltered mean is
        # what Optuna minimizes -- confirm this is intended.
        return mean

In [16]:
# True: resume the study already stored under `study_name`.
# False: create a new study with that name.
STUDY_LOADING = False

# SQLite database under `work_path` in which Optuna persists the study.
storage_path = f"sqlite:///{work_path}/opt_model_n2.db"
study_name = "n2_ver0.4"
manager = optimize_manager(study_name)


# study load or create
# NOTE(review): `optimize_manager.objective` never reports intermediate values
# to the trial, so the MedianPruner configured here has nothing to act on.
if STUDY_LOADING:
    # Keyword arguments: passing study_name/storage positionally is deprecated
    # in newer Optuna releases, while keywords work on old and new versions.
    study = optuna.load_study(study_name=study_name, storage=storage_path, pruner=optuna.pruners.MedianPruner())
else:
    study = optuna.create_study(study_name=study_name, storage=storage_path, direction="minimize", pruner=optuna.pruners.MedianPruner())


[I 2020-06-16 09:18:40,504] A new study created with name: n2_ver0.4


In [0]:
# Run 50 trials of the objective. Each trial trains 101 models, so this cell is
# slow: the log output saved with this notebook shows roughly 25 minutes per trial.
study.optimize(manager.objective, n_trials=50)

# Blank lines to separate the completion banner from the trial log.
print("\n")
print("*** All Trial are finished!! ***")

# 0 -- unit: [48, 61], samples: 95.0/101, mean: 6.2212e-02, std: 1.1711e-01


[I 2020-06-16 09:43:50,996] Finished trial#0 with value: 0.06221161557748766 with parameters: {'num_unit1': 48, 'num_unit2': 61}. Best is trial#0 with value: 0.06221161557748766.


# 1 -- unit: [135, 142], samples: 100.0/101, mean: 4.2115e-02, std: 6.8898e-03


[I 2020-06-16 10:08:59,300] Finished trial#1 with value: 0.042115417871587346 with parameters: {'num_unit1': 135, 'num_unit2': 142}. Best is trial#1 with value: 0.042115417871587346.


# 2 -- unit: [101, 61], samples: 101.0/101, mean: 4.4525e-02, std: 8.2808e-03


[I 2020-06-16 10:33:41,954] Finished trial#2 with value: 0.04452450886958897 with parameters: {'num_unit1': 101, 'num_unit2': 61}. Best is trial#1 with value: 0.042115417871587346.


# 3 -- unit: [20, 170], samples: 83.0/101, mean: 2.4767e-01, std: 4.4947e-01


[I 2020-06-16 10:58:38,610] Finished trial#3 with value: 0.2476725416571492 with parameters: {'num_unit1': 20, 'num_unit2': 170}. Best is trial#1 with value: 0.042115417871587346.


# 4 -- unit: [90, 183], samples: 100.0/101, mean: 4.8209e-02, std: 9.1148e-03


[I 2020-06-16 11:23:32,356] Finished trial#4 with value: 0.048209454864263535 with parameters: {'num_unit1': 90, 'num_unit2': 183}. Best is trial#1 with value: 0.042115417871587346.


# 5 -- unit: [21, 62], samples: 83.0/101, mean: 2.3796e-01, std: 4.3758e-01


[I 2020-06-16 11:48:24,819] Finished trial#5 with value: 0.23795512521473489 with parameters: {'num_unit1': 21, 'num_unit2': 62}. Best is trial#1 with value: 0.042115417871587346.


# 6 -- unit: [106, 14], samples: 100.0/101, mean: 4.3253e-02, std: 7.8080e-03


[I 2020-06-16 12:13:28,714] Finished trial#6 with value: 0.04325340996210528 with parameters: {'num_unit1': 106, 'num_unit2': 14}. Best is trial#1 with value: 0.042115417871587346.


# 7 -- unit: [144, 47], samples: 99.0/101, mean: 4.0326e-02, std: 8.3986e-03


[I 2020-06-16 12:39:01,174] Finished trial#7 with value: 0.04032604796535308 with parameters: {'num_unit1': 144, 'num_unit2': 47}. Best is trial#7 with value: 0.04032604796535308.


# 8 -- unit: [20, 30], samples: 78.0/101, mean: 2.8956e-01, std: 4.7500e-01


[I 2020-06-16 13:04:25,012] Finished trial#8 with value: 0.2895568647228255 with parameters: {'num_unit1': 20, 'num_unit2': 30}. Best is trial#7 with value: 0.04032604796535308.


# 9 -- unit: [185, 176], samples: 99.0/101, mean: 3.9344e-02, std: 6.1669e-03


[I 2020-06-16 13:29:38,110] Finished trial#9 with value: 0.039344019265753205 with parameters: {'num_unit1': 185, 'num_unit2': 176}. Best is trial#9 with value: 0.039344019265753205.


#10 -- unit: [193, 125], samples: 97.0/101, mean: 4.0160e-02, std: 6.5694e-03


[I 2020-06-16 13:54:37,217] Finished trial#10 with value: 0.04016041316755927 with parameters: {'num_unit1': 193, 'num_unit2': 125}. Best is trial#9 with value: 0.039344019265753205.


#11 -- unit: [199, 123], samples: 99.0/101, mean: 3.8802e-02, std: 5.3340e-03


[I 2020-06-16 14:19:50,217] Finished trial#11 with value: 0.03880211072835592 with parameters: {'num_unit1': 199, 'num_unit2': 123}. Best is trial#11 with value: 0.03880211072835592.


#12 -- unit: [193, 107], samples: 99.0/101, mean: 3.9015e-02, std: 5.5903e-03


[I 2020-06-16 14:44:48,039] Finished trial#12 with value: 0.0390145168962455 with parameters: {'num_unit1': 193, 'num_unit2': 107}. Best is trial#11 with value: 0.03880211072835592.


#13 -- unit: [169, 98], samples: 100.0/101, mean: 4.0030e-02, std: 6.7552e-03


[I 2020-06-16 15:09:46,825] Finished trial#13 with value: 0.04003014564883001 with parameters: {'num_unit1': 169, 'num_unit2': 98}. Best is trial#11 with value: 0.03880211072835592.


#14 -- unit: [160, 98], samples: 97.0/101, mean: 4.0034e-02, std: 7.1160e-03


[I 2020-06-16 15:34:42,016] Finished trial#14 with value: 0.04003384855050262 with parameters: {'num_unit1': 160, 'num_unit2': 98}. Best is trial#11 with value: 0.03880211072835592.


#15 -- unit: [197, 139], samples: 98.0/101, mean: 3.9350e-02, std: 9.3822e-03


[I 2020-06-16 15:59:56,223] Finished trial#15 with value: 0.039350341655092665 with parameters: {'num_unit1': 197, 'num_unit2': 139}. Best is trial#11 with value: 0.03880211072835592.


#16 -- unit: [200, 113], sampling: 64/101