# Optimization and Simulation with DR

## optunaによる多目的関数の最適化

### 1. ライブラリ類のインポートとファイルのロード

In [None]:
%matplotlib inline

import os
import copy
import warnings

import numpy as np
import pandas as pd
from pandas import DataFrame

import matplotlib.pyplot as plt
import datarobot as dr
from datarobot_predict.deployment import predict


import optuna
import optunahub

import japanize_matplotlib

# When the DATAROBOT_NOTEBOOK_IMAGE variable is unset or empty, load settings
# from the local .env file (presumably the DataRobot credentials - confirm).
if not os.environ.get("DATAROBOT_NOTEBOOK_IMAGE"):
    print("not running in DataRobot Notebook")
    from dotenv import load_dotenv
    load_dotenv("../.env", override=True)

client = dr.Client()

# Fix NumPy's global random state.
seed = 71
np.random.seed(seed)

# Silence all warnings, including matplotlib's "too many open figures" warning.
warnings.filterwarnings("ignore")
plt.rcParams["figure.max_open_warning"] = 0

# The return value is discarded; the call is kept exactly as in the original cell.
optuna.visualization.is_available()

# Widen pandas' display limits so large frames are shown in full.
for option, value in {
    "display.max_rows": 1000,
    "display.max_columns": 1000,
    "display.width": 1000,
    "display.max_colwidth": 1000,
    "display.precision": 8,
}.items():
    pd.set_option(option, value)

In [None]:
# Names of the two target columns (yield strength, tensile strength).
targets = ["降伏強度", "引張強度"]

# Load the dataset from a CSV file located relative to the notebook.
df = pd.read_csv("../data/opt_steel_strength.csv")

In [None]:
# Feature matrix: every column except the "ID" column and the two targets.
X_train = df.drop(columns=["ID", "降伏強度", "引張強度"])
# The two target series, kept separately.
y_train1 = df["降伏強度"]
y_train2 = df["引張強度"]

### 2. 予測APIを用いる上での必要な情報を準備しておきます。

In [None]:
# Settings needed before the prediction API can be called.
dr.Client()
deployment_ids = ["67bc6ab2f1b3f6b73d560ff8", "67bc6ab999dbbff523b1152a"]
# One Deployment object per ID, in the same order as `deployment_ids`.
deployments = list(map(dr.Deployment.get, deployment_ids))
# Time budget handed to `study.optimize(timeout=...)` in later cells;
# the run could also be limited by the number of trials (n_trials).
timeout = 15

# Container for collected results.
result = []

### 3. `datarobot-predict`を使って予測APIを叩く

In [None]:
# Call the prediction API once: send the first row of X_train to each deployment
predictions_1, _ = predict(deployments[0], X_train.iloc[:1])
predictions_2, _ = predict(deployments[1], X_train.iloc[:1])
display(predictions_1)
display(predictions_2)

In [None]:
# Pull the predicted values out of the frames returned by the prediction call
predictions_1.loc[0, "降伏強度_PREDICTION"], predictions_2.loc[0, "引張強度_PREDICTION"]

In [None]:
# 結果記録用のヘルパーメソッド
def update_result(result, study, sampler, pruner, n_jobs):
    """Append a summary of ``study``'s best trial to ``result`` and return it.

    Args:
        result: List that collects one summary dict per call; mutated in place.
        study: Object exposing ``best_trial`` (with ``value`` and ``params``)
            and ``trials``.
        sampler: Value stored under the ``"sampler"`` key.
        pruner: Value stored under the ``"pruner"`` key.
        n_jobs: Value stored under the ``"n_jobs"`` key.

    Returns:
        The same ``result`` list that was passed in.
    """
    best = study.best_trial
    summary = dict(sampler=sampler, pruner=pruner, n_jobs=n_jobs)
    summary["値"] = best.value
    summary["回数"] = len(study.trials)
    # Parameters go last; a parameter named like a fixed key overrides it.
    summary.update(best.params)

    result.append(summary)
    print(summary)
    return result

### 4. 二つのターゲットで試してみましょう
複数の最適化対象を設定します。この関数の戻り値がそれぞれ最小化/最大化されるように、optunaが探索空間からいい感じの入力データを見つけてくれます。

In [None]:
# Per-column summary statistics of the features (one row per column).
stat = X_train.describe().T


def objective(trial):
    """Two-objective function evaluated by Optuna.

    A value is suggested for every feature column within
    [0.8 * column min, 1.2 * column max] on a 0.01 grid, the resulting
    single-row frame is sent to both deployments, and the two predictions
    are returned.

    Args:
        trial: The Optuna trial used to suggest the feature values.

    Returns:
        Tuple ``(降伏強度 prediction, 引張強度 prediction)``.
    """
    candidate = {}
    for feature in X_train.columns:
        lower = stat.loc[feature, "min"] * 0.8
        upper = stat.loc[feature, "max"] * 1.2
        candidate[feature] = trial.suggest_float(feature, lower, upper, step=0.01)

    # Single-row frame (index 0) with columns in the same order as X_train.
    df_target = DataFrame(candidate, index=[0])

    yield_pred = predict(deployments[0], df_target)[0].loc[0, "降伏強度_PREDICTION"]
    tensile_pred = predict(deployments[1], df_target)[0].loc[0, "引張強度_PREDICTION"]

    return yield_pred, tensile_pred

### 5. 最適化を実行する

In [None]:
%%time
dr.Client()
# Seeded sampler so that the suggested values are reproducible between runs.
sampler1 = optuna.samplers.NSGAIISampler(seed=42)
# The sampler has to be passed explicitly: without `sampler=`, create_study()
# builds its own default sampler and the seeded instance above is never used.
study1 = optuna.create_study(
    sampler=sampler1,
    directions=["maximize", "maximize"],
    study_name=f"{sampler1.__class__.__name__}",
)
# Stop after `timeout`; run garbage collection after every trial.
study1.optimize(objective, timeout=timeout, gc_after_trial=True)

In [None]:
# Keep a deep copy of every trial as a backup of the results
trails_bk = list(map(copy.deepcopy, study1.trials))
# Print the trial number and both objective values of each Pareto-optimal trial
for trial in study1.best_trials:
    print(trial.number, *trial.values[:2])

In [None]:
# Check which optimization method (sampler) the study is using
print(f"Sampler is {type(study1.sampler).__name__}")

### 6. パレート最適解を可視化してみましょう

In [None]:
# Visualization provided by Optuna
optuna.visualization.plot_pareto_front(study1)

In [None]:
# Visualize manually
value_columns = ["Iteration", "降伏強度", "引張強度"]

# Every trial of the study: trial number plus both objective values.
trial_all = pd.DataFrame(
    [[t.number, t.values[0], t.values[1]] for t in study1.get_trials()],
    columns=value_columns,
)

# Pareto-optimal trials only, ordered along the x axis so the line is drawn left to right.
trial_pareto = pd.DataFrame(
    [[t.number, t.values[0], t.values[1]] for t in study1.best_trials],
    columns=value_columns,
)
trial_pareto.sort_values("降伏強度", inplace=True)

# plot
plt.clf()
fig, ax = plt.subplots(figsize=(8, 8))
# All trials as unconnected star markers, the Pareto front as a line.
ax.plot(trial_all["降伏強度"], trial_all["引張強度"], linestyle="", marker="*")
ax.plot(trial_pareto["降伏強度"], trial_pareto["引張強度"])
# ax.axis("equal")
fig.tight_layout()
plt.show()
# Note: the axes of this graph do not start at 0

### 7. OptunaHubの`Sampler`を試してみる

In [None]:
# Optuna's built-in NSGA-II is initialized with `RandomSampler`; the sampler below is initialized with `TPESampler`
dr.Client()
package_name = "samplers/nsgaii_with_tpe_warmup"
tpe_warmup_module = optunahub.load_module(package=package_name)
sampler2 = tpe_warmup_module.NSGAIIWithTPEWarmupSampler()
# The study is named after the sampler class so plots can tell the studies apart.
study2 = optuna.create_study(
    study_name=type(sampler2).__name__,
    directions=["maximize", "maximize"],
    sampler=sampler2,
)
study2.optimize(objective, timeout=timeout, gc_after_trial=True)

In [None]:
# Also try `MoCmaSampler`, a multi-objective version of CMA-ES
dr.Client()
package_name = "samplers/mocma"
mocma_module = optunahub.load_module(package=package_name)
sampler3 = mocma_module.MoCmaSampler(seed=42)
# The study is named after the sampler class so plots can tell the studies apart.
study3 = optuna.create_study(
    study_name=type(sampler3).__name__,
    directions=["maximize", "maximize"],
    sampler=sampler3,
)
study3.optimize(objective, timeout=timeout, gc_after_trial=True)

### 8. 3つの`Sampler`の結果を比較してみる

In [None]:
from collections.abc import Sequence
from optuna.study import Study
from optuna.visualization._pareto_front import _get_pareto_front_info

# optunahub.load_module("visualization/plot_pareto_front_multi").plot_pareto_front(
#     [study1, study2, study3]
# ).show()


def plot_pareto_front_matplotlib(studies: Sequence[Study]) -> None:
    """Draw the Pareto fronts of several two-objective studies on one figure.

    Args:
        studies:
            Studies whose Pareto-front trials are plotted. Every study must be
            multi-objective with exactly 2 objectives.

    Raises:
        ValueError: If any study is not multi-objective, if the studies do not
            all have the same number of objectives, or if that number is not 2.
    """
    if any(not study._is_multi_objective() for study in studies):
        raise ValueError(
            "All studies must be multi-objective. For single-objective optimization, "
            "please use plot_optimization_history instead."
        )

    objective_counts = [len(study.directions) for study in studies]

    # Checked before the "== 2" test so a mixed list reports the mismatch first.
    if len(set(objective_counts)) > 1:
        raise ValueError("The number of objectives must be the same for all studies.")

    if any(count != 2 for count in objective_counts):
        raise ValueError("The number of objectives must be 2 for all studies.")

    # Keyed by study name; the name becomes the legend label in the plot helper.
    fronts_by_name = {
        study.study_name: _get_pareto_front_info(study, include_dominated_trials=False)
        for study in studies
    }

    _get_pareto_front_plot_matplotlib(fronts_by_name)


def _get_pareto_front_plot_matplotlib(info_dict) -> None:
    """Scatter-plot the Pareto-front trials of each study on shared axes.

    Args:
        info_dict: Mapping from a legend label to an object exposing
            ``best_trials_with_values`` (an iterable of ``(trial, values)``
            pairs) and ``target_names``. An empty mapping gives a titled,
            otherwise empty figure.
    """
    fig, ax = plt.subplots()
    ax.set_title("Pareto-front Plot for multiple studies")

    last_info = None
    for label, info in info_dict.items():
        xs = [values[0] for _, values in info.best_trials_with_values]
        ys = [values[1] for _, values in info.best_trials_with_values]
        ax.scatter(xs, ys, label=label)
        last_info = info

    # Axis labels come from the last entry. Tracking it explicitly avoids the
    # UnboundLocalError that reading the loop variable raised on an empty mapping.
    if last_info is not None:
        ax.set_xlabel(last_info.target_names[0])
        ax.set_ylabel(last_info.target_names[1])
        ax.legend()
    plt.show()


# Draw the Pareto fronts of the three studies on one figure
plot_pareto_front_matplotlib([study1, study2, study3])

### 9. 3つのターゲットで最適化してみましょう

In [None]:
# Instantiate the DataRobot client again before this cell's prediction calls
dr.Client()


def objective(trial):
    """Three-objective function evaluated by Optuna.

    A value is suggested for every feature column within
    [0.8 * column min, 1.2 * column max] on a 0.01 grid, the resulting
    single-row frame is sent to both deployments, and the two predictions are
    returned together with the suggested "コバルト" value.

    Args:
        trial: The Optuna trial used to suggest the feature values.

    Returns:
        Tuple of three scalars: the 降伏強度 prediction, the 引張強度
        prediction and the suggested コバルト value.
    """
    df_target = DataFrame(index=[0], columns=X_train.columns)
    for col in X_train.columns:
        low = stat.loc[col, "min"] * 0.8
        high = stat.loc[col, "max"] * 1.2
        df_target[col] = trial.suggest_float(col, low, high, step=0.01)

    pred_1 = predict(deployments[0], df_target)[0].loc[0, "降伏強度_PREDICTION"]
    pred_2 = predict(deployments[1], df_target)[0].loc[0, "引張強度_PREDICTION"]

    # Return the scalar cell, not the one-row Series: every objective value
    # must be a plain number, and float() on a Series is deprecated in pandas.
    return pred_1, pred_2, df_target.loc[0, "コバルト"]


# One unseeded instance of each sampler to compare.
samplers = [
    sampler_cls()
    for sampler_cls in (
        optuna.samplers.RandomSampler,
        optuna.samplers.TPESampler,
        optuna.samplers.NSGAIISampler,
    )
]

# Run one 50-trial study per sampler; each study is named after its sampler class.
studies = []
for sampler in samplers:
    sampler_name = type(sampler).__name__
    print("--------------------")
    print(f"最適化 Sampler: {sampler_name}")
    study = optuna.create_study(
        directions=["maximize", "maximize", "minimize"],
        study_name=sampler_name,
        sampler=sampler,
    )
    study.optimize(objective, n_trials=50)
    studies.append(study)

In [None]:
# パレート曲線を描いてみる
# m = optunahub.load_module("visualization/plot_pareto_front_multi")
# fig = m.plot_pareto_front(studies)
# fig.show()

In [None]:
# Plot the hypervolume
# Check how far the results improved relative to the mean point
reference_point = [
    df[column].mean() for column in ("降伏強度", "引張強度", "コバルト")
]
# m = optunahub.load_module("visualization/plot_hypervolume_history_multi")
# fig = m.plot_hypervolume_history(studies, reference_point)
# fig.show()

In [None]:
from optuna.visualization._hypervolume_history import _get_hypervolume_history_info
from typing import NamedTuple


class _HypervolumeHistoryInfo(NamedTuple):
    """Hypervolume history of one study, tagged with that study's name."""

    # Trial numbers; plotted on the x axis.
    trial_numbers: list[int]
    # Hypervolume values; plotted on the y axis against `trial_numbers`.
    values: list[float]
    # Name of the study; used as the legend label of the plotted line.
    study_name: str


def plot_hypervolume_history_matplotlib(
    studies: Sequence[Study], reference_point: Sequence[float]
):
    """Plot hypervolume history for each study using Matplotlib.

    Args:
        studies:
            A list of study object whose trials are plotted for their hypervolumes.
            The number of objectives must be 2 or more for all trials and must be the same on all trials.

        reference_point:
            A reference point to use for hypervolume computation.
            The dimension of the reference point must be the same as the number of objectives.

    Returns:
        None. The figure is shown and then closed by the plotting helper.

    Raises:
        ValueError: If any study is single-objective, if the studies do not all
            have the same number of objectives, or if ``reference_point`` has a
            different dimension than the number of objectives.
    """
    if not all(study._is_multi_objective() for study in studies):
        raise ValueError(
            "All studies must be multi-objective. For single-objective optimization, "
            "please use plot_optimization_history instead."
        )

    # Compare every study with the first one; comparing a study with itself
    # is always true and would let mismatched studies through.
    if not all(
        len(studies[0].directions) == len(study.directions) for study in studies
    ):
        raise ValueError("The number of objectives must be the same for all studies.")

    if len(reference_point) != len(studies[0].directions):
        raise ValueError(
            "The dimension of the reference point must be the same as the number of objectives."
        )

    info_list = []
    for study in studies:
        info_ = _get_hypervolume_history_info(
            study, np.asarray(reference_point, dtype=np.float64)
        )
        # Re-wrap with the study name so the plot helper can label each line.
        info_list.append(
            _HypervolumeHistoryInfo(info_.trial_numbers, info_.values, study.study_name)
        )

    return _get_hypervolume_history_plot_matplotlib(info_list)


def _get_hypervolume_history_plot_matplotlib(info_list: list[_HypervolumeHistoryInfo]):
    """Draw one hypervolume-versus-trial line per entry on shared axes.

    Args:
        info_list: One entry per study; ``trial_numbers`` is plotted against
            ``values`` and ``study_name`` is used as the legend label.
    """
    fig, ax = plt.subplots()
    ax.set(title="Hypervolume History Plot", xlabel="Trial", ylabel="Hypervolume")

    for history in info_list:
        ax.plot(
            history.trial_numbers,
            history.values,
            marker="o",
            label=history.study_name,
        )

    ax.legend()
    plt.show()
    # Close the figure so it is not displayed a second time.
    plt.close(fig)


# Plot the hypervolume history of every study against the reference point
plot_hypervolume_history_matplotlib(studies, reference_point)

In [None]:
# This is the end of this code...