# 6章 MLflow による実験管理とモデル管理

## 汎用的な MLflow による実験記録およびモデル登録

### 事前準備
```bash
conda env create -f conda.yaml
conda activate azureml-book-ch6-env
ipython kernel install --user --name=azureml-book-ch6-env
```
上記コマンドを1度だけ実行します。

In [None]:
import pickle
import os
import shutil

import pandas as pd
from interpret.glassbox import ExplainableBoostingRegressor
from interpret import show

import mlflow
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

In [None]:
# Connect MLflow to Azure Machine Learning.

# Placeholders: replace with the identifiers of your own workspace.
subscription_id = "SUBSCRIPTION_ID"
resource_group = "RESOURCE_GROUP"
workspace = "AML_WORKSPACE_NAME"

# Client bound to the workspace above, authenticated with DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Fetch the workspace and hand its MLflow tracking URI to mlflow, so that the
# mlflow calls in the following cells are sent to that tracking URI.
workspace_details = ml_client.workspaces.get(ml_client.workspace_name)
azureml_mlflow_uri = workspace_details.mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_mlflow_uri)

In [None]:
# Set the name of the MLflow experiment; the returned experiment object is kept in `exp`.
# NOTE(review): the name is prefixed "ch7" although this notebook is chapter 6 and
# uses the "azureml-book-ch6-env" environment — confirm the prefix is intentional.
exp = mlflow.set_experiment("ch7-train-with-mlflow-custom-notebook")

In [None]:
# Load the training and validation data.
df_train = pd.read_csv("../data/Walmart_train.csv")
df_valid = pd.read_csv("../data/Walmart_valid.csv")


def _add_month_day(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with the ``Date`` column replaced by ``Month`` and ``Day``.

    The ``Date`` column is parsed once with ``pd.to_datetime`` and the month
    and day-of-month are appended as the last two columns. The input frame is
    not modified.

    Args:
        df: Frame containing a ``Date`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new frame without ``Date`` and with ``Month`` and ``Day`` columns.
    """
    # NOTE(review): the date format is inferred by pandas; if the CSV stores
    # day-first dates (e.g. DD-MM-YYYY), pass dayfirst=True or an explicit
    # format= here — confirm against the data files.
    dates = pd.to_datetime(df['Date'])
    return df.drop(columns='Date').assign(Month=dates.dt.month, Day=dates.dt.day)


# Apply the same date preprocessing to both splits.
df_train = _add_month_day(df_train)
df_valid = _add_month_day(df_valid)

# Name of the column used as the target variable.
col_target = "Weekly_Sales"

# Split each frame into features (X_*) and a 1-D target array (y_*).
X_train = df_train.drop(columns=col_target)
y_train = df_train[col_target].to_numpy().ravel()
X_valid = df_valid.drop(columns=col_target)
y_valid = df_valid[col_target].to_numpy().ravel()

In [None]:
# Start an MLflow run and keep the returned run object in `run`.
# The run stays open across the following cells until mlflow.end_run() is called.
run = mlflow.start_run()

In [None]:
# Hyperparameters for the Explainable Boosting regressor.
params = dict(
    objective="rmse",
    max_leaves=20,
    learning_rate=0.1,
    random_state=42,
)

# Record the hyperparameters with MLflow.
mlflow.log_params(params)

In [None]:
# Run regression with an Explainable Boosting Machine: build the regressor
# from the hyperparameters in `params` and fit it on the training split.
reg = ExplainableBoostingRegressor(**params)
reg.fit(X_train, y_train)

In [None]:
# Compute r_2 on the validation split.
r_2 = reg.score(X_valid, y_valid)
# Record it as the "r_2_valid" metric, then echo the value in the cell output.
mlflow.log_metric("r_2_valid", r_2)
print(r_2)

In [None]:
# Build the model explanations:
# - global: produced from the fitted model alone;
# - local: produced for the validation features and targets.
global_explanation = reg.explain_global()
local_explanation = reg.explain_local(X_valid, y_valid)

In [None]:
# Display the local explanation computed for the validation data.
show(local_explanation)

In [None]:
# Display the global explanation of the fitted model.
show(global_explanation)

In [None]:
# Record, for every entry in the global explanation's feature_names, the figure
# that visualizes its feature importance / interaction. Each figure is logged
# to MLflow as images/exp_<feature name>.png.
for index, feature_name in enumerate(global_explanation.feature_names):
    # visualize() is addressed by position, so the index is passed, not the name.
    figure = global_explanation.visualize(index)
    mlflow.log_figure(
        figure,
        artifact_file=f'images/exp_{feature_name}.png',
    )

In [None]:
# Save the model: pickle the fitted regressor into a local file.
filename = 'ebm_regressor.pkl'
with open(filename, mode='wb') as model_file:
    pickle.dump(reg, model_file)

In [None]:
# Inputs for wrapping the model with mlflow.pyfunc.

# Model signature inferred from the validation features and targets.
signature = mlflow.models.signature.infer_signature(X_valid, y_valid)

# Artifact name -> local file; the key is the one EbmWrapper.load_context reads.
artifacts = {"ebm_model_path": filename}

class EbmWrapper(mlflow.pyfunc.PythonModel):
    """mlflow.pyfunc wrapper around a pickled regressor."""

    def load_context(self, context):
        """Unpickle the regressor stored under the "ebm_model_path" artifact.

        Args:
            context: Object whose ``artifacts`` mapping provides the local
                path of the pickle file under the key "ebm_model_path".
        """
        # Kept as a method-local import, presumably so this method does not
        # depend on notebook-level imports when the model is loaded elsewhere.
        import pickle

        model_path = context.artifacts["ebm_model_path"]
        with open(model_path, 'rb') as model_file:
            self.regressor = pickle.load(model_file)

    def predict(self, context, model_input):
        """Return the wrapped regressor's predictions for ``model_input``.

        Args:
            context: Unused here.
            model_input: Features passed straight to the regressor's ``predict``.
        """
        predictions = self.regressor.predict(model_input)
        return predictions

In [None]:
# Save the mlflow.pyfunc-wrapped model to a local directory.
local_mlflow_model_path = "mlflow_pyfunc_model"

# Remove the output of a previous execution of this cell first
# (presumably because save_model will not overwrite an existing directory).
if os.path.exists(local_mlflow_model_path):
    shutil.rmtree(local_mlflow_model_path)

mlflow.pyfunc.save_model(
    path=local_mlflow_model_path,
    python_model=EbmWrapper(),
    conda_env="environment.yaml",
    artifacts=artifacts,
    # Attach the signature inferred with infer_signature; it was previously
    # computed but never passed, so the saved model carried no schema.
    signature=signature,
)

In [None]:
# Check that the model works: reload the locally saved pyfunc model and
# run it on the validation features (the result is shown as the cell output).
loaded_model = mlflow.pyfunc.load_model(local_mlflow_model_path)
loaded_model.predict(X_valid)

In [None]:
# Log the wrapped model with MLflow.
# If registered_model_name is specified, the model is registered in the
# Model registry at the same time.
mlflow.pyfunc.log_model(
    artifact_path=local_mlflow_model_path,
    python_model=EbmWrapper(),
    # registered_model_name="ebm-wrapped-model",  # uncomment to also register
    conda_env="environment.yaml",
    artifacts=artifacts,
    # Attach the signature inferred with infer_signature; it was previously
    # computed but never passed, so the logged model carried no schema.
    signature=signature,
)

In [None]:
# End the MLflow run that was opened with mlflow.start_run().
mlflow.end_run()