# 6章 MLflow による実験管理とモデル管理

## autologによる実験記録

### 事前準備
```bash
conda env create -f conda.yaml
conda activate azureml-book-ch6-env
ipython kernel install --user --name=azureml-book-ch6-env
```
上記のコマンドは、最初に1度だけ実行します(conda 環境の作成と、Jupyter カーネルとしての登録を行います)。

In [None]:
import pandas as pd
import lightgbm as lgb

import mlflow
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

In [None]:
# Connect MLflow to Azure Machine Learning.

# Placeholder values: replace them with the identifiers of your own workspace.
subscription_id = "SUBSCRIPTION_ID"
resource_group = "RESOURCE_GROUP"
workspace = "AML_WORKSPACE_NAME"

ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Fetch the workspace object and read its MLflow tracking URI.
aml_workspace = ml_client.workspaces.get(ml_client.workspace_name)
azureml_mlflow_uri = aml_workspace.mlflow_tracking_uri

# Use that URI as MLflow's tracking URI.
mlflow.set_tracking_uri(azureml_mlflow_uri)

In [None]:
# Set the MLflow experiment name.
# NOTE(review): the name starts with "ch7" although this notebook's title and
# conda environment refer to chapter 6 — confirm which one is intended.
experiment_name = "ch7-train-with-mlflow-autolog-notebook"
exp = mlflow.set_experiment(experiment_name)

In [None]:
def _add_month_day(df: pd.DataFrame, date_col: str = "Date") -> pd.DataFrame:
    """Return a new frame where ``date_col`` is replaced by ``Month`` and ``Day``.

    The date column is parsed once with ``pd.to_datetime``; its month and
    day-of-month are added as the ``Month`` and ``Day`` columns, and the
    original date column is dropped. The input frame is not modified.

    NOTE(review): parsing relies on pandas' default format inference — confirm
    that it matches the date format used in the CSV files.
    """
    dates = pd.to_datetime(df[date_col])
    return df.assign(Month=dates.dt.month, Day=dates.dt.day).drop(columns=date_col)


def _split_features_target(df: pd.DataFrame, target_col: str):
    """Split ``df`` into ``(features, target)``.

    ``features`` is ``df`` without ``target_col``; ``target`` is that column
    converted to a flattened NumPy array.
    """
    features = df.drop(columns=target_col)
    target = df[target_col].to_numpy().ravel()
    return features, target


# Load the training and validation data, then replace the Date column with
# Month and Day columns (same transformation for both frames).
df_train = _add_month_day(pd.read_csv("../data/Walmert_train.csv"))
df_valid = _add_month_day(pd.read_csv("../data/Walmert_valid.csv"))

# Name of the column used as the target variable.
col_target = "Weekly_Sales"

# Split the training and validation data into features and target.
X_train, y_train = _split_features_target(df_train, col_target)
X_valid, y_valid = _split_features_target(df_valid, col_target)

In [None]:
# Start an MLflow run and keep the returned object in `run`.
# The run is ended by the mlflow.end_run() call in the last cell.
run = mlflow.start_run()

In [None]:
# Enable MLflow autologging for LightGBM.
mlflow.lightgbm.autolog()

In [None]:
# Convert the data into LightGBM datasets.
# The lgb.Dataset instances must be created after autolog has been enabled
# so that the model signature can be captured automatically.
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_valid, label=y_valid)

In [None]:
# LightGBM training parameters.
params = {
    # Learning task, boosting algorithm and evaluation metric.
    "objective": "regression",
    "boosting_type": "gbdt",
    "metric": "rmse",
    # Tree size limits and learning rate.
    "num_leaves": 20,
    "max_depth": 10,
    "learning_rate": 0.1,
    # Execution device and reproducibility settings.
    "device_type": "cpu",
    "seed": 42,
    "deterministic": True,
}

In [None]:
# Train the LightGBM model on the training set, evaluating on the validation set.
# Note: despite the name `clf`, the parameters above use a regression objective.
clf = lgb.train(
    params,
    train_set=train_data,
    valid_sets=[valid_data],
)

In [None]:
# End the MLflow run that was started with mlflow.start_run() above.
mlflow.end_run()