In [6]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [2]:
# 1. Prepare the data
# Synthetic regression problem: 1000 samples, 10 features, Gaussian noise with
# standard deviation 0.1. The seed is fixed so the data set is reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the samples as the test split (fixed seed again).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Quick sanity check: container types first, then shapes, for features and target.
print(*(type(arr) for arr in (X, y)))
print(*(arr.shape for arr in (X, y)))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(1000, 10) (1000,)


In [3]:
# 2. Define the hyperparameter search space
# Every entry is a finite list of candidate values, keyed by the ElasticNet
# constructor argument it is passed to.
param_grid = dict(
    alpha=[0.01, 0.1, 1.0, 10.0],
    l1_ratio=[0.1, 0.5, 0.9],
)

In [4]:
# 3. Configure MLflow
# URI of the tracking server. An alternative that was tried earlier is
# "http://localhost:5000"; swap it in here if the server is reachable there.
tracking_uri = "http://mlflow:5000"
# Name of the experiment that the runs below are recorded under.
experiment_name = 'ElasticNet_Regression2'

mlflow.set_tracking_uri(tracking_uri)
# Kept as the last expression so the notebook displays the returned Experiment.
mlflow.set_experiment(experiment_name)

2024/10/09 05:00:31 INFO mlflow.tracking.fluent: Experiment with name 'ElasticNet_Regression2' does not exist. Creating a new experiment.


<Experiment: artifact_location='ftp://ftpuser:ftppassword@ftp-server/mlflow/2', creation_time=1728450031152, experiment_id='2', last_update_time=1728450031152, lifecycle_stage='active', name='ElasticNet_Regression2', tags={}>

In [8]:
# 4. ElasticNet estimator and RandomizedSearchCV configuration
model = ElasticNet(random_state=42)

# param_grid contains only finite lists, so the number of distinct candidates
# is the product of the list lengths. Asking RandomizedSearchCV for more
# iterations than that makes scikit-learn emit a UserWarning and silently fall
# back to the grid size, so cap n_iter explicitly instead.
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))

random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    scoring='neg_mean_squared_error',  # higher is better, hence the negated MSE
    n_iter=min(50, n_candidates),  # at most 50 trials, never more than the search space
    cv=5,  # number of cross-validation folds
    random_state=42,
)

In [9]:
# 5. Run the hyperparameter search and log the results to MLflow
# Binding the run with `as run` gives direct access to its id, instead of going
# through mlflow.active_run() and the deprecated `run_uuid` attribute.
with mlflow.start_run(run_name="ElasticNet_RandomSearch") as run:
    # 6. Train the model (cross-validated hyperparameter search)
    random_search.fit(X_train, y_train)

    # Best hyperparameters found and the estimator refit with them
    best_params = random_search.best_params_
    best_model = random_search.best_estimator_

    # 7. Predict with the best model and evaluate on the held-out test split
    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # 8. Log hyperparameters and test metrics (stored in the backend store),
    #    one batched call each instead of one request per value
    mlflow.log_params({
        "best_alpha": best_params['alpha'],
        "best_l1_ratio": best_params['l1_ratio'],
    })
    mlflow.log_metrics({"mse": float(mse), "r2": float(r2)})

    # 9. Log the mean CV score of every candidate that was tried.
    #    The search is scored with neg_mean_squared_error, so the sign is
    #    flipped to report a plain (positive) MSE.
    cv_results = random_search.cv_results_
    mlflow.log_metrics({
        f"mse_alpha_{param['alpha']}_l1_{param['l1_ratio']}": float(-score)
        for param, score in zip(cv_results['params'], cv_results['mean_test_score'])
    })

    # 10. Save the best model (stored in the artifact storage)
    mlflow.sklearn.log_model(best_model, "best_model")

    print(f"Best parameters: {best_params}")
    print(f"Model saved in run {run.info.run_id}")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

2024/10/09 05:01:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run ElasticNet_RandomSearch at: http://mlflow:5000/#/experiments/2/runs/bf672c9b981548289c11d108c1a49a45.
2024/10/09 05:01:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5000/#/experiments/2.


Best parameters: {'l1_ratio': 0.9, 'alpha': 0.01}
Model saved in run bf672c9b981548289c11d108c1a49a45
Mean Squared Error: 0.03403663559090704
R^2 Score: 0.9999979825513907


In [10]:
# 5. Run the hyperparameter search again and log it as a second MLflow run
# NOTE: the search object, the data split and every random seed are unchanged,
# so this refit reproduces the same best parameters and metrics; only the run
# name (and therefore the run id) differs.
# Binding the run with `as run` gives direct access to its id, instead of going
# through mlflow.active_run() and the deprecated `run_uuid` attribute.
with mlflow.start_run(run_name="ElasticNet_RandomSearch2") as run:
    # 6. Train the model (cross-validated hyperparameter search)
    random_search.fit(X_train, y_train)

    # Best hyperparameters found and the estimator refit with them
    best_params = random_search.best_params_
    best_model = random_search.best_estimator_

    # 7. Predict with the best model and evaluate on the held-out test split
    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # 8. Log hyperparameters and test metrics (stored in the backend store),
    #    one batched call each instead of one request per value
    mlflow.log_params({
        "best_alpha": best_params['alpha'],
        "best_l1_ratio": best_params['l1_ratio'],
    })
    mlflow.log_metrics({"mse": float(mse), "r2": float(r2)})

    # 9. Log the mean CV score of every candidate that was tried.
    #    The search is scored with neg_mean_squared_error, so the sign is
    #    flipped to report a plain (positive) MSE.
    cv_results = random_search.cv_results_
    mlflow.log_metrics({
        f"mse_alpha_{param['alpha']}_l1_{param['l1_ratio']}": float(-score)
        for param, score in zip(cv_results['params'], cv_results['mean_test_score'])
    })

    # 10. Save the best model (stored in the artifact storage)
    mlflow.sklearn.log_model(best_model, "best_model")

    print(f"Best parameters: {best_params}")
    print(f"Model saved in run {run.info.run_id}")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

2024/10/09 05:02:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run ElasticNet_RandomSearch2 at: http://mlflow:5000/#/experiments/2/runs/5300126bafec48daac119f38737ce098.
2024/10/09 05:02:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow:5000/#/experiments/2.


Best parameters: {'l1_ratio': 0.9, 'alpha': 0.01}
Model saved in run 5300126bafec48daac119f38737ce098
Mean Squared Error: 0.03403663559090704
R^2 Score: 0.9999979825513907
