In [1]:
import os
import mlflow

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

from wrapper import SklearnModelWrapper

In [2]:
# Connection settings for the MLflow tracking server and its S3-compatible
# artifact store. The literals below are only fallbacks: `setdefault` keeps any
# value already exported in the kernel's environment, so real endpoints and
# credentials can be supplied from outside the notebook instead of being
# overwritten (or committed) here.
MLFLOW_ENV_DEFAULTS = {
    "MLFLOW_S3_ENDPOINT_URL": 'http://46.101.217.205:19001',
    "MLFLOW_TRACKING_URI": 'http://46.101.217.205:5900',
    # NOTE(review): these two look like placeholders; export the real key pair
    # in the environment before starting the kernel.
    "AWS_ACCESS_KEY_ID": 'IAM_ACCESS_KEY',
    "AWS_SECRET_ACCESS_KEY": 'IAM_SECRET_KEY',
}
for env_name, env_default in MLFLOW_ENV_DEFAULTS.items():
    os.environ.setdefault(env_name, env_default)

In [3]:
# Point the MLflow fluent API at the tracking server named in the environment,
# then build a client for direct tracking-server queries.
tracking_uri = os.environ["MLFLOW_TRACKING_URI"]
mlflow.set_tracking_uri(tracking_uri)
client = mlflow.tracking.MlflowClient()

In [4]:
# Look up the experiment that the runs in this notebook are filed under.
# `get_experiment_by_name` returns None for an unknown name, which would only
# surface later as an AttributeError on `experiment.experiment_id`; create the
# experiment on first use so the notebook also runs against a fresh server.
EXPERIMENT_NAME = 'iris_sklearn'
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    # create_experiment returns the new id; fetch the full Experiment entity.
    experiment = client.get_experiment(client.create_experiment(EXPERIMENT_NAME))
experiment

<Experiment: artifact_location='s3://mlflow/1', experiment_id='1', lifecycle_stage='active', name='iris_sklearn', tags={}>

In [5]:
# Load iris as pandas objects: the feature frame and the target series.
iris_features_and_target = datasets.load_iris(return_X_y=True, as_frame=True)
X, y = iris_features_and_target

# Print column names, dtypes and non-null counts of the feature frame.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [6]:
# Keep only the first two feature columns (sepal length and sepal width in
# the frame summarised by X.info()).
X = X.loc[:, X.columns[:2]]

In [7]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across kernel restarts.
split_parts = train_test_split(X, y, test_size=0.3, random_state=11)
X_train, X_test, y_train, y_test = split_parts

# Sanity check: partition shapes and the mean target value in each partition.
X_train.shape, X_test.shape, y_train.mean(), y_test.mean()

((105, 2), (45, 2), 0.9714285714285714, 1.0666666666666667)

In [8]:
# Sweep the forest size. The parent run groups the sweep; every value of
# `n_estimators` gets its own nested child run holding the fitted model and
# its test-set MSE.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name='iris') as run:
    for n_estimators in range(10, 30, 10):
        with mlflow.start_run(experiment_id=experiment.experiment_id,
                              run_name=f'n_estimators={n_estimators}',
                              nested=True) as nested_run:
            # Record the swept hyperparameter; without it the child runs
            # cannot be told apart in the tracking UI.
            mlflow.log_param('n_estimators', n_estimators)

            # Seed the forest (same seed as the train/test split) so the
            # logged metric is reproducible on Restart & Run All.
            sk_model = RandomForestRegressor(n_estimators=n_estimators,
                                             random_state=11)
            sk_model.fit(X_train, y_train)

            mlflow.sklearn.log_model(sk_model, 'random-forest-model')

            mse = mean_squared_error(y_test, sk_model.predict(X_test))
            mlflow.log_metric('mse', mse)