In [1]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


mlflow.set_tracking_uri("http://mlflow_container_ui:8000")



In [5]:
import os

from minio import Minio
from minio.error import InvalidResponseError, S3Error

accessID = os.environ.get('AWS_ACCESS_KEY_ID')
accessSecret =  os.environ.get('AWS_SECRET_ACCESS_KEY')
minioUrl =  os.environ.get('MLFLOW_S3_ENDPOINT_URL')
bucketName =  os.environ.get('AWS_BUCKET_NAME')

if  None in (accessID, accessSecret, minioUrl, bucketName):
    print("""
        [!] environment variable is empty! run \'source .env\' to load it from the .env file
        AWS_ACCESS_KEY_ID=
        AWS_SECRET_ACCESS_KEY=
        MLFLOW_S3_ENDPOINT_URL=
        AWS_BUCKET_NAME=
    """ % (accessID, accessSecret, minioUrl, bucketName))
    raise RuntimeError

minioUrlHostWithPort = minioUrl.split('//')[1]
print('[*] minio url: ',minioUrlHostWithPort)

s3Client = Minio(
    minioUrlHostWithPort,
    access_key=accessID,
    secret_key=accessSecret,
    secure=False
)

try:
    s3Client.make_bucket(bucketName)
except S3Error as e:
    print(e)

print(f"buckercreated: {bucketName}")

[*] minio url:  s3:9000
S3 operation failed; code: BucketAlreadyOwnedByYou, message: Your previous request to create the named bucket succeeded and you already own it., resource: /mlflow, request_id: 17C3FD1101D552EF, host_id: a9d04103-dbf3-4f7b-92b5-a71419f7056c, bucket_name: mlflow
buckercreated: mlflow


In [6]:
from mlflow.exceptions import RestException

experiment_name = "iris_experiment"

try:
    experiment_id = mlflow.create_experiment(experiment_name)
except RestException as e:
    print(e)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    experiment_id = experiment.experiment_id

print(experiment_id)

2


In [7]:
import joblib

model_directory = "/srv/data"
model_filename = "random_forest_model.joblib"

model_path = f"{model_directory}/{model_filename}"


with mlflow.start_run(run_name="last_run", experiment_id=experiment_id):
    artifact_uri = mlflow.get_artifact_uri()
    print(artifact_uri)
    print('Load the Iris dataset and training classifier')
    data = load_iris()
    X, y = data.data, data.target

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train a random forest classifier
    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    
    joblib.dump(model, model_path)
    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate and log accuracy
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    print('Log the model to S3')
    mlflow.sklearn.log_model(model, "model")
    mlflow.log_artifact(model_path) # , artifact_path = "model"


s3://mlflow/2/3a6e0984ebbf45a1b0a2ec5e61770e0c/artifacts
Load the Iris dataset and training classifier
Log the model to S3
