In [1]:
# initializing the mlflow client

from mlflow import MlflowClient
from pprint import pprint
from sklearn.ensemble import RandomForestRegressor

In [3]:
# initializing the mlflow client
# make sure the server is running in the backend

client = MlflowClient(tracking_uri="http://127.0.0.1:8080")

In [7]:
# getting all experiments from the client

all_experiments = client.search_experiments()

print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/861645051746955503', creation_time=1705826438911, experiment_id='861645051746955503', last_update_time=1705826438911, lifecycle_stage='active', name='first run', tags={}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1705826324411, experiment_id='0', last_update_time=1705826324411, lifecycle_stage='active', name='Default', tags={}>]


In [8]:
# Extract the experiment name and lifecycle_stage

default_experiment = [
    {"name": experiment.name, "lifecycle_stage": experiment.lifecycle_stage}
    for experiment in all_experiments
    if experiment.name == "Default"
][0]

pprint(default_experiment)

{'lifecycle_stage': 'active', 'name': 'Default'}


In [10]:
# creating a new experiment

experiment_description = (
    "This is a grocery forecasting project. "
    "This experiment contains the produce models for apples"
)

experiment_tags = {
    "project_name": "grocery-forecasting",
    "store_dept" : "produce",
    "team" : "stores-ml",
    "project_quarter" : "Q3-2023",
    "mlflow.note.content": experiment_description,
}

produce_apples_experiment = client.create_experiment(name="Apple_models", tags=experiment_tags)

In [11]:
# use search_experiments() to search on the project_name tag key

apples_experiment = client.search_experiments(
    filter_string="tags.`project_name` = 'grocery-forecasting'"
)

pprint(apples_experiment[0])

<Experiment: artifact_location='mlflow-artifacts:/672303619918487701', creation_time=1705829604090, experiment_id='672303619918487701', last_update_time=1705829604090, lifecycle_stage='active', name='Apple_models', tags={'mlflow.note.content': 'This is a grocery forecasting project. This '
                        'experiment contains the produce models for apples',
 'project_name': 'grocery-forecasting',
 'project_quarter': 'Q3-2023',
 'store_dept': 'produce',
 'team': 'stores-ml'}>


In [12]:
# access each tag data

print(apples_experiment[0].tags["team"])

stores-ml


In [30]:
# getting the data
import pandas as pd
import numpy as np

df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
0,2021-04-27 15:34:09.114631,30.584727,6.786845,0,0,2.502464,1,1044.994017,1044.994017
1,2021-04-28 15:34:09.114630,15.465069,9.71652,0,0,1.87118,1,1079.029699,1044.994017
2,2021-04-29 15:34:09.114629,10.786525,1.099836,0,0,1.14916,1,1173.393597,1079.029699
3,2021-04-30 15:34:09.114628,23.648154,9.578136,0,0,0.891414,1,1168.780383,1173.393597
4,2021-05-01 15:34:09.114627,13.861391,4.693826,1,0,0.737711,0,1173.154356,1168.780383


In [26]:
# train and log the model

import mlflow
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mlflow.set_tracking_uri("http://127.0.0.1:8080")

# set the current active experiment to the "apple models" experiment and return the experiment metadata
apple_experiment = mlflow.set_experiment("Apple_Models")

run_name = "apples_rf_test"
artifact_path ="rf_apples"

2024/01/21 15:42:55 INFO mlflow.tracking.fluent: Experiment with name 'Apple_Models' does not exist. Creating a new experiment.


In [27]:
# making the model and predicting

X = df.drop(columns=["date", "demand"])
y = df["demand"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

params = {
    "n_estimators": 100,
    "max_depth": 6,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 888,
}

model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_val)

In [32]:
# Calculate error metrics
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

metrics = {"mae": mae, "mse": mse, "rmse": rmse, "r2": r2}
pprint(metrics)

{'mae': 52.186186460345354,
 'mse': 4218.601187824255,
 'r2': 0.8875649123878548,
 'rmse': 64.95075971706763}


In [33]:
# initiating the run

with mlflow.start_run(run_name=run_name) as run:
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    
    # log an instance of the trained model
    mlflow.sklearn.log_model(
        sk_model=model,
        input_example=X_val,
        artifact_path=artifact_path,
    )

  input_schema = _infer_schema(input_example)


thanks for reading