In [18]:
from mlflow import MlflowClient
from pprint import pprint
from sklearn.ensemble import RandomForestRegressor
from utils import dataset
import numpy as np

Configuring the MLflow Tracking Client


In [4]:
# Create the tracking client, bound to the server at the loopback address on port 8000.
client = MlflowClient(
    tracking_uri="http://127.0.0.1:8000",
)


Searching Experiments

In [5]:
# Query the tracking server for experiments using the default search
# arguments, then print the returned collection as-is.
all_experiments = client.search_experiments()
print(all_experiments)


[<Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1702600045892, experiment_id='0', last_update_time=1702600045892, lifecycle_stage='active', name='Default', tags={}>]


Creating Experiments

In [6]:
# Provide an Experiment description that will appear in the UI
# (it is attached through the "mlflow.note.content" tag below).
experiment_description = "This is the experiment description"

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "experiment-project-name",
    "team": "experiment-team",
    "mlflow.note.content": experiment_description,
}

# Experiment names must be unique: calling create_experiment again with a name
# that already exists raises an MlflowException, which would break this cell on
# every re-run against the same tracking server. Look the name up first and
# reuse the existing experiment's ID when it is already there.
experiment_name = "Experiment_Models"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    # create_experiment returns the ID of the newly created Experiment
    experiment = client.create_experiment(name=experiment_name, tags=experiment_tags)
else:
    # Keep `experiment` holding an experiment ID, same as the creation path.
    # NOTE(review): tags of an already-existing experiment are left untouched.
    experiment = existing_experiment.experiment_id


In [9]:
# Generate the apple sales data (with promo adjustment) through the project's
# `dataset` helper, then preview the last rows of the resulting frame.
df = dataset.generate_apple_sales_data_with_promo_adjustment()
df.tail()

Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
4995,2023-12-10 21:41:50.761203,21.643051,3.821656,1,0,2.39101,0,1606.454273,1563.064082
4996,2023-12-11 21:41:50.761201,13.808813,1.080603,0,1,0.898693,0,1284.407359,1606.454273
4997,2023-12-12 21:41:50.761198,11.698227,1.911,0,0,2.83986,0,1014.429223,1284.407359
4998,2023-12-13 21:41:50.761194,18.052081,1.000521,0,0,1.18844,0,1367.627356,1014.429223
4999,2023-12-14 21:41:50.761177,17.017294,0.650213,0,0,2.131694,0,1310.468146,1367.627356


In [10]:
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [15]:
# Direct the module-level `mlflow.*` API at the tracking server on 127.0.0.1:8000.
mlflow.set_tracking_uri(
    "http://127.0.0.1:8000",
)

In [16]:
# Name given to this training iteration's run.
# When no name is supplied, MLflow auto-generates a unique one.
run_name = "apples_rf_test"

# Artifact path under which the model will be saved.
artifact_path = "rf_apples"

# Make "Apple_Models" the active experiment and keep the returned
# Experiment metadata.
# NOTE(review): the experiment created earlier through the client is named
# "Experiment_Models", not "Apple_Models" - confirm the two names are meant
# to differ.
apple_experiment = mlflow.set_experiment("Apple_Models")


2023/12/14 21:48:13 INFO mlflow.tracking.fluent: Experiment with name 'Apple_Models' does not exist. Creating a new experiment.


In [19]:
# Target is the "demand" column; features are everything else except the
# date field, which is dropped along with the target.
y = df["demand"]
X = df.drop(columns=["date", "demand"])

# Hold out 20% of the rows for validation, with a fixed seed for the split
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameters handed to the random forest
params = dict(
    n_estimators=100,
    max_depth=6,
    min_samples_split=10,
    min_samples_leaf=4,
    bootstrap=True,
    oob_score=False,
    random_state=888,
)

# Construct the regressor and fit it on the training split in one expression
# (fit returns the fitted estimator itself)
rf = RandomForestRegressor(**params).fit(X_train, y_train)

# Score the held-out validation rows
y_pred = rf.predict(X_val)

# Error metrics on the validation predictions; RMSE is derived from MSE
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

# Collect the metrics under the names they will be written with
metrics = dict(mae=mae, mse=mse, rmse=rmse, r2=r2)

# Initiate the MLflow run context; everything logged inside the block is
# recorded against this run.
with mlflow.start_run(run_name=run_name) as run:
    # Log the parameters used for the model fit
    mlflow.log_params(params)

    # Log the error metrics that were calculated during validation
    mlflow.log_metrics(metrics)

    # Log an instance of the trained model for later use.
    # The input example is stored alongside the model and is used to infer
    # its signature, so pass a handful of representative rows rather than
    # the whole validation frame.
    mlflow.sklearn.log_model(
        sk_model=rf,
        input_example=X_val.head(5),
        artifact_path=artifact_path,
    )


  input_schema = _infer_schema(input_example)
