In [1]:
from mlflow import MlflowClient 
from pprint import pprint 
from sklearn.ensemble import RandomForestRegressor

In [2]:
client = MlflowClient(tracking_uri="http://127.0.0.1:8080")

In [3]:
all_experiments = client.search_experiments() 

print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/290953786829519714', creation_time=1760728501164, experiment_id='290953786829519714', last_update_time=1760728501164, lifecycle_stage='active', name='Apple_Models', tags={'mlflow.experimentKind': 'custom_model_development',
 'mlflow.note.content': 'This is the grocery forecasting project.This '
                        'experiment contains the produce models for apples.',
 'project_name': 'grocery-forecasting',
 'project_quarter': 'Q3-2023',
 'store_dept': 'produce',
 'team': 'stores-ml'}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1760727060292, experiment_id='0', last_update_time=1760727060292, lifecycle_stage='active', name='Default', tags={'mlflow.experimentKind': 'custom_model_development'}>]


In [4]:
default_experiment = [
    {"name": experiment.name, "lifecycle_stage": experiment.lifecycle_stage}
    for experiment in all_experiments
    if experiment.name == "Default"
][0]

pprint(default_experiment)


{'lifecycle_stage': 'active', 'name': 'Default'}


In [None]:
experiment_description = (
    "This is the grocery forecasting project."
    "This experiment contains the produce models for apples."
)

experiment_tags = {
    "project_name": "grocery-forecasting",
    "store_dept": "produce",
    "team": "stores-ml",
    "project_quarter": "Q3-2023",
    "mlflow.note.content": experiment_description
}

#experiment creation 
produce_apples_experiment = client.create_experiment(
    name="Apple_Models", tags=experiment_tags
)

In [6]:
apples_experiment = client.search_experiments(
    filter_string="tags.`project_name`= 'grocery-forecasting'"
)

print(vars(apples_experiment[0]))

{'_experiment_id': '290953786829519714', '_name': 'Apple_Models', '_artifact_location': 'mlflow-artifacts:/290953786829519714', '_lifecycle_stage': 'active', '_tags': {'project_name': 'grocery-forecasting', 'project_quarter': 'Q3-2023', 'mlflow.note.content': 'This is the grocery forecasting project.This experiment contains the produce models for apples.', 'store_dept': 'produce', 'mlflow.experimentKind': 'custom_model_development', 'team': 'stores-ml'}, '_creation_time': 1760728501164, '_last_update_time': 1760728501164}


In [7]:
pprint(vars(apples_experiment[0]))

{'_artifact_location': 'mlflow-artifacts:/290953786829519714',
 '_creation_time': 1760728501164,
 '_experiment_id': '290953786829519714',
 '_last_update_time': 1760728501164,
 '_lifecycle_stage': 'active',
 '_name': 'Apple_Models',
 '_tags': {'mlflow.experimentKind': 'custom_model_development',
           'mlflow.note.content': 'This is the grocery forecasting '
                                  'project.This experiment contains the '
                                  'produce models for apples.',
           'project_name': 'grocery-forecasting',
           'project_quarter': 'Q3-2023',
           'store_dept': 'produce',
           'team': 'stores-ml'}}


In [11]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta


def generate_apple_sales_data_with_promo_adjustment(
    base_demand: int = 1000, n_rows: int = 5000
):
    """
    Generates a synthetic dataset for predicting apple sales demand with seasonality
    and inflation.

    This function creates a pandas DataFrame with features relevant to apple sales.
    The features include date, average_temperature, rainfall, weekend flag, holiday flag,
    promotional flag, price_per_kg, and the previous day's demand. The target variable,
    'demand', is generated based on a combination of these features with some added noise.

    Args:
        base_demand (int, optional): Base demand for apples. Defaults to 1000.
        n_rows (int, optional): Number of rows (days) of data to generate. Defaults to 5000.

    Returns:
        pd.DataFrame: DataFrame with features and target variable for apple sales prediction.

    Example:
        >>> df = generate_apple_sales_data_with_seasonality(base_demand=1200, n_rows=6000)
        >>> df.head()
    """

    # Set seed for reproducibility
    np.random.seed(9999)

    # Create date range
    dates = [datetime.now() - timedelta(days=i) for i in range(n_rows)]
    dates.reverse()

    # Generate features
    df = pd.DataFrame(
        {
            "date": dates,
            "average_temperature": np.random.uniform(10, 35, n_rows),
            "rainfall": np.random.exponential(5, n_rows),
            "weekend": [(date.weekday() >= 5) * 1 for date in dates],
            "holiday": np.random.choice([0, 1], n_rows, p=[0.97, 0.03]),
            "price_per_kg": np.random.uniform(0.5, 3, n_rows),
            "month": [date.month for date in dates],
        }
    )

    # Introduce inflation over time (years)
    df["inflation_multiplier"] = (
        1 + (df["date"].dt.year - df["date"].dt.year.min()) * 0.03
    )

    # Incorporate seasonality due to apple harvests
    df["harvest_effect"] = np.sin(2 * np.pi * (df["month"] - 3) / 12) + np.sin(
        2 * np.pi * (df["month"] - 9) / 12
    )

    # Modify the price_per_kg based on harvest effect
    df["price_per_kg"] = df["price_per_kg"] - df["harvest_effect"] * 0.5

    # Adjust promo periods to coincide with periods lagging peak harvest by 1 month
    peak_months = [4, 10]  # months following the peak availability
    df["promo"] = np.where(
        df["month"].isin(peak_months),
        1,
        np.random.choice([0, 1], n_rows, p=[0.85, 0.15]),
    )

    # Generate target variable based on features
    base_price_effect = -df["price_per_kg"] * 50
    seasonality_effect = df["harvest_effect"] * 50
    promo_effect = df["promo"] * 200

    df["demand"] = (
        base_demand
        + base_price_effect
        + seasonality_effect
        + promo_effect
        + df["weekend"] * 300
        + np.random.normal(0, 50, n_rows)
    ) * df[
        "inflation_multiplier"
    ]  # adding random noise

    # Add previous day's demand
    df["previous_days_demand"] = df["demand"].shift(1)
    df["previous_days_demand"] = df["previous_days_demand"].fillna(
        method="bfill"
    )  # fill the first row

    # Drop temporary columns
    df = df.drop(columns=["inflation_multiplier", "harvest_effect", "month"])

    return df

In [12]:
data = generate_apple_sales_data_with_promo_adjustment(base_demand=1_000, n_rows=1_000)

data[-20:]

  df["previous_days_demand"] = df["previous_days_demand"].fillna(


Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
980,2025-09-30 15:15:34.013562,34.130183,1.454065,0,0,1.449177,0,971.802447,1001.085782
981,2025-10-01 15:15:34.013560,32.353643,9.462859,0,0,2.856503,1,1030.951553,971.802447
982,2025-10-02 15:15:34.013558,18.816833,0.39147,0,0,1.326429,1,1175.352029,1030.951553
983,2025-10-03 15:15:34.013556,34.533012,2.120477,0,0,0.970131,1,1251.385504,1175.352029
984,2025-10-04 15:15:34.013555,23.057202,2.365705,1,0,1.049931,1,1521.427049,1251.385504
985,2025-10-05 15:15:34.013553,34.810165,3.089005,1,0,2.035149,1,1504.971149,1521.427049
986,2025-10-06 15:15:34.013551,29.208905,3.673292,0,0,2.518098,1,1268.249547,1504.971149
987,2025-10-07 15:15:34.013549,16.428676,4.077782,0,0,1.268979,1,1275.118915,1268.249547
988,2025-10-08 15:15:34.013547,32.067512,2.734454,0,0,0.762317,1,1252.492007,1275.118915
989,2025-10-09 15:15:34.013545,31.938203,13.883486,0,0,1.153301,1,1179.04047,1252.492007


In [13]:
import mlflow
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [14]:
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [17]:
#set current active experiment
#method returns experiment metadata 
apple_experiment = mlflow.set_experiment("Apple_Models")


#define name for run
#random name will be generated if not provided
run_name = "apples_rf_test" 

#define artifact path for model to be saved to
artifact_path = "rf_apples"

In [21]:
#split data 
X = data.drop(columns=['date', 'demand'])
y = data['demand']


#split data into train/test sets 
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

params = {
    "n_estimators": 100,
    "max_depth": 6,
    "min_samples_split": 10, 
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 888,
}

#train the model
rf = RandomForestRegressor(**params)

#fit the model on the training data 
rf.fit(X_train, y_train)

#predict on validation set 
y_pred = rf.predict(X_val)

#calculate error metrics
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

#format metrics for logging
metrics = {
    "mae": mae,
    "mse": mse,
    "rmse": rmse,
    "r2": r2,
}

#initiate run context
with mlflow.start_run(run_name=run_name) as run:
    #log params for model fit
    mlflow.log_params(params)

    #log metrics from validation 
    mlflow.log_metrics(metrics)

    #log instance for later use
    mlflow.sklearn.log_model(sk_model=rf, input_example=X_val, name=artifact_path, registered_model_name="sk-learn-random-forest-reg-model")

Successfully registered model 'sk-learn-random-forest-reg-model'.
2025/10/19 16:25:44 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-random-forest-reg-model, version 1


🏃 View run apples_rf_test at: http://127.0.0.1:8080/#/experiments/290953786829519714/runs/ccaee1c3f28444acaa03b885ed9a3627
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/290953786829519714


Created version '1' of model 'sk-learn-random-forest-reg-model'.
