In [1]:
from mlflow import MlflowClient
from pprint import pprint
from sklearn.ensemble import RandomForestClassifier

In [5]:
client=MlflowClient(tracking_uri="http://127.0.0.1:8080")
all_experiments = client.search_experiments()
print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1753270328097, experiment_id='0', last_update_time=1753270328097, lifecycle_stage='active', name='Default', tags={}>]


In [9]:
exp1=[{"name":exp.name, "lifecycle_stage":exp.lifecycle_stage}
    for exp in all_experiments
    if exp.name=='Default'][0]
    
pprint(exp1)

{'lifecycle_stage': 'active', 'name': 'Default'}


Bad pipe message: %s [b'"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge"']
Bad pipe message: %s [b'="138"\r\nsec-ch-ua-mobile: ?0\r\nsec-ch-ua-platform: "Windows']
Bad pipe message: %s [b'\nUpgrade-Insecure-Requests: 1\r\nUs', b'-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.']
Bad pipe message: %s [b'0.0 Safari/537.36 Edg/138.0.0.0\r\nAccept: text/h']
Bad pipe message: %s [b'ol: max-age=0\r\nsec-ch-ua: "Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"\r\nsec-ch-ua-mobile: ?0\r\n']
Bad pipe message: %s [b'c-ch-ua-platform: "Windows"\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) A', b'leWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0\r\nAccept: text/html,application']
Bad pipe message: %s [b'html+xml,application/xml;q=0.9,image/avif,imag']
Bad pipe message: %s [b'ol: max-age=0\r\nsec-ch-ua: "Not)A;Brand";v="8", "Chromium";v="138", "Micro

# creating new experiment

In [10]:
experiment_description = (
    "This is the grocery forecasting project. "
    "This experiment contains the produce models for apples."
)

experiment_tags = {
    "project_name": "grocery-forecasting",
    "store_dept": "produce",
    "team": "stores-ml",
    "project_quarter": "Q3-2023",
    "content": experiment_description,
}

produce_apples_experiment = client.create_experiment(name="Apple_Models",
                                 tags=experiment_tags)

In [23]:
apple_exp=client.search_experiments(
    filter_string="tags.`project_name`='grocery-forecasting'"
)
pprint(apple_exp[0])
print(apple_exp[0].lifecycle_stage)
print(apple_exp[0].tags['store_dept'])

<Experiment: artifact_location='mlflow-artifacts:/132431847052009726', creation_time=1753286437964, experiment_id='132431847052009726', last_update_time=1753286437964, lifecycle_stage='active', name='Apple_Models', tags={'content': 'This is the grocery forecasting project. This experiment contains '
            'the produce models for apples.',
 'project_name': 'grocery-forecasting',
 'project_quarter': 'Q3-2023',
 'store_dept': 'produce',
 'team': 'stores-ml'}>
active
produce


In [25]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd


def generate_apple_sales_data(base_demand: int = 1000, n_rows: int = 5000):
    # Set seed for reproducibility
    np.random.seed(9999)

    # Create date range
    dates = [datetime.now() - timedelta(days=i) for i in range(n_rows)]
    dates.reverse()

    # Generate features
    df = pd.DataFrame(
        {
            "date": dates,
            "average_temperature": np.random.uniform(10, 35, n_rows),
            "rainfall": np.random.exponential(5, n_rows),
            "weekend": [(date.weekday() >= 5) * 1 for date in dates],
            "holiday": np.random.choice([0, 1], n_rows, p=[0.97, 0.03]),
            "price_per_kg": np.random.uniform(0.5, 3, n_rows),
            "month": [date.month for date in dates],
        }
    )

    # Introduce inflation over time (years)
    df["inflation_multiplier"] = 1 + (df["date"].dt.year - df["date"].dt.year.min()) * 0.03

    # Incorporate seasonality due to apple harvests
    df["harvest_effect"] = np.sin(2 * np.pi * (df["month"] - 3) / 12) + np.sin(
        2 * np.pi * (df["month"] - 9) / 12
    )

    # Modify the price_per_kg based on harvest effect
    df["price_per_kg"] = df["price_per_kg"] - df["harvest_effect"] * 0.5

    # Adjust promo periods to coincide with periods lagging peak harvest by 1 month
    peak_months = [4, 10]  # months following the peak availability
    df["promo"] = np.where(
        df["month"].isin(peak_months),
        1,
        np.random.choice([0, 1], n_rows, p=[0.85, 0.15]),
    )

    # Generate target variable based on features
    base_price_effect = -df["price_per_kg"] * 50
    seasonality_effect = df["harvest_effect"] * 50
    promo_effect = df["promo"] * 200

    df["demand"] = (
        base_demand
        + base_price_effect
        + seasonality_effect
        + promo_effect
        + df["weekend"] * 300
        + np.random.normal(0, 50, n_rows)
    ) * df["inflation_multiplier"]  # adding random noise

    # Add previous day's demand
    df["previous_days_demand"] = df["demand"].shift(1)
    df["previous_days_demand"].fillna(method="bfill", inplace=True)  # fill the first row

    # Drop temporary columns
    df.drop(columns=["inflation_multiplier", "harvest_effect", "month"], inplace=True)

    return df

In [26]:
# Generate the dataset!

df = generate_apple_sales_data_with_promo_adjustment(base_demand=1_000, n_rows=1_000)
df.tail(20)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["previous_days_demand"].fillna(method="bfill", inplace=True)  # fill the first row
  df["previous_days_demand"].fillna(method="bfill", inplace=True)  # fill the first row


Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
980,2025-07-04 21:48:22.139740,34.130183,1.454065,0,0,1.449177,0,999.30629,1029.418398
981,2025-07-05 21:48:22.139739,32.353643,9.462859,1,0,2.856503,0,1169.129427,999.30629
982,2025-07-06 21:48:22.139737,18.816833,0.39147,1,0,1.326429,0,1317.616709,1169.129427
983,2025-07-07 21:48:22.139736,34.533012,2.120477,0,0,0.970131,0,1068.802075,1317.616709
984,2025-07-08 21:48:22.139734,23.057202,2.365705,0,0,1.049931,0,1019.486305,1068.802075
985,2025-07-09 21:48:22.139733,34.810165,3.089005,0,0,2.035149,0,1002.564672,1019.486305
986,2025-07-10 21:48:22.139732,29.208905,3.673292,0,0,2.518098,0,1086.143402,1002.564672
987,2025-07-11 21:48:22.139731,16.428676,4.077782,0,0,1.268979,0,1093.207186,1086.143402
988,2025-07-12 21:48:22.139729,32.067512,2.734454,1,0,0.762317,0,1396.939894,1093.207186
989,2025-07-13 21:48:22.139728,31.938203,13.883486,1,0,1.153301,0,1321.40954,1396.939894


In [32]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score
import mlflow

mlflow.set_tracking_uri("http://127.0.1:8080")
apple_exp=mlflow.set_experiment("Apple_Models")
run_name='apple_rf_forest'
artifact_name='rf_apples'

In [33]:
x=df.drop(columns=['date','demand'])
y=df['demand']
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

params={
    "n_estimators": 100,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 888,
}

rf=RandomForestRegressor(**params)

rf.fit(x_train,y_train)
y_pred=rf.predict(x_test)

mae=mean_absolute_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mse)
r2=r2_score(y_test,y_pred)

metrics={'mae':mae,'mse':mse,'rmse':rmse,'r2':r2}

with mlflow.start_run(run_name=run_name) as run:
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(sk_model=rf,input_example=x_test,name=artifact_name)



🏃 View run apple_rf_forest at: http://127.0.1:8080/#/experiments/132431847052009726/runs/3104334132e9483c943d9daf19c68f9d
🧪 View experiment at: http://127.0.1:8080/#/experiments/132431847052009726
