Run the terminal command `mlflow server --host 127.0.0.1 --port 8080` to start the MLflow tracking server before running this notebook.
<br><br> Experiment to add window_size as a param

In [1]:
import mlflow

# Point the MLflow client at the local tracking server started with
# `mlflow server --host 127.0.0.1 --port 8080`.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")


In [2]:
from pathlib import Path

import numpy as np
import polars as pl
from mlflow.models import infer_signature
from sklearn import metrics

from lisa.config import INTERIM_DATA_DIR, PLOTS_DIR
from lisa.features import sliding_window, standard_scaler, train_test_split
from lisa.modeling import random_forest
from lisa import evaluate

import os
# Move the working directory up one level so that mlruns end up in the
# intended directory.
# NOTE(review): this is relative to the current directory, so re-running the
# cell moves up one more level each time.
os.chdir("..")

# Load the labelled interim data set that every window size is derived from.
input_path = INTERIM_DATA_DIR.joinpath("labelled_test_data.csv")
original_df = pl.read_csv(input_path)

# Send all tracking calls to the local MLflow server.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

# Select the experiment that the runs below are recorded under.
mlflow.set_experiment(experiment_name="RF Test")

# Start a parent MLflow run; every window size is recorded as a nested child run.
with mlflow.start_run() as parent_run:
    # NOTE: arange(150, 250, 100) currently yields the single value 150.
    # Raise the stop value or lower the step to sweep several window sizes.
    windows = np.arange(150, 250, 100)

    for window in windows:
        with mlflow.start_run(nested=True, run_name=f"Window_{window}"):
            # Build the windowed feature table for this window size.
            df = sliding_window(original_df, period=window, log=True)

            # 80/20 split; the gap is set to the window size
            # (presumably so train and test windows do not overlap - confirm).
            X_train, X_test, y_train, y_test = train_test_split(
                df, train_size=0.8, gap=window
            )

            scaled_X_train, scaled_X_test, scaler = standard_scaler(X_train, X_test)

            params = {"n_estimators": 100, "max_depth": 128}

            model = random_forest.random_forest_classifier(
                scaled_X_train, y_train.to_numpy().ravel(), **params
            )

            # Evaluate on the held-out split and save the confusion matrix plot.
            accuracy = metrics.accuracy_score(y_test, model.predict(scaled_X_test))
            labels = df["ACTIVITY"].unique(maintain_order=True)
            plot_path = PLOTS_DIR / "confusion_matrix.png"
            cm = evaluate.confusion_matrix(model, labels, scaled_X_test, y_test, plot_path)

            # Log the hyperparameters, including the window size under test
            params["window"] = window
            mlflow.log_params(params)

            # Log metrics and the confusion matrix plot
            mlflow.log_metric("accuracy", accuracy)
            mlflow.log_artifact(plot_path)

            # Set a tag that we can use to remind ourselves what this run was for
            mlflow.set_tag("Training Info", "Basic RF model for labelled test data")

            # Infer the model signature
            signature = infer_signature(
                scaled_X_train, model.predict(scaled_X_train)
            )

            # Log the model. The input example is limited to the first few rows:
            # MLflow stores the example alongside the model, so passing the
            # whole training set would bloat every run's artifacts.
            mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="rf_model",
                signature=signature,
                input_example=scaled_X_train[:5],
            )

2024-09-04 16:52:27.638 | INFO     | lisa.config:<module>:15 - PROJ_ROOT path is: /Users/tomwilson/code/LISA


2024-09-04 16:52:33.048 | INFO     | lisa.features:sliding_window:134 - Aggregating data...


