In [42]:
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import mlflow

from utils.config import *

In [53]:
# Load the dataset
data = fetch_openml(name='creditcard', version=1, as_frame=True)
df = pd.DataFrame(data=data.data, columns=data.feature_names)
df['target'] = data.target

df = df.iloc[:10000]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[data.feature_names],
                                                    df['target'],
                                                    test_size=0.2,
                                                    random_state=42)


In [54]:
def train(params):
    with mlflow.start_run():
        # Train model
        mlflow.log_params(params)

        clf = RandomForestClassifier(**params)
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        signature = mlflow.models.infer_signature(X_train[:10], y_pred[:10])

        mlflow.sklearn.log_model(clf,
                                 'model',
                                 signature=signature,
                                 input_example=X_train[:2])

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('precision', precision)
        mlflow.log_metric('recall', recall)
        mlflow.log_metric('f1', f1)

        # Show results
        print(f"accuracy: {accuracy:.3f}")
        print(f"precision:  {precision:.3f}")
        print(f"recall:  {recall:.3f}")
        print(f"f1:  {f1:.3f}")

In [63]:
path_mlruns = os.path.join(PROJECT_PATH, 'mlflow')
mlflow.set_tracking_uri(f"file:{path_mlruns}")
mlflow.set_experiment('Teste4')

2025/02/28 15:29:38 INFO mlflow.tracking.fluent: Experiment with name 'Teste4' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:C:\\Users\\bruno.deon\\Desktop\\Workspace\\AirFlow\\mlflow/492419223660894322', creation_time=1740767378346, experiment_id='492419223660894322', last_update_time=1740767378346, lifecycle_stage='active', name='Teste4', tags={}>

In [59]:
params = {
    'random_state': 0,
    'n_estimators': 15,
    'max_depth': 3,
    'class_weight': 'balanced'
}

train(params)

accuracy: 0.999
precision:  0.999
recall:  0.999
f1:  0.998


In [52]:
import subprocess

# Get the tracking URI dynamically
path_mlruns = mlflow.get_tracking_uri()

# Define the command to launch MLflow UI on port 5010
command = [
    "mlflow", "ui", "--backend-store-uri", f"{path_mlruns}", "--port", "5050"
]

# Start the MLflow UI in the background
process = subprocess.Popen(command)

print(f"MLflow UI is running at http://127.0.0.1:5050")

MLflow UI is running at http://127.0.0.1:5050


In [64]:
process.terminate()

In [62]:
mlflow.get_experiment_by_name('Teste1')

<Experiment: artifact_location='file:C:\\Users\\bruno.deon\\Desktop\\Workspace\\AirFlow\\mlflow/370614023596435975', creation_time=1740760899120, experiment_id='370614023596435975', last_update_time=1740760899120, lifecycle_stage='active', name='Teste1', tags={}>