#### Installing MLflow

In [8]:
#%pip install mlflow

#### Importing Libraries

In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import mlflow

#### Creating MLFlow Experiment

Make sure """mlflow ui""" command is running in background (terminal)

In [3]:
mlflow.set_tracking_uri(uri = "http://127.0.0.1:5000")
experiment_name = "classic_evaluation"
mlflow.set_experiment(experiment_name)

2024/12/13 12:17:28 INFO mlflow.tracking.fluent: Experiment with name 'classic_evaluation' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/211927343751898727', creation_time=1734072448898, experiment_id='211927343751898727', last_update_time=1734072448898, lifecycle_stage='active', name='classic_evaluation', tags={}>

#### Creating a sample dataset

In [5]:
x, y = make_classification(n_samples = 1000, n_features = 5, n_classes = 2, random_state = 42)

x_df = pd.DataFrame(x, columns = [f"feature_{i+1}" for i in range(x.shape[1])])
y_df = pd.DataFrame(y, columns = ['target'])
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size = 0.2, random_state = 42)

#### Random Forest Classifier

In [6]:
rfc = RandomForestClassifier(n_estimators = 1)
rfc.fit(x_train, y_train)
predictions = rfc.predict(x_test)

  return fit_method(estimator, *args, **kwargs)


In [7]:
eval_df = pd.DataFrame({
    "target": y_test.to_numpy().flatten(), 
    "predictions": predictions
})
print(eval_df.head())

   target  predictions
0       1            1
1       1            1
2       1            1
3       1            1
4       0            1


In [10]:
# using the eval_df for evaluating our model
result = mlflow.evaluate(
    data = eval_df, 
    model_type = "classifier", 
    targets = "target", 
    predictions = "predictions" 
)

2024/12/13 12:52:31 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/13 12:52:31 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run rebellious-zebra-223 at: http://127.0.0.1:5000/#/experiments/211927343751898727/runs/b5cf9c3645c040c38fb39ad4b1b01928
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/211927343751898727


<Figure size 1050x700 with 0 Axes>

#### Evaluating the model using estimator

Using model as a function

In [11]:
def random_forest_clf(model_input):
    return rfc.predict(model_input)

In [12]:
eval_df_for_model = x_test.copy()
eval_df_for_model["target"] = y_test

In [13]:
eval_df_for_model.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,target
521,1.250932,-1.064291,-2.238231,-0.914547,1.261286,1
737,-0.196283,0.19082,-0.243384,0.154804,-0.256094,1
740,2.659138,-0.265773,1.072978,-0.996758,-2.195564,1
660,0.087778,-0.021011,-0.66778,-0.038709,-0.042586,1
411,-0.662457,0.741043,-0.35834,0.568499,-1.101298,0


In [None]:
result = mlflow.evaluate(
    model = random_forest_clf, 
    data = eval_df_for_model, 
    model_type = "classifier", 
    targets = "target", 
    predictions = "predictions" 
)

2024/12/13 13:00:11 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/13 13:00:11 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run skillful-lark-935 at: http://127.0.0.1:5000/#/experiments/211927343751898727/runs/bf539fcc28324423a0dbb9e3e4632641
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/211927343751898727


In [19]:
result.metrics

{'true_negatives': 72,
 'false_positives': 25,
 'false_negatives': 13,
 'true_positives': 90,
 'example_count': 200,
 'accuracy_score': 0.81,
 'recall_score': 0.8737864077669902,
 'precision_score': 0.782608695652174,
 'f1_score': 0.8256880733944955}

#### Addition of Extra Metrics

In [20]:
from mlflow.metrics import make_metric
from sklearn.metrics import f1_score

In [24]:
def custom_accuracy(df, __builtin_metrics): 
    targets = df["target"]
    predictions = df["prediction"]
    return sum(targets == predictions)/len(targets)

def custom_f1_score(df, __builtin_metrics):
    targets = df["target"]
    predictions = df["prediction"]
    return f1_score(targets, predictions, average = "weighted")

In [25]:
custom_metric_accuracy = make_metric(
    eval_fn = custom_accuracy, 
    name = "custom_accuracy", 
    greater_is_better = True
)

custom_metric_f1_score = make_metric(
    eval_fn = custom_f1_score, 
    name = "custom_f1_score", 
    greater_is_better = True
)

In [26]:
result = mlflow.evaluate(
    model = random_forest_clf, 
    data = eval_df_for_model, 
    model_type = "classifier", 
    targets = "target", 
    predictions = "predictions", 
    extra_metrics = [custom_metric_accuracy, custom_metric_f1_score]
)

2024/12/13 13:12:25 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/13 13:12:25 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run monumental-toad-833 at: http://127.0.0.1:5000/#/experiments/211927343751898727/runs/1ecd63c2bb3e4c1fa221350840ab434a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/211927343751898727


In [27]:
result.artifacts

{'confusion_matrix': ImageEvaluationArtifact(uri='mlflow-artifacts:/211927343751898727/1ecd63c2bb3e4c1fa221350840ab434a/artifacts/confusion_matrix.png')}

#### Adding custom artifacts

In [28]:
from sklearn.metrics import PrecisionRecallDisplay
import matplotlib.pyplot as plt

def custom_precision_recall_curve(df, _builtin_metrics, _artifacts_dir): 
    targets = df["target"]
    predictions = df["prediction"]
    pr_display = PrecisionRecallDisplay.from_predictions(targets, predictions)
    return {"precision_recall_curve": pr_display.figure_}

In [31]:
result = mlflow.evaluate(
    data = eval_df, 
    model_type = "classifier", 
    targets = "target", 
    predictions = "predictions", 
    extra_metrics = [custom_metric_accuracy, custom_metric_f1_score], 
    custom_artifacts = [custom_precision_recall_curve]
)

2024/12/13 13:44:18 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/13 13:44:18 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run exultant-lark-561 at: http://127.0.0.1:5000/#/experiments/211927343751898727/runs/79e094067a9d44b5bf783d21f6098321
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/211927343751898727


In [32]:
result = mlflow.evaluate(
    model = random_forest_clf,
    data = eval_df_for_model, 
    model_type = "classifier", 
    targets = "target", 
    predictions = "predictions", 
    extra_metrics = [custom_metric_accuracy, custom_metric_f1_score], 
    custom_artifacts = [custom_precision_recall_curve]
)

2024/12/13 13:46:37 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/13 13:46:37 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run traveling-ox-408 at: http://127.0.0.1:5000/#/experiments/211927343751898727/runs/ed0e2fb558d0434abe6cb03efaf51e5e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/211927343751898727


In [34]:
result.artifacts

{'precision_recall_curve': ImageEvaluationArtifact(uri='mlflow-artifacts:/211927343751898727/ed0e2fb558d0434abe6cb03efaf51e5e/artifacts/precision_recall_curve.png'),
 'confusion_matrix': ImageEvaluationArtifact(uri='mlflow-artifacts:/211927343751898727/ed0e2fb558d0434abe6cb03efaf51e5e/artifacts/confusion_matrix.png')}