In [None]:
import mlflow

from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import (
    silhouette_score,
    davies_bouldin_score,
)

In [None]:
# Load the sample dataset (Iris) and split it into the feature matrix
# and the target values exposed by the loader.

db = load_iris()
features, target = db.data, db.target

In [None]:
# Basic option: enable MLflow autologging before fitting anything, so the
# fit below is tracked without any explicit logging call in this cell.
mlflow.autolog()

# Clustering with DBSCAN; fit_predict is the last expression, so the cell
# displays the label array it returns.
dbscan = DBSCAN(min_samples=5, eps=0.5)
dbscan.fit_predict(features)

In [None]:
# Clustering with KMeans: three clusters, seeded so the assignment is
# repeatable. fit_predict is the last expression, so the cell displays the
# label array it returns.
kmeans = KMeans(random_state=42, n_clusters=3)
kmeans.fit_predict(features)

In [None]:
# Runs can also be grouped under a named experiment and logged explicitly.

exp_name = 'Clustering-Ejemplos'

# create_experiment() raises when the name is already taken, which made this
# cell fail on every re-run. Look the experiment up first and only create it
# when it does not exist yet, so the cell is safe to execute repeatedly.
existing_experiment = mlflow.get_experiment_by_name(exp_name)
if existing_experiment is None:
    exp_id = mlflow.create_experiment(name=exp_name)
else:
    exp_id = existing_experiment.experiment_id

# Single source of truth for K: feeds the run name, the model and the
# logged parameter so they cannot drift apart.
n_clusters = 2

with mlflow.start_run(experiment_id=exp_id, run_name=f"Kmeans - K={n_clusters}"):
    # Fixed seed so the logged score is reproducible across re-runs.
    modelo_clusters = KMeans(n_clusters=n_clusters, random_state=42)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    # Save parameter
    mlflow.log_param('value_of_k', n_clusters)
    # Save metric. The key's spelling is kept as-is so it stays comparable
    # with the same key logged by the other runs in this notebook.
    mlflow.log_metric('silhoutte_score', score)
    # Save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    # No explicit mlflow.end_run(): the `with` block ends the run on exit.

In [None]:
# Single source of truth for K: feeds the run name, the model and the
# logged parameter so they cannot drift apart.
n_clusters = 3

with mlflow.start_run(experiment_id=exp_id, run_name=f"Kmeans - K={n_clusters}"):
    # Fixed seed so the logged scores are reproducible across re-runs.
    modelo_clusters = KMeans(n_clusters=n_clusters, random_state=42)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    score_2 = davies_bouldin_score(features, cluster_labels)
    # Save parameter
    mlflow.log_param('value_of_k', n_clusters)
    # Save metrics. The first key's spelling is kept as-is so it stays
    # comparable with the same key logged by the other runs in this notebook.
    mlflow.log_metric('silhoutte_score', score)
    mlflow.log_metric('davies_bouldin_score', score_2)
    # Save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    # No explicit mlflow.end_run(): the `with` block ends the run on exit.

In [None]:
# Hyperparameters are defined once and reused for both the estimator and the
# logged parameters, so what is logged always matches what was fitted.
dbscan_params = {'eps': 0.5, 'min_samples': 5}

with mlflow.start_run(experiment_id=exp_id, run_name="DBSCAN"):
    modelo_clusters = DBSCAN(**dbscan_params)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    # NOTE(review): the labels are scored exactly as DBSCAN produced them;
    # if they include a noise label it is scored like any other cluster --
    # confirm that is the intended metric.
    score = silhouette_score(features, cluster_labels)
    # Save parameters (logs the 'eps' and 'min_samples' keys)
    mlflow.log_params(dbscan_params)
    # Save metric. The key's spelling is kept as-is so it stays comparable
    # with the same key logged by the other runs in this notebook.
    mlflow.log_metric('silhoutte_score', score)
    # Save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    # No explicit mlflow.end_run(): the `with` block ends the run on exit.

In [None]:
# Launch the MLflow tracking UI through a shell escape to browse the runs
# logged above.
# NOTE(review): this presumably runs in the foreground, so the cell keeps
# executing until the kernel is interrupted -- confirm, and consider starting
# it from a separate terminal instead.
!mlflow ui