In [1]:
# Cargo las librerías
import importlib
import mlflow
importlib.reload(mlflow)
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

In [13]:
# Name of the MLflow experiment that the runs in this notebook are grouped under
experiment_name = 'experiment_iris2'

In [16]:
if not mlflow.get_experiment_by_name(experiment_name):
    mlflow.create_experiment(name=experiment_name)

In [17]:
experiment = mlflow.get_experiment_by_name(experiment_name)

In [18]:
# Setup de MLflow
mlflow.set_tracking_uri('http://66.97.41.26:8080/')

In [6]:
# Load the iris dataset
data = load_iris()

In [7]:
# Split features and labels into train and test sets:
# 20% of the samples are held out, with a fixed seed for the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    data['data'], data['target'], test_size=0.2, random_state=1234
)

In [8]:
# Define the model.
# The seed is fixed (same value as the train/test split) so that the forest,
# and therefore the grid-search result and logged metrics, are reproducible.
rf_class = RandomForestClassifier(random_state=1234)

In [9]:
# Hyper-parameter search space handed to the grid search
grid = dict(
    max_depth=[6, 8, 10],
    min_samples_split=[2, 3, 4, 5],
    min_samples_leaf=[2, 3, 4, 5],
    max_features=[2, 3],
)

In [10]:
# Run the grid search with 5-fold cross-validation on the training split
rf_class_grid = GridSearchCV(estimator=rf_class, param_grid=grid, cv=5)
rf_class_grid.fit(x_train, y_train)
# fit() returns the search object itself, so this is an alias of rf_class_grid
rf_class_grid_fit = rf_class_grid

In [11]:
# Show the hyper-parameter combination selected by the grid search
print(
    f'Best parameters: {rf_class_grid_fit.best_params_}'
)

Best parameters: {'max_depth': 6, 'max_features': 2, 'min_samples_leaf': 2, 'min_samples_split': 5}


In [22]:
# Log the tuned model, its parameters, the test metrics and the training
# features to MLflow inside a single run of the experiment.
with mlflow.start_run(experiment_id=experiment.experiment_id):

    # Log the best hyper-parameters found by the grid search
    mlflow.log_params(rf_class_grid_fit.best_params_)

    # Predict on the held-out test split
    y_pred = rf_class_grid_fit.predict(x_test)

    # Compute accuracy plus weighted precision and recall
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}')

    # Log the metrics
    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall
    }
    mlflow.log_metrics(metrics)

    # Log the training features as an artifact.
    # The same path is used for saving and for logging: previously the array
    # was written to a hard-coded absolute directory while log_artifact read a
    # relative 'x_train.npy', which only matched when the working directory
    # happened to be that directory.
    x_train_path = 'x_train.npy'
    np.save(x_train_path, x_train)
    mlflow.log_artifact(x_train_path)

    # Log the fitted grid-search estimator as the model
    mlflow.sklearn.log_model(rf_class_grid_fit, 'iris_rf_first_attempt')

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [20]:
# Save the training features next to the notebook as 'x_train.npy'.
# The previous target was the bare directory name '.../p1'; since np.save
# appends '.npy' to names without that extension, it produced 'p1.npy' beside
# the project folder instead of an x_train file inside it.
np.save('x_train.npy', x_train)