In [1]:
# Incluir las bibliotecas requeridas para el modelo y su métrica
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score
from sklearn.neural_network import MLPClassifier

# Load the dataset from a public S3 object straight into a pandas DataFrame.
# NOTE(review): the bucket name and md5-style path suggest a DVC remote store — confirm.
url = 'https://diabetes-dvcstore.s3.us-east-1.amazonaws.com/files/md5/10/1cada3906fab160ea188043d7f9a1b'
df = pd.read_csv(url)

# Target vector (y) is the 'Diabetes_012' column; the feature matrix (X) is
# every remaining column.
y = df['Diabetes_012']
X = df.drop('Diabetes_012', axis=1)

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [3]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-2.17.2-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.17.2 (from mlflow)
  Downloading mlflow_skinny-2.17.2-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.17.2->mlflow)
  Downloading databricks_sdk-0.36.0-py3-none-any.whl.metadata (38 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.5-py3-none-any.whl.metadata (10 kB)
Colle

In [4]:
# Import MLflow and its scikit-learn integration to track experiments and log the trained model
import mlflow
import mlflow.sklearn

# Tracking server for runs and artifacts. Left disabled, so MLflow uses its
# default tracking location; uncomment to send runs to a server instead.
#mlflow.set_tracking_uri('http://localhost:5000')

# Select the experiment by name and keep the returned handle so the run below
# can be attached to it explicitly.
experiment = mlflow.set_experiment("Diabetes_USA")

# Open a run inside that experiment. Everything logged within the `with` block
# (parameters, model artifact, metrics) is attached to this run.
with mlflow.start_run(experiment_id=experiment.experiment_id):
    # Model hyperparameters
    hidden_layer_sizes = (300,)
    max_iter = 600
    learning_rate_init = 0.001
    random_state = 64

    # Log the parameters before training so that a run which fails during
    # fit() still records the configuration that was attempted.
    mlflow.log_param("hidden_layer_sizes", hidden_layer_sizes)
    mlflow.log_param("max_iter", max_iter)
    mlflow.log_param("learning_rate_init", learning_rate_init)
    mlflow.log_param("random_state", random_state)

    # Build the classifier from the parameters above and train it
    NN_model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, learning_rate_init=learning_rate_init,
                             max_iter=max_iter, random_state=random_state)
    NN_model.fit(X_train, y_train)

    # Predict on the held-out test split
    predictions = NN_model.predict(X_test)

    # Store the fitted model as a run artifact
    mlflow.sklearn.log_model(NN_model, "Neuronal-Network-model")

    # Compute and log the evaluation metrics.
    # NOTE(review): recall_score is called with its default averaging, which
    # presumably requires a two-class target — confirm against the dataset.
    accuracy = accuracy_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    # The accuracy value was previously logged under the key "Precision",
    # which names a different metric; log it as "Exactitud" (accuracy) instead.
    mlflow.log_metric("Exactitud", accuracy)
    mlflow.log_metric("Sensibilidad", recall)
    print(accuracy)
    print(recall)

2024/11/11 03:39:03 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_USA' does not exist. Creating a new experiment.


0.7452508805358277
0.8475258918296893
