In [10]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import mlflow
import mlflow.sklearn
import pickle
from mlflow import get_artifact_uri 

In [3]:
# Read the train and test splits from the local data/ directory.
train_df = pd.read_csv("data/train.csv")
test_df = pd.read_csv("data/test.csv")

# Pull the "label" target column out of each split ...
y_train, y_test = train_df["label"], test_df["label"]

# ... and keep every remaining column as the feature matrix.
X_train = train_df.drop(columns="label")
X_test = test_df.drop(columns="label")


In [5]:
# Fraction of training rows whose label is 'A', rounded to two decimals.
is_class_a = train_df['label'].eq('A')
proporcion_A = is_class_a.mean().round(2)
print(proporcion_A)


0.33


In [6]:
# Missing-value count per column, then summed over all columns of the
# training frame.
nans_per_column = train_df.isna().sum()
total_nans = nans_per_column.sum()
print(total_nans)

0


In [20]:
# Hyperparameters for this run, declared once so the values used to build the
# tree and the values logged to MLflow cannot drift apart.
tree_params = {"max_depth": 3, "min_samples_split": 4}

with mlflow.start_run():
    # Train the decision tree with the parameters above (fixed seed).
    model = DecisionTreeClassifier(**tree_params, random_state=42)
    model.fit(X_train, y_train)

    # Predict on the test split.
    y_pred = model.predict(X_test)

    # Evaluation metrics; the F1 score uses the 'weighted' average across
    # classes because the target is multiclass.
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    # Log every hyperparameter and each metric exactly once. Logging the same
    # metric key twice adds a second, redundant point to its history.
    for param_name, param_value in tree_params.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    # Serialize the fitted model to a local .pkl file ...
    with open("model_1.pkl", "wb") as f:
        pickle.dump(model, f)

    # ... attach that file to the run as a plain artifact ...
    mlflow.log_artifact("model_1.pkl")

    # ... and also log it in MLflow's sklearn model format.
    mlflow.sklearn.log_model(model, "decision_tree_model")

    print(f"Accuracy: {acc:.2f}")
    print(f"F1 Score: {f1:.2f}")



Accuracy: 0.28
F1 Score: 0.27


In [17]:
# Show the current accuracy and F1 values, one per line, with two decimals.
print(f"Accuracy: {acc:.2f}", f"F1 Score: {f1:.2f}", sep="\n")

Accuracy: 0.28
F1 Score: 0.27


In [15]:
# Close the currently active MLflow run, if there is one.
open_run = mlflow.active_run()
if open_run is not None:
    mlflow.end_run()

In [11]:
# Build the URI of "model_1.pkl" from an existing run instead of calling
# get_artifact_uri() with no run open: that function works on the *active*
# run and implicitly starts a new one when none is active, which leaves a
# stray open run (the next mlflow.start_run() then fails) and returns a URI
# under a run that never logged the file.
# last_active_run() returns the active run if there is one, otherwise the most
# recently finished one -- assumes this cell is executed right after the run
# that logged model_1.pkl; verify if cells are reordered.
logged_run = mlflow.last_active_run()
if logged_run is None:
    raise RuntimeError(
        "No MLflow run available: execute the training cell that logs "
        "model_1.pkl before requesting its artifact URI."
    )
uri = f"{logged_run.info.artifact_uri.rstrip('/')}/model_1.pkl"

print(uri)

file:///home/gonzadzz/GitHub/mlops_istea/parcial/mlruns/0/1b10fe0bbe004c2ea2674dd5268cfc62/artifacts/model_1.pkl


In [18]:
# Hyperparameters for this run, declared once so the values used to build the
# tree and the values logged to MLflow cannot drift apart.
tree_params = {"max_depth": 5, "min_samples_split": 2}

with mlflow.start_run():
    # Train the decision tree with the parameters above (fixed seed).
    model = DecisionTreeClassifier(**tree_params, random_state=42)
    model.fit(X_train, y_train)

    # Log the hyperparameters so this run can be compared with the other
    # decision-tree run, which records the same two parameters.
    for param_name, param_value in tree_params.items():
        mlflow.log_param(param_name, param_value)

    # Predict on the test split.
    y_pred = model.predict(X_test)

    # Evaluation metrics; the F1 score uses the 'weighted' average across
    # classes because the target is multiclass.
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    # Log the classification report: every entry whose value is a dict
    # becomes one metric per inner key, named "<entry>_<metric>". Entries
    # whose value is not a dict are skipped.
    report = classification_report(y_test, y_pred, output_dict=True)
    for label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric_name}", value)

    # Serialize the fitted model to a local .pkl file ...
    with open("model_2.pkl", "wb") as f:
        pickle.dump(model, f)

    # ... and attach that file to the run as an artifact.
    mlflow.log_artifact("model_2.pkl")

In [19]:
# Show the current accuracy and F1 values, one per line, with two decimals.
metric_lines = [f"Accuracy: {acc:.2f}", f"F1 Score: {f1:.2f}"]
print("\n".join(metric_lines))

Accuracy: 0.15
F1 Score: 0.16
