# **Modelo**

Vamos a entrenar el modelo usando MLflow para el versionado de los experimentos.

## 1. Librerías y MLflow

In [2]:
# Imports
import os
from pathlib import Path

import mlflow

# Local folder that stores the MLflow experiments (created on first run).
project_root = os.getcwd()
mlruns_path  = os.path.join(project_root, "mlruns")
os.makedirs(mlruns_path, exist_ok=True)

# Point MLflow at the local file store.
# Path.as_uri() builds a well-formed file:// URI on every OS: it emits exactly
# three slashes (the hand-built f-string produced "file:////..." on POSIX) and
# percent-encodes spaces and non-ASCII characters in the project path.
mlflow.set_tracking_uri(Path(mlruns_path).as_uri())

# Create the experiment if it does not exist yet, otherwise select it.
experiment_name = "corazon"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location=('file:///c:/Users/Javier/Documents/Utpl/Herramientas/Proyecto '
 'Corazón/mlruns/828798877481150983'), creation_time=1748045001838, experiment_id='828798877481150983', last_update_time=1748045001838, lifecycle_stage='active', name='corazon', tags={}>

## 2.Entrenamiento

### 2.1.Entrenar

In [3]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the data
df = pd.read_csv('Data/medical_data.csv')

# Target and features
y = df['Result']                # 'negative' / 'positive'
X = df.drop('Result', axis=1)

# Encode the target as integers
le = LabelEncoder()
y_enc = le.fit_transform(y)
print("Mapping Result → números:", dict(zip(le.classes_, le.transform(le.classes_))))

# Numeric columns that get standardized
num_cols = [
    'Gender',
    'CK-MB', 'Troponin', 'Age',
    'Heart rate',
    'Systolic blood pressure',
    'Diastolic blood pressure',
    'Blood sugar'
]

# Split BEFORE scaling so that no statistic of the test set leaks into the
# preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_enc,
    test_size=0.2,
    random_state=42,
    stratify=y_enc
)

# Work on explicit copies so the column assignment below does not write into
# a view of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training set only; the test set is merely transformed.
# NOTE(review): only `clf` is logged below, so the fitted scaler is not stored
# with the model — confirm how the consuming app preprocesses its inputs.
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Hyperparameters
params = {
    "n_estimators": 100,
    "max_depth": 10,
    "min_samples_split": 5,
    "random_state": 42
}

# Training and MLflow logging
with mlflow.start_run(run_name="rf_medical"):
    mlflow.log_params(params)

    clf = RandomForestClassifier(**params)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", acc)

    # target_names makes the report keyed by the original class labels.
    # Without it the keys are '0' / '1', which never matched le.classes_, so
    # the per-class F1 metrics were silently never logged.
    report = classification_report(
        y_test, y_pred,
        target_names=le.classes_,
        output_dict=True
    )
    for cls in le.classes_:
        mlflow.log_metric(f"f1_{cls}", report[cls]["f1-score"])

    mlflow.sklearn.log_model(clf, artifact_path="rf_medical_model")

# Results report
print(f"Accuracy: {acc:.3f}")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Mapping Result → números: {'negative': np.int64(0), 'positive': np.int64(1)}




Accuracy: 0.985
              precision    recall  f1-score   support

    negative       0.98      0.98      0.98       102
    positive       0.99      0.99      0.99       162

    accuracy                           0.98       264
   macro avg       0.98      0.98      0.98       264
weighted avg       0.98      0.98      0.98       264



## 3. Abrir MLflow
Es necesario ejecutar este código para iniciar la interfaz de MLflow y poder ejecutar la app sin problemas.

In [3]:
# Imports (repeated here so this cell also runs on its own in a fresh kernel,
# which is how it is used before starting the app).
import os
import subprocess
from pathlib import Path

# Absolute path to mlruns, with '\' converted to '/'.
project_root = os.getcwd()
mlruns_path  = os.path.join(project_root, "mlruns").replace("\\", "/")

# Well-formed file:// URI for the store (exactly three slashes on every OS,
# spaces and non-ASCII characters percent-encoded).
mlruns_uri = Path(mlruns_path).as_uri()

# Same command one would type in a terminal.
cmd = [
    "mlflow", "ui",
    "--backend-store-uri",    mlruns_uri,
    "--default-artifact-root", mlruns_uri,
    "--port",                 "9090"
]

# Launch the server in the background without blocking the cell.
# The output is discarded instead of being sent to pipes: nothing in this
# notebook reads those pipes, and an undrained pipe makes the child block once
# the OS buffer fills up. Redirect to a log file instead if the server output
# is needed for debugging.
process = subprocess.Popen(
    cmd,
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL
)

# NOTE: this only reports that the process was spawned; it does not verify
# that the server is actually listening on the port.
print("✔ MLflow UI arrancado en http://localhost:9090")

✔ MLflow UI arrancado en http://localhost:9090
