In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Load the dataset from the CSV file in the working directory
df = pd.read_csv("heart.csv")

# Target vector first, then the feature matrix (every column except the target)
y = df["HeartDisease"]
X = df.drop(columns=["HeartDisease"])

# Partition the feature columns by dtype: object-typed columns are handled as
# categorical, every other column as numeric
numeric_columns = X.select_dtypes(exclude=["object"]).columns
categorical_columns = X.select_dtypes(include=["object"]).columns

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Numeric branch: fill missing values with the column mean, then standardize
numeric_preprocessor = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent value, then
# one-hot encode; categories not seen during fit are ignored at transform time
categorical_preprocessor = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group through its own preprocessing branch
column_branches = [
    ("num", numeric_preprocessor, numeric_columns),
    ("cat", categorical_preprocessor, categorical_columns),
]
preprocessor = ColumnTransformer(column_branches)

# Full model: preprocessing followed by a logistic regression classifier
classifier = LogisticRegression(max_iter=10000)
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", classifier),
])

# Fit preprocessing and classifier together on the training rows only
pipeline.fit(X_train, y_train)

# Score the held-out rows
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# NOTE(review): the "Precisión" label below is printed for the accuracy score,
# not for a precision metric
print("Precisión:", accuracy)
print("F1 Score:", f1)


Precisión: 0.8811881188118812
F1 Score: 0.8959537572254335
