# Análisis y Modelado - Heart Disease

Notebook principal con preprocesamiento, modelado y evaluación.

In [None]:
# Instalación de librerías (si es necesario)
!pip install seaborn plotly scikit-learn


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Location of the raw heart-disease CSV (public GitHub repository).
url = 'https://raw.githubusercontent.com/ChiragSahni/Data-Science-Heart-Disease/master/heart.csv'

# Download the CSV straight into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

In [None]:
# Preprocessing
# Separate the predictors from the label column.
X = df.drop('target', axis=1)
y = df['target']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training data
# (data leakage) and make the test metrics optimistic.
# The same random_state and number of samples keep the same rows in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply them
# unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-only statistics; kept so that any
# later cell referring to X_scaled keeps working.
X_scaled = scaler.transform(X)

In [None]:
# Model 1 - Logistic Regression
# Fit with default hyperparameters on the training split (fit returns the estimator itself).
log_model = LogisticRegression().fit(X_train, y_train)

# Predict the held-out split, then report accuracy and the per-class metrics.
y_pred_log = log_model.predict(X_test)
print('Accuracy (Logistic):', accuracy_score(y_true=y_test, y_pred=y_pred_log))
print(classification_report(y_true=y_test, y_pred=y_pred_log))

In [None]:
# Model 2 - Random Forest
# 100 trees with a fixed seed; fit returns the estimator itself.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the held-out split, then report accuracy and the per-class metrics.
y_pred_rf = rf.predict(X_test)
print('Accuracy (RF):', accuracy_score(y_true=y_test, y_pred=y_pred_rf))
print(classification_report(y_true=y_test, y_pred=y_pred_rf))

In [None]:
# Save the trained Random Forest (optional)
import joblib
from pathlib import Path

# Google Drive mount point used by Colab and the target file inside it.
drive_root = Path('/content/drive/MyDrive')
model_path = Path('/content/drive/MyDrive/ml-analysis-colab/results/models/random_forest_heart.pkl')

# Only create folders when Drive is really mounted: creating them under an
# unmounted /content/drive would write to the ephemeral local disk and could
# prevent Drive from being mounted there afterwards.
if not drive_root.is_dir():
    raise FileNotFoundError(
        f"Google Drive is not mounted at {drive_root}; mount it before saving the model."
    )

# joblib.dump does not create missing parent directories, so make sure the
# results/models folder exists before writing.
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(rf, str(model_path))
