# Predicción falla cardiaca

## 1. Cargar Datos

In [49]:
import pandas as pd

df = pd.read_csv('heart_failure.csv')
df

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.00,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.00,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.00,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.00,2.7,116,0,0,8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,0,61,1,38,1,155000.00,1.1,143,1,1,270,0
295,55.0,0,1820,0,38,0,270000.00,1.2,139,0,0,271,0
296,45.0,0,2060,1,60,0,742000.00,0.8,138,0,0,278,0
297,45.0,0,2413,0,38,0,140000.00,1.4,140,1,1,280,0


## 2. Limpiar

In [50]:
# dividir datos
from sklearn.model_selection import train_test_split

X = df.drop(columns=['DEATH_EVENT', 'time'])
y = df['DEATH_EVENT']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=77)
X

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking
0,75.0,0,582,0,20,1,265000.00,1.9,130,1,0
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0
2,65.0,0,146,0,20,0,162000.00,1.3,129,1,1
3,50.0,1,111,0,20,0,210000.00,1.9,137,1,0
4,65.0,1,160,1,20,0,327000.00,2.7,116,0,0
...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,0,61,1,38,1,155000.00,1.1,143,1,1
295,55.0,0,1820,0,38,0,270000.00,1.2,139,0,0
296,45.0,0,2060,1,60,0,742000.00,0.8,138,0,0
297,45.0,0,2413,0,38,0,140000.00,1.4,140,1,1


In [51]:
# Escalar
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

vars = ['age', 'creatinine_phosphokinase', 'ejection_fraction', 'platelets', 'serum_creatinine', 'serum_sodium']
X_train[vars] = scaler.fit_transform(X_train[vars])
X_test[vars] = scaler.fit_transform(X_test[vars])
X_train

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking
165,0.727273,0,0.096070,1,0.428571,1,0.202328,0.089888,0.628571,0,0
179,0.272727,0,0.103598,0,0.464286,0,0.307795,0.022472,0.771429,1,1
120,0.363636,1,0.091095,0,0.821429,1,0.224148,0.112360,0.628571,1,1
232,0.000000,1,0.013524,0,0.375000,0,0.278700,0.044944,0.685714,1,0
5,0.909091,1,0.003062,0,0.464286,1,0.216875,0.179775,0.542857,1,1
...,...,...,...,...,...,...,...,...,...,...,...
293,0.418182,1,0.010207,1,0.375000,0,0.186568,0.044944,0.657143,1,1
235,0.672727,1,0.010972,0,0.642857,1,0.461753,0.067416,0.685714,1,0
84,0.345455,1,0.032789,1,0.196429,1,0.335677,0.056180,0.800000,0,0
95,0.327273,1,0.014034,0,0.821429,1,0.235059,0.056180,0.800000,1,0


## 3. Modelar

In [54]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
y_probs = model.predict_proba(X_test)[:, 1]

thresholds = np.arange(0.1, 0.9, 0.05)

print(f"{'Umbral':<10} {'Accuracy':<10} {'Recall (Detecta Muertes)':<25} {'Precision (No alarma en falso)':<25}")
print("-" * 75)

best_threshold = 0.5
best_acc = 0

for t in thresholds:
    y_pred_t = (y_probs >= t).astype(int)
    
    acc = accuracy_score(y_test, y_pred_t)
    rec = recall_score(y_test, y_pred_t)
    prec = precision_score(y_test, y_pred_t, zero_division=0)
    
    if acc > best_acc:
        best_acc = acc
        best_threshold = t
        
    print(f"{t:.2f}       {acc:.2f}       {rec:.2f}                      {prec:.2f}")

print("-" * 75)
print(f"El mejor umbral para Accuracy parece ser: {best_threshold:.2f} con {best_acc:.2%}")


final_threshold = 0.55

final_prediction = (y_probs >= final_threshold).astype(int)

print(f"--- RESULTADOS FINALES CON UMBRAL {final_threshold} ---")
print("Matriz de Confusión:")
print(confusion_matrix(y_test, final_prediction))

print("\nReporte Clasificación:")
print(classification_report(y_test, final_prediction))

Umbral     Accuracy   Recall (Detecta Muertes)  Precision (No alarma en falso)
---------------------------------------------------------------------------
0.10       0.31       1.00                      0.31
0.15       0.33       1.00                      0.32
0.20       0.33       1.00                      0.32
0.25       0.37       1.00                      0.33
0.30       0.41       1.00                      0.34
0.35       0.51       1.00                      0.38
0.40       0.57       1.00                      0.42
0.45       0.59       0.96                      0.42
0.50       0.57       0.78                      0.40
0.55       0.65       0.70                      0.46
0.60       0.68       0.52                      0.48
0.65       0.69       0.26                      0.50
0.70       0.71       0.13                      0.60
0.75       0.69       0.04                      0.50
0.80       0.69       0.00                      0.00
0.85       0.69       0.00                      0.

## 5. Serializar

In [56]:
import joblib
joblib.dump(model, 'heart_failure_model.joblib')
joblib.dump(scaler, 'heart_failure_scaler.joblib')

['heart_failure_scaler.joblib']