In [3]:
import os

# Keras selects its backend from KERAS_BACKEND when the package is first
# imported, so the variable must be set before `import keras`. TensorFlow is
# requested because this notebook later feeds string-typed inputs to the model.
os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import pandas as pd
import tensorflow as tf
import keras

In [4]:
# Load the raw dataset. A forward slash keeps the relative path portable across
# operating systems and avoids the invalid "\h" escape sequence that a
# backslash-separated, non-raw string literal would contain.
df = pd.read_csv('Datos/heart.csv')

In [5]:
# Preview the first rows to check how the file was parsed.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [6]:
# Count missing values per column.
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

Definimos listas para las variables categóricas enteras, categóricas string y numéricas.

In [7]:
# Categorical columns stored as integers.
cat_int_feats = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'ca']
# Categorical columns stored as strings.
cat_str_feats = ['thal']
# Columns treated as numerical.
num_feats = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope']

In [8]:
# Single ordered feature list: integer categoricals, then string categoricals,
# then numerical columns.
feats_ordered = [*cat_int_feats, *cat_str_feats, *num_feats]

In [9]:
# Reorder the columns to follow feats_ordered, with the label column last.
df = df[feats_ordered+['target']]

In [10]:
# Confirm the new column order.
df.head()

Unnamed: 0,sex,cp,fbs,restecg,exang,ca,thal,age,trestbps,chol,thalach,oldpeak,slope,target
0,1,1,1,2,0,0,fixed,63,145,233,150,2.3,3,0
1,1,4,0,2,1,3,normal,67,160,286,108,1.5,2,1
2,1,4,0,2,1,2,reversible,67,120,229,129,2.6,2,0
3,1,3,0,0,0,0,normal,37,130,250,187,3.5,3,0
4,0,2,0,2,0,0,normal,41,130,204,172,1.4,1,0


Separamos los datos en entrenamiento, validación y prueba

In [11]:
# Randomly draw 80% of the rows (fixed seed) as the provisional training set;
# the validation rows are carved out of it in later cells.
train = df.sample(frac=0.8, random_state=100)
train.head()

Unnamed: 0,sex,cp,fbs,restecg,exang,ca,thal,age,trestbps,chol,thalach,oldpeak,slope,target
69,0,4,0,0,0,0,normal,35,138,183,182,1.4,1,0
300,1,4,0,2,0,1,reversible,65,135,254,127,2.8,2,1
220,1,1,0,0,0,2,normal,59,134,204,162,0.8,1,0
134,1,3,1,0,0,0,reversible,42,120,240,194,0.8,3,0
7,0,4,0,0,1,0,normal,57,120,354,163,0.6,1,0


In [12]:
# (rows, columns) of the provisional training set.
train.shape

(242, 14)

In [13]:
# Every row that was not sampled into train becomes the test set.
test = df.drop(train.index)
test.head()

Unnamed: 0,sex,cp,fbs,restecg,exang,ca,thal,age,trestbps,chol,thalach,oldpeak,slope,target
0,1,1,1,2,0,0,fixed,63,145,233,150,2.3,3,0
2,1,4,0,2,1,2,reversible,67,120,229,129,2.6,2,0
4,0,2,0,2,0,0,normal,41,130,204,172,1.4,1,0
8,1,4,0,2,0,1,reversible,63,130,254,147,1.4,2,1
13,1,2,0,0,0,0,reversible,44,120,263,173,0.0,1,0


In [14]:
# Hold out 20% of the current training rows (fixed seed) for validation.
val = train.sample(frac=0.2, random_state=100)

In [15]:
# (rows, columns) of the validation set.
val.shape

(48, 14)

In [16]:
# Remove the validation rows from train so the two sets do not overlap.
# NOTE: this rebinds `train`; running the cell a second time without re-running
# the split cells above is expected to fail, because the validation labels are
# no longer present in `train`.
train = train.drop(val.index)

In [18]:
# Report the final size of each split, one per line: train, validation, test.
print(train.shape, val.shape, test.shape, sep="\n")

(194, 14)
(48, 14)
(61, 14)


In [19]:
# Summary statistics of the numeric columns in the training split.
train.describe()

Unnamed: 0,sex,cp,fbs,restecg,exang,ca,age,trestbps,chol,thalach,oldpeak,slope,target
count,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0
mean,0.664948,3.097938,0.134021,1.0,0.319588,0.649485,54.953608,132.876289,249.974227,150.412371,0.990722,1.57732,0.257732
std,0.47323,1.030969,0.341556,0.997406,0.467523,0.927878,8.75415,17.952879,52.803226,22.809984,1.110625,0.590689,0.438517
min,0.0,0.0,0.0,0.0,0.0,0.0,34.0,94.0,126.0,88.0,0.0,1.0,0.0
25%,0.0,2.0,0.0,0.0,0.0,0.0,49.0,120.0,215.75,138.25,0.0,1.0,0.0
50%,1.0,3.0,0.0,1.0,0.0,0.0,56.0,130.0,243.0,154.0,0.6,2.0,0.0
75%,1.0,4.0,0.0,2.0,1.0,1.0,61.0,143.5,281.0,167.75,1.6,2.0,1.0
max,1.0,4.0,1.0,2.0,1.0,3.0,77.0,192.0,564.0,195.0,4.4,3.0,1.0


In [20]:
def dataframe_to_dataset(dataframe):
    """Convert a DataFrame with a "target" column into a shuffled tf.data dataset.

    Args:
        dataframe: DataFrame holding the feature columns plus a "target" column.
            It is copied first, so the caller's frame is left untouched.

    Returns:
        A tf.data.Dataset whose elements are (features, label) pairs, where
        features is a dict keyed by column name. The dataset is shuffled with a
        buffer as large as the number of rows.
    """
    features = dataframe.copy()
    target = features.pop("target")  # removes the label column from the copy
    n_rows = len(features)
    pairs = tf.data.Dataset.from_tensor_slices((dict(features), target))
    return pairs.shuffle(buffer_size=n_rows)

In [21]:
# Build one tf.data dataset per split, in the order train, validation, test.
train_ds, val_ds, test_ds = (
    dataframe_to_dataset(split_df) for split_df in (train, val, test)
)

Separamos los datos de entrenamiento, validación y prueba en lotes

In [22]:
# Group the elements of every split into batches of `batch_size`.
# NOTE: each dataset name is rebound to its batched version, so running this
# cell twice would batch the already-batched datasets again.
batch_size = 32
train_ds = train_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)

Función para codificar variables numéricas (Keras docs)

In [None]:
def encode_numerical_feature(feature, name, dataset):
    """Normalize a numerical model input using statistics learned from `dataset`.

    Args:
        feature: Model input (as created with keras.Input) to be normalized.
        name: Key of the feature inside each element's feature dict.
        dataset: tf.data dataset yielding (features, label) pairs; it is used
            only to adapt the normalization layer.

    Returns:
        The result of applying the adapted Normalization layer to `feature`.
    """
    # Keep only the requested feature and append a trailing axis to it.
    column_ds = dataset.map(
        lambda feats, _label: tf.expand_dims(feats[name], -1)
    )

    scaler = keras.layers.Normalization()
    scaler.adapt(column_ds)  # learns the statistics (mean, variance) of the data
    return scaler(feature)

Función para codificar variables categóricas (Keras docs)

In [None]:
def encode_categorical_feature(feature, name, dataset, is_string):
    """Encode a categorical model input with a lookup layer adapted on `dataset`.

    Args:
        feature: Model input (as created with keras.Input) to be encoded.
        name: Key of the feature inside each element's feature dict.
        dataset: tf.data dataset yielding (features, label) pairs; it is used
            only to learn the set of values the feature takes.
        is_string: True selects StringLookup, False selects IntegerLookup.

    Returns:
        The result of applying the adapted lookup layer to `feature`. The layer
        is created with output_mode="binary", i.e. a 0/1 (dummy) encoding.
    """
    if is_string:
        lookup_cls = keras.layers.StringLookup
    else:
        lookup_cls = keras.layers.IntegerLookup
    encoder = lookup_cls(output_mode="binary")

    # Keep only the requested feature and append a trailing axis to it.
    values_ds = dataset.map(
        lambda feats, _label: tf.expand_dims(feats[name], -1)
    )

    encoder.adapt(values_ds)  # learns the vocabulary of observed categories
    return encoder(feature)

Creamos una lista de inputs para el modelo, de acuerdo con cada tipo de variable

Creamos una capa concatenando todas las variables codificadas

In [None]:
all_feats = keras.layers.concatenate(feats_encoded)

In [None]:
type(all_feats)

Agregamos una capa densa con 32 neuronas y función de activación relu

In [None]:
model_layers = keras.layers.Dense(32, activation='relu')(all_feats)

Agregamos la capa de salida con 1 neurona (probabilidad de sufrir la enfermedad cardiada) y función de activación sigmoide

In [None]:
model_layers = keras.layers.Dense(1, activation='sigmoid')(model_layers)

Creamos el modelo con las capas ya creadas y las variables de entrada

In [None]:
model = keras.Model(inputs, model_layers)

Compilamos el modelo, definiendo optimizador, función de pérdida y métricas adicionales a capturar

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

Aseguramos que Keras use TensorFlow como backend, para asegurar que el modelo pueda usar strings como entradas

In [None]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"

Entrenamos el modelo con los datos en el formato tf.Dataset

In [None]:
model.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
inputs = []
for i in cat_int_feats:
  inputs.append(keras.Input(shape=(1,), name=i, dtype="int64"))

In [None]:
for i in cat_str_feats:
  inputs.append(keras.Input(shape=(1,), name=i, dtype="string"))

In [None]:
for i in num_feats:
  inputs.append(keras.Input(shape=(1,), name=i))

In [None]:
for i in inputs:
   print(i)

Creamos una lista de variables codificadas/normalizadas de acuerdo con su tipo, empleando las funciones de codificación/normalización

In [None]:
feats_encoded=[]

In [None]:
for i,feat in enumerate(cat_int_feats):
  feats_encoded.append(
      encode_categorical_feature(inputs[i], feat, train_ds, False)
  )

In [None]:
len_feats = len(feats_encoded)
len_feats

In [None]:
for i,feat in enumerate(cat_str_feats):
  feats_encoded.append(
      encode_categorical_feature(inputs[len_feats+i], feat, train_ds, True)
  )

In [None]:
len_feats = len(feats_encoded)
len_feats

In [None]:
for i,feat in enumerate(num_feats):
  feats_encoded.append(
      encode_numerical_feature(inputs[len_feats+i], feat, train_ds)
  )

In [None]:
for i in feats_encoded:
  print(i)