In [None]:
#Instalando las librerias requeridas
import sys
print(sys.version)
!{sys.executable} -m pip install plotnine #tensorflow, numpy, pandas, plotnine, sklearn

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
from plotnine import *

Init Plugin
Init Graph Optimizer
Init Kernel


# Actividad, prediciendo enfermedad cardiovascular

En esta actividad vamos a entrenar una red neuronal para predecir enfermedades cardiovasculares.<br>
El set de datos consiste en 60,000 pacientes (cardioTrain.csv). Los datos consisten en las siguientes variables:
* age = edad de la persona en días
* gender = F para mujer, M para hombre
* height = altura de la persona
* weight = peso de la persona
* ap_hi = presión sistólica
* ap_lo = presión diastólica
* cholesterol = nivel de colesterol en la sangre (normal, medium: cercano a la norma, high: muy arriba de la norma)
* gluc = nivel de glucosa en la sangre (normal, medium: cercano a la norma, high: muy arriba de la norma)
* smoke = la persona fuma (no, yes)
* alco = la persona bebe alcohol (no, yes)
* active = la persona realiza actividad física (no, yes)
* cardio = presencia de enfermedad cardiovascular (0: no, 1: si). Variable a predecir

Ustes debera hacer lo siguiente:
1. Leer y preprocesar los datos (realice un procesamiento rápido y justificado). 
2. Buscar los híperparametros para que realice una correcta clasificación (número de neuronas, número de capas, función de activación)
3. Aplicar el modelo aprendido sobre los datos cardioEvaluar y verificar el accuracy obtenido.

In [57]:
#Obteniendo el set de datos, 60000 puntos con las variables descritas
data = pd.read_csv("cardioTrain.csv")
data

Unnamed: 0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,19878,F,172,78.0,120,80,normal,normal,no,no,yes,0
1,17455,M,168,76.0,110,80,normal,normal,no,no,yes,0
2,21289,M,165,45.0,110,80,normal,normal,no,no,no,0
3,20618,F,170,68.0,120,80,normal,normal,no,no,yes,0
4,22576,M,185,95.0,140,80,normal,normal,yes,no,no,1
...,...,...,...,...,...,...,...,...,...,...,...,...
59995,19498,M,160,81.0,80,120,high,high,yes,yes,yes,1
59996,18166,F,168,66.0,120,80,normal,normal,no,no,yes,0
59997,20510,F,160,54.0,110,70,normal,normal,no,no,yes,0
59998,17382,F,172,71.0,150,90,medium,normal,no,no,yes,1


In [58]:
#Estandarización de las variables numericas: Edad tenia una escala distinta a las demás variables
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

ct = ColumnTransformer([
        ('somename', StandardScaler(),["age","height","weight","ap_hi","ap_lo"])
    ], remainder='passthrough')

data_transformed = pd.DataFrame(ct.fit_transform(data),columns = ["age","height","weight","ap_hi","ap_lo","gender","cholesterol","gluc","smoke","alco","active","cardio"])

In [59]:
#One hot encoding para el género

dummy_gender = pd.get_dummies(data['gender'])

data_transformed = pd.merge(
    left=dummy_gender,
    right=data_transformed,
    left_index=True,
    right_index=True,
)

data_transformed

Unnamed: 0,F,M,age,height,weight,ap_hi,ap_lo,gender,cholesterol,gluc,smoke,alco,active,cardio
0,1,0,0.164245,0.931816,0.26213,-0.059952,-0.088037,F,normal,normal,no,no,yes,0
1,0,1,-0.818968,0.444028,0.122926,-0.129731,-0.088037,M,normal,normal,no,no,yes,0
2,0,1,0.736805,0.078186,-2.03474,-0.129731,-0.088037,M,normal,normal,no,no,no,0
3,1,0,0.464524,0.687922,-0.433891,-0.059952,-0.088037,F,normal,normal,no,no,yes,0
4,0,1,1.259048,2.517128,1.445367,0.079606,-0.088037,M,normal,normal,yes,no,no,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0,1,0.010047,-0.531549,0.470937,-0.339068,0.121767,M,high,high,yes,yes,yes,1
59996,1,0,-0.530456,0.444028,-0.573095,-0.059952,-0.088037,F,normal,normal,no,no,yes,0
59997,1,0,0.4207,-0.531549,-1.408321,-0.129731,-0.140488,F,normal,normal,no,no,yes,0
59998,1,0,-0.84859,0.931816,-0.225084,0.149385,-0.035586,F,medium,normal,no,no,yes,1


In [60]:
#Ordinal encoder para cholesterol y gluc

scale_mapper = {"normal":1,"medium":2,"high":3}

data_transformed["cholesterol"] = data_transformed["cholesterol"].replace(scale_mapper)
data_transformed["gluc"] = data_transformed["gluc"].replace(scale_mapper)

data_transformed

Unnamed: 0,F,M,age,height,weight,ap_hi,ap_lo,gender,cholesterol,gluc,smoke,alco,active,cardio
0,1,0,0.164245,0.931816,0.26213,-0.059952,-0.088037,F,1,1,no,no,yes,0
1,0,1,-0.818968,0.444028,0.122926,-0.129731,-0.088037,M,1,1,no,no,yes,0
2,0,1,0.736805,0.078186,-2.03474,-0.129731,-0.088037,M,1,1,no,no,no,0
3,1,0,0.464524,0.687922,-0.433891,-0.059952,-0.088037,F,1,1,no,no,yes,0
4,0,1,1.259048,2.517128,1.445367,0.079606,-0.088037,M,1,1,yes,no,no,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0,1,0.010047,-0.531549,0.470937,-0.339068,0.121767,M,3,3,yes,yes,yes,1
59996,1,0,-0.530456,0.444028,-0.573095,-0.059952,-0.088037,F,1,1,no,no,yes,0
59997,1,0,0.4207,-0.531549,-1.408321,-0.129731,-0.140488,F,1,1,no,no,yes,0
59998,1,0,-0.84859,0.931816,-0.225084,0.149385,-0.035586,F,2,1,no,no,yes,1


In [61]:
#Replace yes to 1 and no to 0

scale_mapper = {"yes":1,"no":0}

data_transformed["smoke"] = data_transformed["smoke"].replace(scale_mapper)
data_transformed["alco"] = data_transformed["alco"].replace(scale_mapper)
data_transformed["active"] = data_transformed["active"].replace(scale_mapper)

data_transformed

Unnamed: 0,F,M,age,height,weight,ap_hi,ap_lo,gender,cholesterol,gluc,smoke,alco,active,cardio
0,1,0,0.164245,0.931816,0.26213,-0.059952,-0.088037,F,1,1,0,0,1,0
1,0,1,-0.818968,0.444028,0.122926,-0.129731,-0.088037,M,1,1,0,0,1,0
2,0,1,0.736805,0.078186,-2.03474,-0.129731,-0.088037,M,1,1,0,0,0,0
3,1,0,0.464524,0.687922,-0.433891,-0.059952,-0.088037,F,1,1,0,0,1,0
4,0,1,1.259048,2.517128,1.445367,0.079606,-0.088037,M,1,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0,1,0.010047,-0.531549,0.470937,-0.339068,0.121767,M,3,3,1,1,1,1
59996,1,0,-0.530456,0.444028,-0.573095,-0.059952,-0.088037,F,1,1,0,0,1,0
59997,1,0,0.4207,-0.531549,-1.408321,-0.129731,-0.140488,F,1,1,0,0,1,0
59998,1,0,-0.84859,0.931816,-0.225084,0.149385,-0.035586,F,2,1,0,0,1,1


In [62]:
data_transformed.drop("gender",axis=1, inplace=True)

In [63]:
data_transformed

Unnamed: 0,F,M,age,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,1,0,0.164245,0.931816,0.26213,-0.059952,-0.088037,1,1,0,0,1,0
1,0,1,-0.818968,0.444028,0.122926,-0.129731,-0.088037,1,1,0,0,1,0
2,0,1,0.736805,0.078186,-2.03474,-0.129731,-0.088037,1,1,0,0,0,0
3,1,0,0.464524,0.687922,-0.433891,-0.059952,-0.088037,1,1,0,0,1,0
4,0,1,1.259048,2.517128,1.445367,0.079606,-0.088037,1,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0,1,0.010047,-0.531549,0.470937,-0.339068,0.121767,3,3,1,1,1,1
59996,1,0,-0.530456,0.444028,-0.573095,-0.059952,-0.088037,1,1,0,0,1,0
59997,1,0,0.4207,-0.531549,-1.408321,-0.129731,-0.140488,1,1,0,0,1,0
59998,1,0,-0.84859,0.931816,-0.225084,0.149385,-0.035586,2,1,0,0,1,1


In [32]:
#Instalando las librerias requeridas
import sys
print(sys.version)
!{sys.executable} -m pip install keras-tuner #tensorflow, numpy, pandas, plotnine, sklearn

3.9.12 (main, Jun  1 2022, 06:34:44) 
[Clang 12.0.0 ]
Collecting keras-tuner
  Downloading keras_tuner-1.1.3-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.7/135.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)


Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.1.3 kt-legacy-1.0.4


In [40]:
#Parte 2: Búsqueda de hiperparámetros
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch
from tensorflow import keras

In [115]:
def build_model(hp):
    model = keras.Sequential()

    model.add(layers.InputLayer(input_shape=(11,)))

    for i in range(hp.Int('num_layers', 1, 10)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return model

In [116]:
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='project',
    overwrite=True,
    project_name='cardiovascular')

In [85]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 10, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [76]:
X = data_transformed.iloc[:,:-2]
y = data_transformed.iloc[:,-1]


X.age = X.age.astype("float64")
X.height = X.height.astype("float64")
X.ap_hi = X.ap_hi.astype("float64")
X.ap_lo = X.ap_lo.astype("float64")
X.weight = X.weight.astype("float64")

X.F = X.F.astype("int64")
X.M = X.F.astype("int64")


y = y.astype("int64")



y.dtypes

dtype('int64')

In [77]:
#Getting train data (X) and target data (y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [117]:
tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_test, y_test))

Trial 5 Complete [00h 03m 06s]
val_accuracy: 0.7299999992052714

Best val_accuracy So Far: 0.7334629694620768
Total elapsed time: 00h 15m 51s
INFO:tensorflow:Oracle triggered exit


In [118]:
tuner.results_summary()

Results summary
Results in project/cardiovascular
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x2f5f919a0>
Trial summary
Hyperparameters:
num_layers: 7
units_0: 480
learning_rate: 0.001
units_1: 416
units_2: 288
units_3: 288
units_4: 192
units_5: 288
units_6: 160
units_7: 384
Score: 0.7334629694620768
Trial summary
Hyperparameters:
num_layers: 3
units_0: 128
learning_rate: 0.001
units_1: 192
units_2: 320
units_3: 224
units_4: 448
units_5: 32
units_6: 384
units_7: 160
Score: 0.7332592805226644
Trial summary
Hyperparameters:
num_layers: 8
units_0: 64
learning_rate: 0.01
units_1: 32
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
units_7: 32
Score: 0.7321851849555969
Trial summary
Hyperparameters:
num_layers: 7
units_0: 64
learning_rate: 0.0001
units_1: 384
units_2: 192
units_3: 384
units_4: 480
units_5: 160
units_6: 384
units_7: 128
Score: 0.7304074366887411
Trial summary
Hyperparameters:
num_layers: 5
units_0: 128
learning_rate: 0.01
units_1: 448

In [91]:
"""
SIN INPUT LAYER:

Results summary
Results in project/cardiovascular
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x2b5c35c70>
Trial summary
Hyperparameters:
num_layers: 2
units_0: 512
units_1: 448
learning_rate: 0.001
units_2: 64
units_3: 512
Score: 0.7332777778307596
Trial summary
Hyperparameters:
num_layers: 4
units_0: 128
units_1: 448
learning_rate: 0.001
units_2: 512
units_3: 160
Score: 0.7326481540997823
Trial summary
Hyperparameters:
num_layers: 6
units_0: 320
units_1: 480
learning_rate: 0.001
units_2: 224
units_3: 352
units_4: 32
units_5: 32
Score: 0.7312777837117513
Trial summary
Hyperparameters:
num_layers: 4
units_0: 384
units_1: 320
learning_rate: 0.01
units_2: 32
units_3: 32
Score: 0.7296666502952576
Trial summary
Hyperparameters:
num_layers: 6
units_0: 288
units_1: 320
learning_rate: 0.01
units_2: 128
units_3: 416
units_4: 96
units_5: 480
Score: 0.7267222205797831

"""

Unnamed: 0,F,M,age,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco
0,1,1,0.164245,0.931816,0.262130,-0.059952,-0.088037,1,1,0,0
1,0,0,-0.818968,0.444028,0.122926,-0.129731,-0.088037,1,1,0,0
2,0,0,0.736805,0.078186,-2.034740,-0.129731,-0.088037,1,1,0,0
3,1,1,0.464524,0.687922,-0.433891,-0.059952,-0.088037,1,1,0,0
4,0,0,1.259048,2.517128,1.445367,0.079606,-0.088037,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...
59995,0,0,0.010047,-0.531549,0.470937,-0.339068,0.121767,3,3,1,1
59996,1,1,-0.530456,0.444028,-0.573095,-0.059952,-0.088037,1,1,0,0
59997,1,1,0.420700,-0.531549,-1.408321,-0.129731,-0.140488,1,1,0,0
59998,1,1,-0.848590,0.931816,-0.225084,0.149385,-0.035586,2,1,0,0
