## Paso 1: Cargar los datos.
Levantamos los datos de los pacientes

In [None]:
# Read data from file

import numpy as np
import pandas as pd

# Location of the patients CSV, given relative to the working directory.
file_name = 'data/datos_de_pacientes_5000.csv'
# The first CSV column is consumed as the row index instead of being kept as a data column.
data = pd.read_csv(filepath_or_buffer=file_name, index_col=0)

In [None]:
# Plain-text preview of the loaded table (pandas elides the middle rows of a long frame).
print(data)

      colesterol  presion  glucosa  edad  sobrepeso  tabaquismo  \
0            1.0      0.9      1.0     5          0           1   
1            2.4      1.4      1.8    72          0           0   
2            2.2      1.1      0.6    73          0           1   
3            2.2      1.3      1.2     4          0           1   
4            1.8      0.9      1.1    42          0           1   
...          ...      ...      ...   ...        ...         ...   
4995         2.5      1.0      0.9    72          0           1   
4996         2.2      1.0      1.3    72          1           1   
4997         2.4      0.7      1.8    51          1           1   
4998         2.4      1.6      1.0    15          0           1   
4999         2.1      1.3      1.2    27          0           0   

      riesgo_cardiaco  
0                   0  
1                   1  
2                   0  
3                   0  
4                   0  
...               ...  
4995                0  
499

## Paso 2: Preprocesar los datos.

Separamos los datos de entrada de las etiquetas

Separamos conjuntos de training, validación y testing según sea necesario

In [None]:
# Data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Separate the input features from the target labels
X = data.drop(['riesgo_cardiaco'], axis=1)
y = np.array(data['riesgo_cardiaco'])

# Hold out 20% of the rows for testing. A fixed random_state makes the split
# (and therefore every downstream result) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale every feature to [0, 1] using the min/max of the TRAINING set only.
scaler = MinMaxScaler()

# Training set: learn the per-column min/max and apply them.
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_train = pd.DataFrame(scaled_X_train, columns=X_train.columns)

# Testing set: reuse the statistics learned from the training set. Calling
# fit_transform here would re-fit the scaler on the test data, putting train and
# test on different scales and leaking test-set information into preprocessing.
scaled_X_test = scaler.transform(X_test)
scaled_X_test = pd.DataFrame(scaled_X_test, columns=X_test.columns)

In [None]:

# Plain-text preview of the scaled training features.
print(scaled_X_train)

      colesterol   presion   glucosa      edad  sobrepeso  tabaquismo
0       0.210526  0.272727  0.857143  0.040816        1.0         1.0
1       0.842105  0.818182  0.071429  0.938776        1.0         0.0
2       0.315789  0.727273  0.357143  0.346939        1.0         0.0
3       0.842105  0.909091  0.357143  0.979592        1.0         0.0
4       0.894737  0.000000  0.642857  0.367347        1.0         0.0
...          ...       ...       ...       ...        ...         ...
3995    0.894737  0.636364  0.357143  0.030612        0.0         0.0
3996    0.842105  0.636364  0.714286  0.938776        0.0         0.0
3997    0.473684  1.000000  0.928571  0.989796        1.0         0.0
3998    0.421053  0.272727  0.928571  0.275510        1.0         1.0
3999    0.368421  0.727273  0.428571  0.387755        1.0         0.0

[4000 rows x 6 columns]


In [None]:
# Plain-text preview of the scaled testing features.
print(scaled_X_test)

## Paso 3: Armo la red

In [None]:
# Build the neural network
import numpy as np
from tensorflow import keras
# Every Keras symbol is imported through `tensorflow.keras` so the model, the
# layers and the optimizer all come from the same Keras implementation. Mixing
# `keras.*` with `tensorflow.keras.*` can bind to two different installs.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Create the model: a stack of fully connected layers
model = Sequential()

# 6 inputs (colesterol, presion, glucosa, edad, sobrepeso, tabaquismo)
model.add(Dense(50, input_shape=(6,), activation='relu', kernel_initializer='uniform'))
model.add(Dense(25, activation='relu', kernel_initializer='random_normal'))
model.add(Dense(35, activation='relu', kernel_initializer='random_normal'))
# Single sigmoid unit: the output is a value in (0, 1) for the binary label
model.add(Dense(1, activation='sigmoid'))

# Compile with binary cross-entropy, matching the sigmoid output
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.01))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 50)                350       
                                                                 
 dense_1 (Dense)             (None, 25)                1275      
                                                                 
 dense_2 (Dense)             (None, 35)                910       
                                                                 
 dense_3 (Dense)             (None, 1)                 36        
                                                                 
Total params: 2571 (10.04 KB)
Trainable params: 2571 (10.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Paso 4: Entreno la red neuronal

In [None]:
# Training
# Fit on the scaled training features (scaled_X_train), not the raw X_train:
# predictions are made on scaled_X_test, so the network must be trained on
# inputs in the same scaled range.
# batch_size exceeds the number of training rows, so each epoch is one
# full-batch gradient step.
historicalModel=model.fit(scaled_X_train, y_train, verbose=2, batch_size = 10000, epochs=200)

Epoch 1/50
1/1 - 1s - loss: 0.6930 - 1s/epoch - 1s/step
Epoch 2/50
1/1 - 0s - loss: 0.6730 - 15ms/epoch - 15ms/step
Epoch 3/50
1/1 - 0s - loss: 0.6753 - 14ms/epoch - 14ms/step
Epoch 4/50
1/1 - 0s - loss: 0.6684 - 18ms/epoch - 18ms/step
Epoch 5/50
1/1 - 0s - loss: 0.6651 - 14ms/epoch - 14ms/step
Epoch 6/50
1/1 - 0s - loss: 0.6591 - 14ms/epoch - 14ms/step
Epoch 7/50
1/1 - 0s - loss: 0.6560 - 13ms/epoch - 13ms/step
Epoch 8/50
1/1 - 0s - loss: 0.6515 - 14ms/epoch - 14ms/step
Epoch 9/50
1/1 - 0s - loss: 0.6457 - 12ms/epoch - 12ms/step
Epoch 10/50
1/1 - 0s - loss: 0.6423 - 13ms/epoch - 13ms/step
Epoch 11/50
1/1 - 0s - loss: 0.6416 - 14ms/epoch - 14ms/step
Epoch 12/50
1/1 - 0s - loss: 0.6360 - 17ms/epoch - 17ms/step
Epoch 13/50
1/1 - 0s - loss: 0.6269 - 26ms/epoch - 26ms/step
Epoch 14/50
1/1 - 0s - loss: 0.6271 - 16ms/epoch - 16ms/step
Epoch 15/50
1/1 - 0s - loss: 0.6241 - 18ms/epoch - 18ms/step
Epoch 16/50
1/1 - 0s - loss: 0.6111 - 13ms/epoch - 13ms/step
Epoch 17/50
1/1 - 0s - loss: 0.6103 -

<keras.src.callbacks.History at 0x7c26fe0de920>

## Paso 5: Evalúo la red

In [None]:
# Evaluate
# Score the model on the scaled test features, the same matrix that is passed
# to model.predict. Evaluating on the raw X_test would feed inputs on a
# different scale. The model is compiled without extra metrics, so `result`
# is the loss.
result = model.evaluate(scaled_X_test, y_test)
print("Evaluate the model: ",result)



In [None]:
# Make predictions with the model on the scaled test features.
# The output layer is a sigmoid, so each prediction is a value in (0, 1), not a hard 0/1 label.
y_pred = model.predict(scaled_X_test)

In [None]:
import matplotlib.pyplot as plt

# Training-loss curve: one point per epoch, taken from the History object
# returned by model.fit.
fig, ax = plt.subplots()
ax.set_xlabel("# Epoca")
ax.set_ylabel("Magnitud de pérdida")
ax.plot(historicalModel.history["loss"])


In [None]:
# Show the first three test samples next to the model output and the true labels.
# y_pred was computed from the test set, so it must be compared with rows of
# X_test / y_test; the training rows are unrelated to these predictions.
print("Datos a predecir:")
print(X_test[:3])
print("-----------------")

print("Resultados obtenidos:")
print(y_pred[:3])
print("Valores correctos:")
print(y_test[:3])

In [None]:
import pickle
# Destination file for the serialized model, relative to the working directory.
model_pkl_file = "model.pkl"  

# Serialize the current `model` object to disk.
with open(model_pkl_file, 'wb') as file:  
    pickle.dump(model, file)

# Read the file back and rebind `model` to the deserialized object.
# NOTE(review): pickle.load can execute arbitrary code from the file; it is only
# acceptable here because the file was written by the statement just above.
# NOTE(review): pickling Keras models may depend on the installed Keras version;
# consider the native model.save / load_model API instead. TODO confirm.
with open(model_pkl_file, 'rb') as file:  
    model = pickle.load(file)