In [30]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split, TensorDataset


In [31]:
df = pd.read_csv('CVD_cleaned.csv')

In [32]:
df.head()

Unnamed: 0,General_Health,Checkup,Exercise,Heart_Disease,Skin_Cancer,Other_Cancer,Depression,Diabetes,Arthritis,Sex,Age_Category,Height_(cm),Weight_(kg),BMI,Smoking_History,Alcohol_Consumption,Fruit_Consumption,Green_Vegetables_Consumption,FriedPotato_Consumption
0,Poor,Within the past 2 years,No,No,No,No,No,No,Yes,Female,70-74,150.0,32.66,14.54,Yes,0.0,30.0,16.0,12.0
1,Very Good,Within the past year,No,Yes,No,No,No,Yes,No,Female,70-74,165.0,77.11,28.29,No,0.0,30.0,0.0,4.0
2,Very Good,Within the past year,Yes,No,No,No,No,Yes,No,Female,60-64,163.0,88.45,33.47,No,4.0,12.0,3.0,16.0
3,Poor,Within the past year,Yes,Yes,No,No,No,Yes,No,Male,75-79,180.0,93.44,28.73,No,0.0,30.0,30.0,8.0
4,Good,Within the past year,No,No,No,No,No,No,No,Male,80+,191.0,88.45,24.37,Yes,0.0,8.0,4.0,0.0


In [33]:
encoder_health = LabelEncoder()
encoder_health.fit(df['General_Health'])
df['General_Health'] = encoder_health.transform(df['General_Health'])

encoder_checkup = LabelEncoder()
encoder_checkup.fit(df['Checkup'])
df['Checkup'] = encoder_checkup.transform(df['Checkup'])

encoder_exercise = LabelEncoder()
encoder_exercise.fit(df['Exercise'])
df['Exercise'] = encoder_exercise.transform(df['Exercise'])

encoder_heart = LabelEncoder()
encoder_heart.fit(df['Heart_Disease'])
df['Heart_Disease'] = encoder_heart.transform(df['Heart_Disease'])

encoder_depression = LabelEncoder()
encoder_depression.fit(df['Depression'])
df['Depression'] = encoder_depression.transform(df['Depression'])

encoder_diabetes = LabelEncoder()
encoder_diabetes.fit(df['Diabetes'])
df['Diabetes'] = encoder_diabetes.transform(df['Diabetes'])

encoder_arthritis = LabelEncoder()
encoder_arthritis.fit(df['Arthritis'])
df['Arthritis'] = encoder_arthritis.transform(df['Arthritis'])

encoder_sex = LabelEncoder()
encoder_sex.fit(df['Sex'])
df['Sex'] = encoder_sex.transform(df['Sex'])

encoder_age = LabelEncoder()
encoder_age.fit(df['Age_Category'])
df['Age_Category'] = encoder_age.transform(df['Age_Category'])

encoder_smoking = LabelEncoder()
encoder_smoking.fit(df['Smoking_History'])
df['Smoking_History'] = encoder_smoking.transform(df['Smoking_History'])

df = df.drop(['Skin_Cancer', 'Other_Cancer'], axis=1)

In [34]:
data_x = df.drop(['Heart_Disease'], axis=1)
data_y = df['Heart_Disease']

train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.1)

In [35]:
n_entradas = train_x.shape[1]

In [36]:
train_x = torch.from_numpy(train_x.values).float().to('cpu')
test_x = torch.from_numpy(test_x.values).float().to('cpu')
train_y = torch.from_numpy(train_y.values).float().to('cpu')
test_y = torch.from_numpy(test_y.values).float().to('cpu')
train_y = train_y[:, None]
test_y = test_y[:, None]


In [37]:
class Red(nn.Module):

    def __init__(self, n_entradas):
        super(Red, self).__init__()
        self.linear1 = nn.Linear(n_entradas, 15)
        self.linear2 = nn.Linear(15, 10)
        self.linear3 = nn.Linear(10, 1)

    def forward(self, inputs):
        pred_1 = torch.sigmoid(input=self.linear1(inputs))
        pred_2 = torch.sigmoid(input=self.linear2(pred_1))
        pred_f = torch.sigmoid(input=self.linear3(pred_2))
        return pred_f

In [38]:
lr = 0.5
epochs = 100
estatus_print = 100

model = Red(n_entradas= n_entradas)
print(model.parameters)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
historico = pd.DataFrame()

for epoch in range(1, epochs+1):
    y_pred = model(train_x)
    loss = loss_fn(input= y_pred, target=train_y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if epoch % estatus_print == 0:
        print(f'\nEpoch: {epoch} \tLoss: {loss.item():.4f}')

    with torch.no_grad():
        y_pred = model(test_x)
        y_pred_class = y_pred.round()
        correct = (y_pred_class == test_y).sum()
        accuracy = 100 * correct / float(len(test_y))
        if epoch % estatus_print == 0:
            print("Accuracy: {}".format(accuracy.item()))

    df_tmp = pd.DataFrame(data={
        'epoch': epoch,
        'loss': round(loss.item(), 4),
        'accuracy': round(accuracy.item(), 4)
    }, index=[0])
    historico = pd.concat(objs=[historico, df_tmp], ignore_index=True, sort=False) 

print("Accuracy final: {}".format(round(accuracy.item(), 4)))

<bound method Module.parameters of Red(
  (linear1): Linear(in_features=16, out_features=15, bias=True)
  (linear2): Linear(in_features=15, out_features=10, bias=True)
  (linear3): Linear(in_features=10, out_features=1, bias=True)
)>

Epoch: 100 	Loss: -127.4560
Accuracy: 11.639577865600586
Accuracy final: 11.6396


In [39]:
prediccion = model(test_x[4])
print("Predicción: {}".format(prediccion.item())) 

Predicción: 1.0
