In [113]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split


In [114]:
df = pd.read_csv('diabetes_prediction_dataset.csv')
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0


In [115]:
gender_labels = {0:'Femenino', 1:'Masculino', 2:'Otro'}
df.gender = df.gender.map({'Female':0, 'Male':1, 'Other':2})

smoking_labels = {0:'never', 1:'No Info', 2:'current', 3:'former', 4:'ever', 5:'not current'}
df.smoking_history = df.smoking_history.map({'never':0, 'No Info':1, 'current':2, 'former':3, 'ever':4, 'not current':5})

In [116]:
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,0,80.0,0,1,0,25.19,6.6,140,0
1,0,54.0,0,0,1,27.32,6.6,80,0
2,1,28.0,0,0,0,27.32,5.7,158,0
3,0,36.0,0,0,2,23.45,5.0,155,0
4,1,76.0,1,1,2,20.14,4.8,155,0


In [117]:
df.describe()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,0.41466,41.885856,0.07485,0.03942,1.30695,27.320767,5.527507,138.05806,0.085
std,0.493031,22.51684,0.26315,0.194593,1.454501,6.636783,1.070672,40.708136,0.278883
min,0.0,0.08,0.0,0.0,0.0,10.01,3.5,80.0,0.0
25%,0.0,24.0,0.0,0.0,0.0,23.63,4.8,100.0,0.0
50%,0.0,43.0,0.0,0.0,1.0,27.32,5.8,140.0,0.0
75%,1.0,60.0,0.0,0.0,2.0,29.58,6.2,159.0,0.0
max,2.0,80.0,1.0,1.0,5.0,95.69,9.0,300.0,1.0


In [118]:
data_x = df.drop(['diabetes'], axis=1)
data_y = df.diabetes

In [119]:
train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.1)

In [120]:
t_train_x = torch.tensor(train_x.values).float().to('cpu')
t_test_x = torch.tensor(test_x.values).float().to('cpu')
t_train_y = torch.tensor(train_y.values).float().to('cpu')
t_test_y = torch.tensor(test_y.values).float().to('cpu')

In [121]:
t_train_y = t_train_y[:, None]
t_test_y = t_test_y[:, None]

# Clase Neural Network

In [125]:
class NeuralNetwork(nn.Module):
    def __init__(self, n_entradas):
        super(NeuralNetwork, self).__init__()
        self.capa1 = nn.Linear(n_entradas, 25)
        self.capa2 = nn.Linear(25, 23)
        self.capa3 = nn.Linear(23, 1)
    
    def forward(self, x):
        pred_1 = torch.sigmoid(input=self.capa1(x))
        pred_2 = torch.sigmoid(input=self.capa2(pred_1))
        pred_f = torch.sigmoid(input=self.capa3(pred_2))
        return pred_f

In [126]:
# Inicializamos el modelo
neural_network = NeuralNetwork(n_entradas=t_train_x.shape[1])
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(params=neural_network.parameters(), lr=0.001)
epochs = 1000

for epoch in range(epochs):
    y_pred = neural_network(t_train_x)
    loss = loss_fn(input=y_pred, target=t_train_y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    if epoch % 100 == 0:
        print(f'Epoca {epoch} | Loss: {loss.item()}')
    
    with torch.no_grad():
        y_pred_aux = neural_network(t_test_x)
        y_pred_aux = y_pred_aux.round()
        correct = (y_pred_aux == t_test_y).sum()
        accuracy = 100 * correct / float(len(t_test_y))
        if epoch % 100 == 0:
            print(f'Accuracy: {accuracy.item()}')

print(f'Accuracy final: {accuracy.item()}')

Epoca 0 | Loss: 0.7084677815437317
Accuracy: 8.40999984741211
Epoca 100 | Loss: 0.30585867166519165
Accuracy: 91.58999633789062
Epoca 200 | Loss: 0.28689900040626526
Accuracy: 91.58999633789062
Epoca 300 | Loss: 0.2839931845664978
Accuracy: 91.58999633789062
Epoca 400 | Loss: 0.27814140915870667
Accuracy: 91.58999633789062
Epoca 500 | Loss: 0.27002769708633423
Accuracy: 91.58999633789062
Epoca 600 | Loss: 0.26068273186683655
Accuracy: 91.58999633789062
Epoca 700 | Loss: 0.2495860606431961
Accuracy: 91.58999633789062
Epoca 800 | Loss: 0.23377645015716553
Accuracy: 91.58999633789062
Epoca 900 | Loss: 0.21826449036598206
Accuracy: 91.58999633789062
Accuracy final: 92.27999877929688


In [143]:
t_test_x[0]

tensor([  1.0000,  32.0000,   0.0000,   0.0000,   0.0000,  27.3200,   4.8000,
        160.0000])

In [144]:
# Predecir
gender = 0
age = 50
hypertension = 1
heart_disease = 1
smoking_history = 2
bmi = 20.48
HbA1c_level = 6
blood_glucose_level = 155

aux = [gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level]
t_aux = torch.tensor(aux).float().to('cpu')
predicted = neural_network(t_aux)

In [145]:
predicted.item()

0.5133612751960754

In [147]:
hypertension_labels = {1:'tiene hipertension', 0:'no tiene hipertension'}
heart_disease_labels = {1:'tiene cardiopatia', 0:'no tiene cardiopatia'}
diabetes_labels = {1:'Tiene diabetes', 0:'No tiene diabetes'}

In [149]:
print(f'Una persona de genero {gender_labels[gender]}, con {age} años, que {hypertension_labels[hypertension]}, que {heart_disease_labels[heart_disease]}.\nSmoking history : {smoking_labels[smoking_history]}, bmi : {bmi}, HbA1c level : {HbA1c_level}, Nivel de glucosa en sangre: {blood_glucose_level}.\n{diabetes_labels[round(predicted.item())]}')

Una persona de genero Femenino, con 50 años, que tiene hipertension, que tiene cardiopatia.
Smoking history : current, bmi : 20.48, HbA1c level : 6, Nivel de glucosa en sangre: 155.
Tiene diabetes
