In [45]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch import nn
from sklearn.metrics import accuracy_score

import torch
import pandas as pd
import numpy as np

In [46]:
df = pd.read_csv('diabetes_prediction_dataset.csv')

In [47]:
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0


In [48]:
genderEncoder = LabelEncoder().fit(df['gender'])
smokingEncoder = LabelEncoder().fit(df['smoking_history'])
df['gender'] = genderEncoder.transform(df['gender'])
df['smoking_history'] = smokingEncoder.transform(df['smoking_history'])

In [49]:
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,0,80.0,0,1,4,25.19,6.6,140,0
1,0,54.0,0,0,0,27.32,6.6,80,0
2,1,28.0,0,0,4,27.32,5.7,158,0
3,0,36.0,0,0,1,23.45,5.0,155,0
4,1,76.0,1,1,1,20.14,4.8,155,0


In [50]:
data_x = df[['age', 'hypertension', 'bmi', 'blood_glucose_level']]
data_y = df['diabetes']

In [51]:
x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.1)

In [52]:
class ClassifierNeuralNetwork(nn.Module):
    def __init__(self, x_size, hidden_size, y_size):
        super().__init__()
        self.linear_layer_1 = nn.Linear(x_size, hidden_size)
        self.linear_layer_2 = nn.Linear(hidden_size, y_size)

    def forward(self, data_x):
        x = torch.relu(self.linear_layer_1(data_x))
        x = self.linear_layer_2(x)
        return x

In [53]:
input_size = len(data_x.columns)
hidden_size = 10
output_unique = len(data_y.unique())
model = ClassifierNeuralNetwork(input_size, hidden_size, output_unique)

In [54]:
def transform_data_to_tensor(data: pd.DataFrame) -> torch.Tensor:
    return torch.tensor(data.to_numpy(), dtype=torch.float32)

x_train = transform_data_to_tensor(x_train)
x_test = transform_data_to_tensor(x_test)
y_train = transform_data_to_tensor(y_train)
y_test = transform_data_to_tensor(y_test)

In [55]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [56]:
epochs = 1000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(x_train)
    loss = criterion(output, y_train.long())
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f'Epoca: {epoch}, perdida: {loss.item()}')

model.eval()
with torch.no_grad():
    output = model(x_test)
    loss = criterion(output, y_test.long())
    print(f'Perdida: {loss.item()}')

    _, predicted = torch.max(output.data, 1)
    print(f'Predicciones: {predicted.numpy()}, Esperado: {y_test.numpy()}')

print(f'Score: {accuracy_score(y_test, predicted) * 100}%')

Epoca: 0, perdida: 1.4336349964141846


Epoca: 100, perdida: 0.2716293931007385
Epoca: 200, perdida: 0.2418866753578186
Epoca: 300, perdida: 0.2206340730190277
Epoca: 400, perdida: 0.2023240178823471
Epoca: 500, perdida: 0.19037699699401855
Epoca: 600, perdida: 0.18273936212062836
Epoca: 700, perdida: 0.1776161640882492
Epoca: 800, perdida: 0.1765838861465454
Epoca: 900, perdida: 0.17328527569770813
Perdida: 0.17722675204277039
Predicciones: [0 0 0 ... 0 0 0], Esperado: [0. 0. 0. ... 1. 1. 0.]
Score: 94.12%


In [59]:
age = 20
hypertension = 1
bmi = 40
blood_glucose_level = 250
hypertension_labels = {0: 'no tiene hipertensión', 1: 'tiene hipertensión'}
diabetes_labels = {0: 'no tiene diabetes', 1: 'tiene diabetes'}

new_row = torch.tensor([[age, hypertension, bmi, blood_glucose_level]], dtype=torch.float32)
model.eval()
with torch.no_grad():
    output = model(new_row)
    _, predicted = torch.max(output.data, 1)
    print(f'La persona de {age} años de edad, que {hypertension_labels[hypertension]}, tiene un bmi de {bmi} y tiene {blood_glucose_level} glucosa en la sangre, {diabetes_labels[predicted.numpy()[0]]}')

La persona de 20 años de edad, que tiene hipertensión, tiene un bmi de 40 y tiene 250 glucosa en la sangre, no tiene diabetes
