In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

https://www.kaggle.com/datasets/alexteboul/diabetes-health-indicators-dataset

In [2]:
df = pd.read_csv('diabetes_012_health_indicators_BRFSS2015.csv')

In [3]:
df.head()

Unnamed: 0,Diabetes_012,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0.0,1.0,1.0,1.0,40.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,5.0,18.0,15.0,1.0,0.0,9.0,4.0,3.0
1,0.0,0.0,0.0,0.0,25.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,3.0,0.0,0.0,0.0,0.0,7.0,6.0,1.0
2,0.0,1.0,1.0,1.0,28.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,5.0,30.0,30.0,1.0,0.0,9.0,4.0,8.0
3,0.0,1.0,0.0,1.0,27.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,11.0,3.0,6.0
4,0.0,1.0,1.0,1.0,24.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,3.0,0.0,0.0,0.0,11.0,5.0,4.0


In [4]:
data_x = df.drop(['Diabetes_012', 'Stroke', 'HeartDiseaseorAttack', 'AnyHealthcare', 'MentHlth', 'DiffWalk', 'NoDocbcCost', 'CholCheck', 'Smoker', 'PhysHlth', 'Education', 'Income'], axis=1)
data_y = df['Diabetes_012']

In [5]:
train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.25)

In [6]:
# Se convierten los datos a tensores
train_x = torch.from_numpy(train_x.values).float().to('cpu')
test_x = torch.from_numpy(test_x.values).float().to('cpu')
train_y = torch.from_numpy(train_y.values).long().to('cpu')
test_y = torch.from_numpy(test_y.values).long().to('cpu')

train_y = train_y.view(-1, 1)
test_y = test_y.view(-1, 1)

In [7]:
class NeuralNetwork(nn.Module):
    def __init__(self, topology):
        super(NeuralNetwork, self).__init__()
        self.topology = topology
        self.layers = nn.ModuleList()
        for i in range(len(topology) - 1):
            self.layers.append(nn.Linear(topology[i], topology[i + 1]))

    def forward(self, inputs):
        predicted = inputs
        for i in range(len(self.topology) - 1):
            predicted = torch.sigmoid(input=self.layers[i](predicted))
        return predicted

### Definimos la topología de la red neuronal

In [8]:
parameters = data_x.shape[1]
output = 3
topology = [parameters, 4, 7, 5, output]

### Inicializamos la red neuronal

In [9]:
lr = 0.001
epochs = 1500
status = 100

net = NeuralNetwork(topology=topology)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

for epoch in range(1, epochs + 1):
    y_predicted = net(train_x)
    loss = loss_fn(y_predicted, train_y.squeeze())
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if epoch % status == 0:
        print(f'Epoch: {epoch}, Loss: {round(loss.item(), 4)}')

    with torch.no_grad():
        y_predicted = net(test_x)
        _, predicted_class = torch.max(y_predicted, dim=1)
        correct = (predicted_class == test_y.squeeze()).sum().item()
        accuracy = 100 * correct / len(test_y)
        if epoch % status == 0:
            print(f'Epoch: {epoch}, Accuracy: {accuracy}%')

Epoch: 100, Loss: 0.9662
Epoch: 100, Accuracy: 84.15326395458845%
Epoch: 200, Loss: 0.8699
Epoch: 200, Accuracy: 84.15326395458845%
Epoch: 300, Loss: 0.8031
Epoch: 300, Accuracy: 84.15326395458845%
Epoch: 400, Loss: 0.7661
Epoch: 400, Accuracy: 84.15326395458845%
Epoch: 500, Loss: 0.7488
Epoch: 500, Accuracy: 84.15326395458845%
Epoch: 600, Loss: 0.7389
Epoch: 600, Accuracy: 84.15326395458845%
Epoch: 700, Loss: 0.7325
Epoch: 700, Accuracy: 84.15326395458845%
Epoch: 800, Loss: 0.7281
Epoch: 800, Accuracy: 84.15326395458845%
Epoch: 900, Loss: 0.7248
Epoch: 900, Accuracy: 84.15326395458845%
Epoch: 1000, Loss: 0.7224
Epoch: 1000, Accuracy: 84.15326395458845%
Epoch: 1100, Loss: 0.7205
Epoch: 1100, Accuracy: 84.15326395458845%
Epoch: 1200, Loss: 0.7189
Epoch: 1200, Accuracy: 84.15326395458845%
Epoch: 1300, Loss: 0.7177
Epoch: 1300, Accuracy: 84.15326395458845%
Epoch: 1400, Loss: 0.7166
Epoch: 1400, Accuracy: 84.15326395458845%
Epoch: 1500, Loss: 0.7158
Epoch: 1500, Accuracy: 84.15326395458845

In [10]:
data_x.columns

Index(['HighBP', 'HighChol', 'BMI', 'PhysActivity', 'Fruits', 'Veggies',
       'HvyAlcoholConsump', 'GenHlth', 'Sex', 'Age'],
      dtype='object')

### Información de las columnas del dataset
HighBP: Hipertensión alta,
HighChol: Colesterol alto,
BMI: Índice de masa corporal,
PhysActivity: Actividad física,
Fruits: Consumo de frutas,
Veggies: Consumo de verduras,
HvyAlcoholConsump: Consumo de alcohol,
GenHlth: Salud general

In [11]:
binary_labels = ['no', 'si']
gen_hlth_labels = ['excelente', 'muy buena', 'buena', 'regular', 'mala']
sex_labels = ['femenino', 'masculino']
age_labels = ['18', '19', '20', '21', '22', '23', '24', 'entre 25 y 59', '60', '61', '62', '63', '80 o más']

high_bp = 0
high_chol = 1
bmi = 42 # Debe ser un valor entre 12 y 98 preferiblemente
phys_activity = 0
fruits = 0
veggies = 0
hvy_alcohol_consump = 0
gen_hlth = 4
sex = 1
age = 7

labels = ['No diabetes', 'Prediabetes', 'Diabetes']
data = torch.Tensor([high_bp, high_chol, bmi, phys_activity, fruits, veggies, hvy_alcohol_consump, gen_hlth, sex, age])
prediction = net(data)

In [12]:
# Clasificación
_, predicted_class = torch.max(prediction, dim=0)
print(f'Una persona de sexo {sex_labels[sex]} de {age_labels[age - 1]} años de edad, con un índice de masa corporal de {bmi}, con una salud general {gen_hlth_labels[gen_hlth - 1]}, que {binary_labels[fruits]} consume frutas, que {binary_labels[veggies]} consume verduras, que {binary_labels[phys_activity]} realiza actividad física y que {binary_labels[hvy_alcohol_consump]} consume alcohol, pertece al grupo de: {labels[predicted_class]}')

Una persona de sexo masculino de 24 años de edad, con un índice de masa corporal de 42, con una salud general regular, que no consume frutas, que no consume verduras, que realiza no actividad física y que no consume alcohol, pertece al grupo de: No diabetes
