In [1]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
data = pd.read_csv('diabetes_prediction_dataset.csv')

# Preprocessing: separate the feature columns from the 'diabetes' target column
y = data['diabetes']
X = data.drop(columns='diabetes')

# Hold out 30% of the rows for testing; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=43,
)

def encode_gender(label):
    """Map a gender label to an integer code.

    'Female' -> 0, 'Male' -> 1, any other value -> 2.
    """
    for code, known in enumerate(('Female', 'Male')):
        if label == known:
            return code
    # Fallback bucket for every label that is not matched above.
    return 2

# Encode the 'gender' column as integers in both splits.
# The frames are rebound via assign() instead of being mutated through
# attribute assignment: the split frames are derived from `X`, and writing
# into them in place is the pattern that triggers SettingWithCopyWarning on
# pandas versions without copy-on-write. assign() returns a new frame with
# the column replaced (column order is unchanged).
X_train = X_train.assign(gender=X_train['gender'].apply(encode_gender))
X_test = X_test.assign(gender=X_test['gender'].apply(encode_gender))

def encode_smoking(label):
    """Map a smoking-history label to an integer code.

    'No Info' -> 0, 'never' -> 1, 'former' -> 2, 'current' -> 3,
    'not current' -> 4, any other value -> 5.
    """
    known_labels = ('No Info', 'never', 'former', 'current', 'not current')
    for code, known in enumerate(known_labels):
        if label == known:
            return code
    # Fallback bucket for every label that is not listed above.
    return 5

# Encode the 'smoking_history' column as integers in both splits.
# assign() returns a new frame with the column replaced, so the frames
# produced by the train/test split are not written into in place (in-place
# writes to derived frames can raise SettingWithCopyWarning on pandas
# versions without copy-on-write).
X_train = X_train.assign(
    smoking_history=X_train['smoking_history'].apply(encode_smoking)
)
X_test = X_test.assign(
    smoking_history=X_test['smoking_history'].apply(encode_smoking)
)

# Feature scaling
# The scaler statistics are learned from the training split only and then
# reused unchanged for the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model creation and training
# sparse_categorical_crossentropy expects integer labels in [0, num_classes),
# so the softmax head is sized from the labels themselves instead of a
# hard-coded 10. The classification report produced by this notebook shows
# only classes 0 and 1, i.e. this evaluates to 2 for this dataset.
num_classes = int(y.max()) + 1

model = tf.keras.Sequential([
    # Explicit Input object instead of an `input_shape=` kwarg on the first
    # Dense layer; one input feature per scaled column.
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(500, activation='relu'),
    tf.keras.layers.Dense(500, activation='relu'),
    tf.keras.layers.Dense(500, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

num_epochs = 5
model.fit(X_train_scaled, y_train, epochs=num_epochs, batch_size=10)

# Model evaluation on the held-out test split
# evaluate() yields the loss followed by the compiled metric (accuracy).
evaluation = model.evaluate(X_test_scaled, y_test)
test_loss, test_accuracy = evaluation
for caption, value in (("Test loss:", test_loss), ("Test accuracy:", test_accuracy)):
    print(caption, value)

# Additional example: a k-nearest-neighbours classifier (k=5) trained on the
# same scaled features
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred = knn.predict(X_test_scaled)
knn_report = classification_report(y_test, y_pred)
print(knn_report)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.09641996026039124
Test accuracy: 0.9680333137512207
              precision    recall  f1-score   support

           0       0.97      0.99      0.98     27398
           1       0.88      0.62      0.73      2602

    accuracy                           0.96     30000
   macro avg       0.92      0.81      0.85     30000
weighted avg       0.96      0.96      0.96     30000

