In [441]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Load the dataset
datasetDiabetes = pd.read_csv("../diabetes.csv")

# Zeros in these columns are treated as missing values.
# np.nan (rather than pd.NA) is used as the missing marker so the columns stay
# numeric (float64); pd.NA would coerce them to object dtype and force a
# pd.to_numeric round-trip after imputation.
columnas_con_valores_cero = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
datasetDiabetes[columnas_con_valores_cero] = datasetDiabetes[columnas_con_valores_cero].replace(0, np.nan)

# Per-class column means, taken before imputation (mean() skips NaN by default).
media_diabetes = datasetDiabetes[datasetDiabetes['Outcome'] == 1].mean()
media_no_diabetes = datasetDiabetes[datasetDiabetes['Outcome'] == 0].mean()

# Row masks for each class; they do not change during imputation.
con_diabetes = datasetDiabetes['Outcome'] == 1
sin_diabetes = datasetDiabetes['Outcome'] == 0

# NOTE(review): the fill value depends on the Outcome label and is computed
# over every row. If the data is later split into train/test sets, this leaks
# target information into the features; consider imputing after the split
# with statistics from the training rows only.
for columna in columnas_con_valores_cero:
    faltantes = datasetDiabetes[columna].isna()
    # Missing values in diabetic rows get the diabetic-class mean
    datasetDiabetes.loc[faltantes & con_diabetes, columna] = media_diabetes[columna]
    # Missing values in non-diabetic rows get the non-diabetic-class mean
    datasetDiabetes.loc[faltantes & sin_diabetes, columna] = media_no_diabetes[columna]

# Summary statistics of the imputed dataset (displayed as the cell output)
datasetDiabetes.describe()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,121.697358,72.428141,29.247042,157.003527,32.44642,0.471876,33.240885,0.348958
std,3.369578,30.462008,12.106044,8.923908,88.860914,6.87897,0.331329,11.760232,0.476951
min,0.0,44.0,24.0,7.0,14.0,18.2,0.078,21.0,0.0
25%,1.0,99.75,64.0,25.0,121.5,27.5,0.24375,24.0,0.0
50%,3.0,117.0,72.0,28.0,130.287879,32.05,0.3725,29.0,0.0
75%,6.0,141.0,80.0,33.0,206.846154,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [442]:
# scikit-learn imports for the supervised neural-network model
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

In [443]:
# Separate the target column from the feature matrix.
y = datasetDiabetes['Outcome']
X = datasetDiabetes.drop(columns=['Outcome'])

# Stratified train/test split (20% held out for testing, fixed seed).
trainingDataSetDiabetes, testingDataSetDiabetes, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler().fit(trainingDataSetDiabetes)
X_train_scaled = scaler.transform(trainingDataSetDiabetes)
X_test_scaled = scaler.transform(testingDataSetDiabetes)

In [444]:
# Train a multi-layer perceptron with one hidden layer of 100 units.
# The original author's note listed max_iter=1000, learning_rate='adaptive',
# learning_rate_init=0.001, hidden_layer_sizes=(100,) as values tried while
# searching for the best configuration, and said more experimentation may be needed.
# NOTE(review): the call below uses max_iter=200 and learning_rate_init=0.1
# instead — confirm which values are intended.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only used
# with solver='sgd'; no solver is passed here, so it presumably has no effect — confirm.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    learning_rate='adaptive',
    learning_rate_init=0.1,
    max_iter=200,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the held-out test split.
y_pred = mlp.predict(X_test_scaled)

# Score the predictions against the true test labels.
accuracy, precision, recall, f1 = (
    metrica(y_test, y_pred)
    for metrica in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report each metric on its own line, followed by the confusion matrix.
for etiqueta, valor in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1),
):
    print(etiqueta, valor)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.8246753246753247
Precision: 0.7213114754098361
Recall: 0.8148148148148148
F1-score: 0.7652173913043478
Confusion Matrix:
[[83 17]
 [10 44]]


In [445]:
# Load the wine dataset and encode the 'quality' label as 0 ('bad') / 1 ('good').
# Labels not present in the mapping become NaN (Series.map behaviour).
datasetWine = pd.read_csv("../wine.csv").assign(
    quality=lambda tabla: tabla['quality'].map({'bad': 0, 'good': 1})
)

# Display the encoded frame as the cell output.
datasetWine

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,0
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,0
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,0
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,1
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,0
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,1
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,1
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,0


In [446]:
# Separate the target column from the feature matrix.
Y = datasetWine['quality']
X = datasetWine.drop(columns=['quality'])

# Stratified train/test split (20% held out for testing, fixed seed).
# NOTE(review): the 'Stroke' suffix is kept as-is, although these splits hold
# rows from datasetWine.
trainingDataSetStroke, testingDataSetStroke, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler().fit(trainingDataSetStroke)
X_train_scaled = scaler.transform(trainingDataSetStroke)
X_test_scaled = scaler.transform(testingDataSetStroke)

In [447]:
# Train a multi-layer perceptron with one hidden layer of 100 units.
# The original author's note listed max_iter=1000, learning_rate='adaptive',
# learning_rate_init=0.001, hidden_layer_sizes=(100,) as values tried while
# searching for the best configuration, and said more experimentation may be needed.
# NOTE(review): the call below uses max_iter=200 and learning_rate_init=0.1
# instead — confirm which values are intended.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only used
# with solver='sgd'; no solver is passed here, so it presumably has no effect — confirm.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    learning_rate='adaptive',
    learning_rate_init=0.1,
    max_iter=200,
    random_state=30,
).fit(X_train_scaled, y_train)

# Predict on the held-out test split.
y_pred = mlp.predict(X_test_scaled)

# Score the predictions against the true test labels.
accuracy, precision, recall, f1 = (
    metrica(y_test, y_pred)
    for metrica in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report each metric on its own line, followed by the confusion matrix.
for etiqueta, valor in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1),
):
    print(etiqueta, valor)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.728125
Precision: 0.8181818181818182
Recall: 0.631578947368421
F1-score: 0.712871287128713
Confusion Matrix:
[[125  24]
 [ 63 108]]
