In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten
from tensorflow.keras.models import Sequential 
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import fetch_openml

In [2]:
# Fetch the 'diabetes' dataset (version 1) from OpenML as a pandas-backed bunch
# and keep its full frame (features plus label column) in `df`.
diabetes = fetch_openml(
    as_frame=True,
    version=1,
    name='diabetes',
)
df = diabetes.frame

In [3]:
# Print a summary of the loaded frame: columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   preg    768 non-null    int64   
 1   plas    768 non-null    int64   
 2   pres    768 non-null    int64   
 3   skin    768 non-null    int64   
 4   insu    768 non-null    int64   
 5   mass    768 non-null    float64 
 6   pedi    768 non-null    float64 
 7   age     768 non-null    int64   
 8   class   768 non-null    category
dtypes: category(1), float64(2), int64(6)
memory usage: 49.0 KB


In [4]:
# Split the frame into the feature matrix (X) and the label vector (y).
# X: every column except the label column.
X = df.drop(columns=['class'])
# y: numeric label, 1 for 'tested_positive' and 0 for any other class value.
# A vectorized comparison replaces the per-row lambda and, through astype(int),
# always yields a plain integer Series.
y = (df['class'] == 'tested_positive').astype(int)

In [5]:
# Hold out 20% of the rows for testing and keep the remaining 80% for training.
# The split is stratified on y and uses a fixed seed so it can be repeated.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=26,
    test_size=0.2,
)

In [6]:
# Standardize the features (important for the perceptron).
scaler = StandardScaler()
# Fit the scaling statistics on the training split only, then apply them.
X_train = scaler.fit_transform(X_train)
# Reuse the statistics learned from the training split. Calling fit_transform
# here would re-fit the scaler on the test rows, so train and test would be
# scaled differently and test information would leak into preprocessing.
X_test = scaler.transform(X_test)

In [7]:
# Size of the latent space (length of the noise vector fed to the generator).
latent_dim = 100

In [8]:
def build_generador():
    """Build the generator network of the GAN.

    The model maps a noise vector of length ``latent_dim`` (module-level
    constant) through two LeakyReLU-activated Dense layers (32 and 64 units)
    to an 8-value output with tanh activation.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing `input_dim`
        # to the first Dense layer is the deprecated way of doing this.
        tf.keras.Input(shape=(latent_dim,)),
        Dense(32),
        LeakyReLU(0.2),
        Dense(64),
        LeakyReLU(0.2),
        # Emits 8 features, the same count as X_train.
        # NOTE(review): tanh bounds every generated value to [-1, 1]; if the
        # real samples are standardized they can fall outside that range -
        # confirm this output activation is intended.
        Dense(8, activation='tanh'),
    ])
    return model

In [9]:
def build_discriminador():
    """Build the discriminator network of the GAN.

    The model takes an 8-value feature vector, passes it through two
    LeakyReLU-activated Dense layers (64 and 32 units) and ends in a single
    sigmoid unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing `input_dim`
        # to the first Dense layer is the deprecated way of doing this.
        tf.keras.Input(shape=(8,)),
        Dense(64),
        LeakyReLU(0.2),
        Dense(32),
        LeakyReLU(0.2),
        # One sigmoid output, matching the binary cross-entropy loss the
        # model is compiled with elsewhere in this notebook.
        Dense(1, activation='sigmoid'),
    ])
    return model

In [10]:
# Instantiate the discriminator and compile it as a stand-alone binary
# classifier that also reports accuracy.
discriminador = build_discriminador()
discriminador.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(beta_1=0.5, learning_rate=0.0002),
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Instantiate the generator. It is not compiled here; it is wrapped by the
# combined GAN model, which is what gets compiled.
generador = build_generador()

In [12]:
# Noise input of the combined model.
gan_input = tf.keras.Input(shape=(latent_dim,))
# Freeze the discriminator while the generator is being trained.
discriminador.trainable = False
# Build the GAN by chaining generator -> discriminator.
gan_output = discriminador(generador(gan_input))
gan = tf.keras.Model(inputs=gan_input, outputs=gan_output)

In [13]:
# Compile the combined GAN model with binary cross-entropy and Adam.
gan.compile(
    loss='binary_crossentropy',
    optimizer=Adam(beta_1=0.5, learning_rate=0.0002),
)

In [14]:
def generate_and_save_images(
    epoch, generator, ejemplos=16, dim=(4, 4), figsize=(10, 10)
):
    """Sample the generator and save a grid picture of the generated samples.

    Each sample is drawn as a small grayscale tile. The tile shape is derived
    from the generator output instead of being hard-coded to 28x28: an output
    whose size is a perfect square (e.g. 784 values) becomes a square image,
    anything else (e.g. 8 tabular features) becomes a 1 x n strip. The
    previous fixed reshape raised ValueError for any non-784 output.

    Args:
        epoch: Value embedded in the output file name.
        generator: Object with a ``predict(noise)`` method returning one row
            of values per noise vector.
        ejemplos: Number of samples to generate and plot.
        dim: (rows, cols) of the subplot grid.
        figsize: Figure size passed to matplotlib.

    Side effects:
        Writes ``gan_generated_epoch_{epoch}.png`` to the working directory
        and shows the figure.
    """
    noise = np.random.normal(0, 1, size=(ejemplos, latent_dim))
    # Flatten every sample so the tile shape can be chosen from its length.
    generated = np.asarray(generator.predict(noise)).reshape(ejemplos, -1)

    n_values = generated.shape[1]
    side = int(round(np.sqrt(n_values)))
    if side * side == n_values:
        tile_shape = (side, side)
    else:
        tile_shape = (1, n_values)
    generated_images = generated.reshape((ejemplos,) + tile_shape)

    fig = plt.figure(figsize=figsize)
    for i in range(generated_images.shape[0]):
        # One subplot per generated sample.
        plt.subplot(dim[0], dim[1], i + 1)
        plt.imshow(generated_images[i], interpolation="nearest", cmap="gray_r")
        plt.axis("off")
    plt.tight_layout()
    plt.savefig(f"gan_generated_epoch_{epoch}.png")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [15]:
def train_gan(epochs=1000, batch_size=64):
    """Train the discriminator and the generator alternately.

    For every batch: generate fake rows from random noise, draw real rows
    from ``X_train`` (sampled with replacement), train ``discriminador`` on
    the combined real/fake batch, then train ``gan`` on the same noise with
    "real" targets. Uses the module-level ``X_train``, ``latent_dim``,
    ``generador``, ``discriminador`` and ``gan``.

    Args:
        epochs: Number of passes; losses are printed every 100 epochs.
        batch_size: Number of real rows (and of generated rows) per batch.
    """
    # Always run at least one batch per epoch. Real rows are sampled with
    # replacement, so a batch_size larger than the training set is usable;
    # without this guard batch_count would be 0 and the log line below would
    # fail because d_loss / g_loss were never assigned.
    batch_count = max(1, X_train.shape[0] // batch_size)

    # Targets are the same for every batch, so build them once.
    # Discriminator targets: 1 for the real half, 0 for the generated half.
    y_combined = np.concatenate([np.ones((batch_size, 1)), np.zeros((batch_size, 1))])
    # Generator targets: all ones, i.e. "classified as real".
    y_generator = np.ones((batch_size, 1))

    for epoch in range(epochs):
        for _ in range(batch_count):
            noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
            generated_data = generador.predict(noise, verbose=0)

            real_data = X_train[np.random.randint(0, X_train.shape[0], size=batch_size)]

            X_combined = np.concatenate([real_data, generated_data])

            # NOTE(review): `discriminador.trainable` is set to False after
            # the discriminator was compiled; whether this direct call still
            # updates its weights depends on the installed Keras version -
            # confirm the discriminator is actually learning.
            d_loss = discriminador.train_on_batch(X_combined, y_combined)
            g_loss = gan.train_on_batch(noise, y_generator)

        if epoch % 100 == 0:
            # The discriminator is compiled with an accuracy metric, so its
            # result is indexed to get the loss; the GAN has no extra metrics.
            # NOTE(review): some Keras versions report values accumulated
            # across train_on_batch calls rather than per-batch values -
            # verify before interpreting this log.
            print(f"Epoch {epoch} | Discriminator Loss: {d_loss[0]} | Generator Loss: {g_loss}")

In [16]:
# Run the GAN training for 10000 epochs with batches of 128 rows; the function
# prints the losses every 100 epochs.
train_gan(epochs=10000, batch_size=128)



Epoch 0 | Discriminator Loss: 0.7238147258758545 | Generator Loss: 0.7290153503417969
Epoch 100 | Discriminator Loss: 0.8387024402618408 | Generator Loss: 0.5365782976150513
Epoch 200 | Discriminator Loss: 0.8605435490608215 | Generator Loss: 0.5045706629753113
Epoch 300 | Discriminator Loss: 0.8686620593070984 | Generator Loss: 0.49261409044265747
Epoch 400 | Discriminator Loss: 0.8731999397277832 | Generator Loss: 0.48629555106163025
Epoch 500 | Discriminator Loss: 0.8760713338851929 | Generator Loss: 0.4823617935180664
Epoch 600 | Discriminator Loss: 0.8779171109199524 | Generator Loss: 0.47967514395713806
Epoch 700 | Discriminator Loss: 0.8792669773101807 | Generator Loss: 0.4777219593524933
Epoch 800 | Discriminator Loss: 0.8802993297576904 | Generator Loss: 0.4762371778488159
Epoch 900 | Discriminator Loss: 0.8811635971069336 | Generator Loss: 0.4750687777996063
Epoch 1000 | Discriminator Loss: 0.8818137645721436 | Generator Loss: 0.47412386536598206
Epoch 1100 | Discriminator Lo