In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

# Crear un conjunto de datos ficticio
def create_synthetic_data(n_samples=1000):
    np.random.seed(42)
    x1 = np.random.normal(0, 1, n_samples)  # Característica 1
    x2 = np.random.normal(5, 2, n_samples)  # Característica 2
    x3 = np.random.randint(0, 10, n_samples)  # Característica 3
    y = 2 * x1 - 3 * x2 + x3 + np.random.normal(0, 1, n_samples)  # Variable objetivo
    return pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'y': y})

data = create_synthetic_data()

# Definir el Generador
def build_generator():
    model = tf.keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_dim=10))  # Asegúrate de que el input_dim sea correcto
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(4))  # Genera 4 características (x1, x2, x3, y)
    return model

# Definir el Discriminador
def build_discriminator():
    model = tf.keras.Sequential()
    model.add(layers.Dense(32, activation='relu', input_shape=(4,)))  # 4 características
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))  # Clasificación binaria
    return model

# Crear el modelo GAN
generator = build_generator()
discriminator = build_discriminator()

# Compilar el Discriminador
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Combinación del generador y el discriminador
discriminator.trainable = False  # Desactivar el entrenamiento del discriminador
gan_input = layers.Input(shape=(10,))  # Entrada de ruido y condiciones
gan_output = discriminator(generator(gan_input))
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer='adam')

# Entrenar el GAN
def train_gan(data, epochs, batch_size):
    X = data.values
    noise_dim = 10

    for epoch in range(epochs):
        # Seleccionar un batch aleatorio de datos reales
        idx = np.random.randint(0, X.shape[0], batch_size)
        real_data = X[idx]

        # Generar datos falsos
        noise = np.random.normal(0, 1, (batch_size, noise_dim))
        fake_data = generator.predict(noise)

        # Etiquetas para el discriminador
        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))

        # Entrenar el discriminador
        discriminator_loss_real = discriminator.train_on_batch(real_data, real_labels)
        discriminator_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)

        # Entrenar el generador
        noise = np.random.normal(0, 1, (batch_size, noise_dim))
        generator_loss = gan.train_on_batch(noise, real_labels)  # Queremos que el generador produzca datos que el discriminador clasifique como reales

        # Imprimir pérdidas
        if epoch % 100 == 0:
            print(f"Epoch: {epoch}, Discriminator Loss: {discriminator_loss_real[0]}, Generator Loss: {generator_loss}")

# Entrenar el GAN
train_gan(data, epochs=1000, batch_size=32)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Epoch: 0, Discriminator Loss: 2.623276710510254, Generator Loss: [array(1.6485976, dtype=float32), array(1.6485976, dtype=float32), array(0.421875, dtype=float32)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [13]:

# Generar nuevos datos
num_samples_to_generate = 600
noise = np.random.normal(0, 1, (num_samples_to_generate, 10))
synthetic_data = generator.predict(noise)

# Convertir a DataFrame
synthetic_df = pd.DataFrame(synthetic_data, columns=['x1', 'x2', 'x3', 'y'])
print(synthetic_df)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
            x1         x2         x3          y        res
0    29.295900  14.803454  46.973415  46.505394  61.154854
1    26.466639  13.043810  42.692356  41.653114  56.494202
2    25.839518  12.366496  41.817173  40.861126  56.396721
3    31.590761  14.549706  51.452168  49.509010  70.984573
4    23.402617  10.410828  37.950035  37.335800  53.522785
..         ...        ...        ...        ...        ...
595  28.081394  13.152507  46.537289  44.978100  63.242558
596  33.055790  16.454432  53.752972  53.414993  70.501251
597  23.547380  11.046422  38.963047  38.213104  52.918541
598  23.511370  11.134278  37.469948  36.847012  51.089851
599  30.712458  14.059270  50.144436  48.162865  69.391541

[600 rows x 5 columns]
