In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import layers, Model

In [3]:
X_train = pd.read_parquet(r"C:\Users\gustavo\Documents\Data Science\08-GitHub\Portifolio\Classification\titanic\model_autoencoder\data\processed\feat_X_train.parquet")

In [7]:
y_train = pd.read_parquet(r"C:\Users\gustavo\Documents\Data Science\08-GitHub\Portifolio\Classification\titanic\model_autoencoder\data\processed\feat_y_train.parquet")

In [15]:
idx = y_train.Survived == 1

In [18]:
X_survivied = X_train.loc[idx].copy()

In [19]:
# Converter para TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices(X_survivied).batch(32)

In [21]:
# Parâmetros
input_dim = X_survivied.shape[1]
latent_dim = 2

# Gerador
def build_generator():
    generator = tf.keras.Sequential([
        layers.Dense(16, activation='relu', input_shape=(latent_dim,)),
        layers.BatchNormalization(),
        layers.Dense(32, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(input_dim, activation='linear')
    ])
    return generator

# Discriminador
def build_discriminator():
    discriminator = tf.keras.Sequential([
        layers.Dense(32, activation='relu', input_shape=(input_dim,)),
        layers.Dropout(0.2),
        layers.Dense(16, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid')
    ])
    return discriminator

# Build models
generator = build_generator()
discriminator = build_discriminator()

# Optimizers
g_optimizer = tf.keras.optimizers.Adam(0.0002)
d_optimizer = tf.keras.optimizers.Adam(0.0002)

# Loss function
cross_entropy = tf.keras.losses.BinaryCrossentropy()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
@tf.function
def train_step(real_data):
    batch_size = tf.shape(real_data)[0]
    noise = tf.random.normal([batch_size, latent_dim])

    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        # Gerar dados falsos
        fake_data = generator(noise, training=True)

        # Avaliar discriminador
        real_output = discriminator(real_data, training=True)
        fake_output = discriminator(fake_data, training=True)

        # Calcular losses
        d_real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        d_fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        d_loss = d_real_loss + d_fake_loss

        g_loss = cross_entropy(tf.ones_like(fake_output), fake_output)

    # Backpropagation
    d_gradients = d_tape.gradient(d_loss, discriminator.trainable_variables)
    g_gradients = g_tape.gradient(g_loss, generator.trainable_variables)

    d_optimizer.apply_gradients(zip(d_gradients, discriminator.trainable_variables))
    g_optimizer.apply_gradients(zip(g_gradients, generator.trainable_variables))

    return d_loss, g_loss



In [23]:
# Treinamento
epochs = 5000
for epoch in range(epochs):
    for batch in dataset:
        d_loss, g_loss = train_step(batch)

    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, D Loss: {d_loss:.4f}, G Loss: {g_loss:.4f}')

# Gerar dados sintéticos
def generate_synthetic_data(num_samples):
    noise = tf.random.normal([num_samples, latent_dim])
    synthetic_data = generator(noise, training=False)
    return synthetic_data



Epoch 0, D Loss: 1.4451, G Loss: 0.5795
Epoch 1000, D Loss: 1.2086, G Loss: 0.8116
Epoch 2000, D Loss: 0.9809, G Loss: 1.1774
Epoch 3000, D Loss: 0.4344, G Loss: 2.1443
Epoch 4000, D Loss: 0.3771, G Loss: 2.7852


In [24]:
# Gerar 100 amostras sintéticas
synthetic_samples = generate_synthetic_data(100)
synthetic_df = pd.DataFrame(synthetic_samples, columns=X_train.columns)



print(synthetic_df.head(10))

   numerical_pipe__Age  numerical_pipe__SibSp  numerical_pipe__Fare  \
0             0.272416              -0.072726             -0.199029   
1             0.191477              -0.479866             -0.243490   
2            -0.381624              -0.386304             -0.393230   
3            -0.235635               0.030875             -0.296899   
4            -1.144253              -0.001301              2.769097   
5             0.512376               0.050007             -0.109448   
6             0.797452              -0.646170             -0.131762   
7             0.532833              -0.671380             -0.242999   
8            -0.484641              -0.282037             -0.338090   
9            -1.852519               1.319683             -0.186757   

   numerical_pipe__IsAlone  categorical_pipe__Pclass_3  \
0                 0.742327                    0.431745   
1                 0.798753                    0.911804   
2                 0.631166                  

In [26]:
X_survivied

Unnamed: 0,numerical_pipe__Age,numerical_pipe__SibSp,numerical_pipe__Fare,numerical_pipe__IsAlone,categorical_pipe__Pclass_3,categorical_pipe__Pclass_2,categorical_pipe__Sex_male,categorical_pipe__Cabin_missing
146,-0.159697,-0.482373,-0.522920,0.846891,0.902894,-0.514929,0.765962,0.538122
744,0.149776,-0.482373,-0.520115,0.846891,0.902894,-0.514929,0.765962,0.538122
261,-2.016534,3.020077,-0.010854,-1.180790,0.902894,-0.514929,0.765962,0.538122
506,0.304512,-0.482373,-0.127791,-1.180790,-1.107550,1.942017,-1.305547,0.538122
572,0.536617,-0.482373,-0.119381,0.846891,-1.107550,-0.514929,0.765962,-1.858315
...,...,...,...,...,...,...,...,...
347,-0.082329,0.393239,-0.342674,-1.180790,0.902894,-0.514929,-1.305547,0.538122
460,1.465035,-0.482373,-0.115853,0.846891,-1.107550,-0.514929,0.765962,-1.858315
237,-1.629693,-0.482373,-0.122365,-1.180790,-1.107550,1.942017,-1.305547,0.538122
31,-0.082329,0.393239,2.488153,-1.180790,-1.107550,-0.514929,-1.305547,-1.858315


In [25]:
synthetic_df

Unnamed: 0,numerical_pipe__Age,numerical_pipe__SibSp,numerical_pipe__Fare,numerical_pipe__IsAlone,categorical_pipe__Pclass_3,categorical_pipe__Pclass_2,categorical_pipe__Sex_male,categorical_pipe__Cabin_missing
0,0.272416,-0.072726,-0.199029,0.742327,0.431745,-0.578804,0.127671,0.517600
1,0.191477,-0.479866,-0.243490,0.798753,0.911804,-0.484512,0.531174,0.484367
2,-0.381624,-0.386304,-0.393230,0.631166,0.875995,-0.474940,-1.363585,0.451907
3,-0.235635,0.030875,-0.296899,-0.128420,0.877291,-0.487960,-1.287680,0.457523
4,-1.144253,-0.001301,2.769097,-1.212775,-0.686554,-0.721771,-2.146419,-1.612840
...,...,...,...,...,...,...,...,...
95,0.442867,-0.594430,-0.322837,0.825633,0.900253,-0.512084,0.672759,0.443153
96,1.417096,-0.608440,0.809181,0.597894,-0.931453,-0.488025,-1.139117,-1.923290
97,1.334807,-0.626464,0.175019,-0.750366,-0.791007,-0.649485,-1.290329,-1.810493
98,-0.837694,0.149076,-0.467644,-1.121508,0.832142,-0.462108,-1.297661,0.471248
