# Conditional GAN for Text Generation
This notebook provides a basic template to help you get started with implementing a Conditional GAN (cGAN) for text generation. Follow the instructions in each section to build and train your model.

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt

# Check TensorFlow version
print(f'TensorFlow version: {tf.__version__}')

TensorFlow version: 2.16.2


## 1. Data Preparation
Load and preprocess your dataset. Ensure your dataset includes labeled text data for conditional generation.

In [2]:
# Example: Generate dummy data
def generate_dummy_data(num_samples=1000):
    labels = np.random.randint(0, 2, size=(num_samples, 1))
    text_data = [f"Sample text {i} for label {label[0]}" for i, label in enumerate(labels)]
    return text_data, labels

text_data, labels = generate_dummy_data()
print(f"Sample data: {text_data[:5]}\nSample labels: {labels[:5]}")

Sample data: ['Sample text 0 for label 1', 'Sample text 1 for label 1', 'Sample text 2 for label 1', 'Sample text 3 for label 0', 'Sample text 4 for label 1']
Sample labels: [[1]
 [1]
 [1]
 [0]
 [1]]


## 2. Build the Conditional GAN Model
Define the generator and discriminator for the conditional GAN.

In [3]:
# Define the generator
def build_generator(noise_dim, num_classes):
    noise_input = layers.Input(shape=(noise_dim,))
    label_input = layers.Input(shape=(1,), dtype='int32')
    
    label_embedding = layers.Embedding(num_classes, noise_dim)(label_input)
    label_embedding = layers.Flatten()(label_embedding)
    
    combined_input = layers.Concatenate()([noise_input, label_embedding])
    
    x = layers.Dense(128, activation='relu')(combined_input)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dense(512, activation='relu')(x)
    output = layers.Dense(100, activation='tanh')(x)  # Adjust output size as needed
    
    return tf.keras.Model([noise_input, label_input], output)

# Define the discriminator
def build_discriminator(input_dim, num_classes):
    text_input = layers.Input(shape=(input_dim,))
    label_input = layers.Input(shape=(1,), dtype='int32')
    
    label_embedding = layers.Embedding(num_classes, input_dim)(label_input)
    label_embedding = layers.Flatten()(label_embedding)
    
    combined_input = layers.Concatenate()([text_input, label_embedding])
    
    x = layers.Dense(512, activation='relu')(combined_input)
    x = layers.Dense(256, activation='relu')(x)
    output = layers.Dense(1, activation='sigmoid')(x)
    
    return tf.keras.Model([text_input, label_input], output)

# Instantiate models
noise_dim = 100
num_classes = 2
generator = build_generator(noise_dim, num_classes)
discriminator = build_discriminator(100, num_classes)

generator.summary()
discriminator.summary()

2024-11-21 09:12:22.272293: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-11-21 09:12:22.272349: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-11-21 09:12:22.272368: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-11-21 09:12:22.272804: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-21 09:12:22.272858: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## 3. Compile and Train the GAN
Set up the training loop for the cGAN.

In [4]:
# Define the GAN training process here
# (Skeleton code provided; you will need to complete this based on your dataset and objectives)

## 4. Generate and Evaluate Text
Use the trained generator to create new text conditioned on different labels.

In [5]:
# Example: Generate text
def generate_text(generator, noise_dim, label, num_samples=5):
    noise = np.random.normal(0, 1, (num_samples, noise_dim))
    labels = np.array([label] * num_samples)
    generated_text = generator.predict([noise, labels])
    return generated_text

generated_samples = generate_text(generator, noise_dim, label=1)
print(generated_samples)

2024-11-21 09:12:22.887453: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 767ms/step
[[ 1.84572622e-01 -1.09709740e-01 -1.31677106e-01 -3.00349146e-01
   2.10536659e-01 -2.92106837e-01  1.66337252e-01 -3.12458188e-03
   1.48815155e-01 -1.01565793e-01 -6.02323934e-02 -2.58907050e-01
  -8.04745853e-02 -3.29532981e-01  3.99788171e-02  2.79476166e-01
  -9.63886902e-02  2.14786217e-01  3.22962940e-01 -5.44567071e-02
   2.20609576e-01  2.33464744e-02 -1.14237674e-01  6.97235391e-02
   3.32934618e-01  2.02348635e-01 -8.39197785e-02 -1.40936106e-01
  -1.66755803e-02 -2.37395748e-01  1.76947892e-01 -1.04689322e-01
  -8.09230283e-02  2.62375604e-02  1.16232820e-01 -3.51109147e-01
   4.68486808e-02 -9.26777795e-02 -1.92908812e-02  3.21246266e-01
   3.13420266e-01  2.36106608e-02  5.67048714e-02 -2.57680472e-02
   3.28491449e-01  5.84206097e-02  7.87265524e-02  2.94684440e-01
   1.27954826e-01  7.81693757e-02 -2.77946860e-01 -3.36399041e-02
   8.87914225e-02  3.22111994e-02 -5.47682159e-02  6.25127852e-02
  -