In [7]:
## Extractor training

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import random 
## Load the trained generator and discriminator models from the previous step.
# NOTE(review): absolute, machine-specific directory; adjust MODEL_DIR when running elsewhere.
MODEL_DIR = '/Users/anichougule/Masters/Code/Python/SEM3/Computer Vision/cs512-f24-aniket-chougule/Project/Data'
generator = tf.keras.models.load_model(f'{MODEL_DIR}/generator_model_nlp_latest.h5')
discriminator = tf.keras.models.load_model(f'{MODEL_DIR}/discriminator_model_nlp_latest.h5')

# Extractor training hyperparameters.
epochs = 20  # Number of passes over the training set; adjust based on resources and desired performance
batch_size = 64  # Batch size used for generator inference and for extractor training steps
num_samples = 600  # Maximum number of noise vectors (and stego images) used for training

def build_extractor(input_shape=(32, 32, 3), latent_dim=100, conv_filters=(64, 128, 256, 512)):
    """Build the extractor CNN that maps a stego image back to a noise vector.

    The network is a stack of stride-2 5x5 convolutions, each followed by
    LeakyReLU(0.2) and batch normalization, then a flatten and a dense layer
    with ``tanh`` activation so every output component lies in (-1, 1).

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        latent_dim: Number of components in the predicted noise vector.
        conv_filters: Number of filters for each successive convolution block.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = models.Sequential()

    # Input layer: stego image (32x32x3 with the default arguments).
    model.add(layers.InputLayer(input_shape=input_shape))

    # Each stride-2 convolution with 'same' padding halves the spatial size.
    # With the defaults: 32x32 -> 16x16x64 -> 8x8x128 -> 4x4x256 -> 2x2x512.
    for filters in conv_filters:
        model.add(layers.Conv2D(filters, kernel_size=(5, 5), strides=(2, 2), padding='same'))
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.BatchNormalization())

    # Flatten and project onto the noise-vector dimension.
    model.add(layers.Flatten())
    model.add(layers.Dense(latent_dim, activation='tanh'))

    return model
# Instantiate the extractor and configure it for regression: the MSE loss
# penalizes the deviation between the embedded vector z and the recovered z'.
extractor = build_extractor()
extractor.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(beta_1=0.5, learning_rate=0.00001),
)
extractor.summary()





In [None]:
## Extractor training: encode binary sentences as noise vectors, generate stego images, and fit the extractor.
def map_binary_to_noise(binary_string, target_dim=100, sigma=3):
    """Map a string of bits to a fixed-length noise vector with values in [-1, 1].

    The string is cut into consecutive groups of ``sigma`` bits; a short final
    group is right-padded with '0'. Each group is read as an unsigned integer
    ``m`` and rescaled to ``m / (2**sigma - 1) * 2 - 1``, so an all-zero group
    maps to -1 and an all-one group maps to +1. The resulting sequence is
    truncated to ``target_dim`` values, or repeated cyclically until it
    contains ``target_dim`` values.

    Args:
        binary_string: Non-empty string of '0' and '1' characters.
        target_dim: Length of the returned vector.
        sigma: Number of bits encoded by each vector component (>= 1).

    Returns:
        1-D ``numpy.ndarray`` of floats with ``target_dim`` elements.

    Raises:
        ValueError: If ``binary_string`` is empty, ``sigma`` is smaller than 1,
            or a bit group cannot be parsed as a base-2 number.
    """
    if sigma < 1:
        raise ValueError(f"sigma must be >= 1, got {sigma}")
    if not binary_string:
        # Without this guard there is nothing to repeat and the padding logic
        # would fail with an unrelated arithmetic error.
        raise ValueError("binary_string must contain at least one bit")

    max_value = 2 ** sigma - 1
    values = []
    for start in range(0, len(binary_string), sigma):
        # ljust is a no-op for full groups and zero-pads a short final group.
        bit_group = binary_string[start:start + sigma].ljust(sigma, '0')
        values.append((int(bit_group, 2) / max_value) * 2 - 1)

    # np.resize truncates when there are too many values and fills with
    # repeated copies of the sequence when there are too few.
    return np.resize(np.asarray(values, dtype=float), target_dim)
# Load the corpus: one binary-encoded sentence per line.
with open('/Users/anichougule/Downloads/corpus_sentences_binary.txt', 'r') as file:
    # Drop blank lines; they contain no bits to encode.
    binary_sentences = [line for line in file.read().splitlines() if line.strip()]

# Convert every binary sentence into a 100-dimensional noise vector.
noise_vectors = [map_binary_to_noise(binary_sentence, target_dim=100, sigma=3) for binary_sentence in binary_sentences]
# NOTE: the shuffle is unseeded, so the subset selected below differs between runs.
random.shuffle(noise_vectors)

# Keep at most num_samples vectors; the corpus may provide fewer than that.
noise_vectors = noise_vectors[:num_samples]
if not noise_vectors:
    raise ValueError("No binary sentences found in the corpus file; nothing to train on.")
train_size = len(noise_vectors)

# Convert to a tensor for generator input
noise_tensors = tf.convert_to_tensor(noise_vectors, dtype=tf.float32)

# Generate the stego images once up front; the noise vectors are the regression targets.
stego_images = generator.predict(noise_tensors, batch_size=batch_size)

for epoch in range(epochs):
    # Iterate over the samples that actually exist, so no empty batch is
    # passed to train_on_batch when the corpus is smaller than num_samples.
    for step in range(0, train_size, batch_size):
        # Create a batch of stego images and corresponding noise vectors
        batch_stego_images = stego_images[step:step + batch_size]
        batch_noise_vectors = noise_tensors[step:step + batch_size]

        # Train the extractor on the batch
        loss = extractor.train_on_batch(batch_stego_images, batch_noise_vectors)

    # Print progress using the value returned by the last train_on_batch call of the epoch
    print(f"Epoch {epoch + 1}/{epochs}, Extractor Loss: {loss:.4f}")

## Save the extractor model for future use.
# NOTE(review): this path lives under a different home directory than the other
# paths in this notebook; confirm it exists on the machine running the cell.
extractor.save('/home/sahil_travelapp/e_models_nlp.h5')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
Epoch 1/20, Extractor Loss: 0.6978
Epoch 2/20, Extractor Loss: 0.6461
Epoch 3/20, Extractor Loss: 0.6101
Epoch 4/20, Extractor Loss: 0.5817
Epoch 5/20, Extractor Loss: 0.5577
Epoch 6/20, Extractor Loss: 0.5367
Epoch 7/20, Extractor Loss: 0.5179
Epoch 8/20, Extractor Loss: 0.5009
