<a href="https://colab.research.google.com/github/Vcode7/Colabnotbook/blob/main/Lstmwithcgan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import kagglehub

# Download latest version of the "moltean/fruits" dataset through kagglehub.
# The call returns the local directory holding the files; later cells build
# their dataset paths from `path`.
path = kagglehub.dataset_download("moltean/fruits")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/moltean/fruits?dataset_version_number=11...


100%|██████████| 964M/964M [00:09<00:00, 107MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/moltean/fruits/versions/11


In [2]:
import os
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTM, Reshape, Conv2D, Conv2DTranspose, Flatten, LeakyReLU, BatchNormalization
from tensorflow.keras import Input, Model
import numpy as np

In [7]:
# List the class folders of the training split. The location is derived from
# the `path` returned by kagglehub instead of a hard-coded cache directory, so
# the cell keeps working when the dataset version or cache location changes.
os.listdir(os.path.join(path, 'fruits-360_dataset_100x100', 'fruits-360', 'Training'))

['Potato Sweet 1',
 'Cherry 1',
 'Pear 1',
 'Melon Piel de Sapo 1',
 'Tomato 1',
 'Mango Red 1',
 'Dates 1',
 'Carambula 1',
 'Nectarine 1',
 'Corn 1',
 'Tomato Heart 1',
 'Mango 1',
 'Eggplant 1',
 'Cherry Wax Red 1',
 'Pear Forelle 1',
 'Pepino 1',
 'Beetroot 1',
 'Tomato Yellow 1',
 'Cantaloupe 2',
 'Plum 1',
 'Rambutan 1',
 'Apple Crimson Snow 1',
 'Cucumber Ripe 1',
 'Pepper Red 1',
 'Pear Williams 1',
 'Mangostan 1',
 'Lychee 1',
 'Apple Golden 1',
 'Pear Stone 1',
 'Walnut 1',
 'Quince 1',
 'Pomegranate 1',
 'Peach Flat 1',
 'Banana Red 1',
 'Tomato 4',
 'Grapefruit Pink 1',
 'Pepper Orange 1',
 'Cherry Wax Yellow 1',
 'Onion Red 1',
 'Cabbage white 1',
 'Apple Red Yellow 1',
 'Grape Pink 1',
 'Potato Red Washed 1',
 'Kiwi 1',
 'Tomato 2',
 'Banana 1',
 'Onion Red Peeled 1',
 'Lemon 1',
 'Grape Blue 1',
 'Eggplant long 1',
 'Zucchini 1',
 'Onion White 1',
 'Mulberry 1',
 'Potato White 1',
 'Avocado 1',
 'Huckleberry 1',
 'Cucumber 3',
 'Zucchini dark 1',
 'Apple Red Delicious 1'

In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define data paths and parameters
DATASET_PATH = path + '/fruits-360_dataset_100x100/fruits-360/Training'
IMG_HEIGHT, IMG_WIDTH = 64, 64
BATCH_SIZE = 32


def scale_to_tanh_range(image):
    """Map pixel values from [0, 255] to [-1, 1].

    The generator's last layer uses a tanh activation and the preview code
    de-normalises with ``x * 127.5 + 127.5``, so real images must be fed to the
    discriminator in the same [-1, 1] range as the generated ones.
    """
    return image / 127.5 - 1.0


# Create an ImageDataGenerator instance
train_datagen = ImageDataGenerator(
    preprocessing_function=scale_to_tanh_range,  # Scale pixel values to [-1, 1]
    rotation_range=40,  # Randomly rotate images up to 40 degrees
    width_shift_range=0.2,  # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,  # Randomly shear images
    zoom_range=0.2,  # Randomly zoom images
    horizontal_flip=True,  # Randomly flip images horizontally
    fill_mode='nearest'  # Fill mode for padding
)

# Generate training data using flow_from_directory
train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='sparse'  # Integer class indices (not one-hot), one per image
)

# Get the number of classes (one sub-folder per class)
num_classes = len(train_generator.class_indices)

# Print the shapes for verification (optional)
image_batch, label_batch = next(train_generator)

print(f"Image batch shape: {image_batch.shape}")
print(f"Label batch shape: {label_batch.shape}")

Found 70491 images belonging to 141 classes.
Image batch shape: (32, 64, 64, 3)
Label batch shape: (32,)


In [9]:

# Derived from the data generator so the embedding table always matches the
# number of class folders actually found on disk.
VOCAB_SIZE = num_classes  # Number of unique classes (labels)
EMBEDDING_DIM = 64  # Dimension of the embedding space
LSTM_UNITS = 128  # Number of LSTM units

# Step 1: Define the LSTM Model
def build_lstm_model(vocab_size, embedding_dim, lstm_units):
    """Build the label encoder: Embedding -> LSTM -> Dense(relu).

    Args:
        vocab_size: Number of distinct label ids (embedding input dimension).
        embedding_dim: Width of each label embedding.
        lstm_units: Units of the LSTM and of the final Dense layer, i.e. the
            size of the encoded label vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    encoder = tf.keras.Sequential()

    # Look up a dense embedding for every integer label.
    encoder.add(tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim))

    # Keep only the final hidden state of the label sequence.
    encoder.add(tf.keras.layers.LSTM(units=lstm_units, return_sequences=False))

    # Project to the final encoded representation of size lstm_units.
    encoder.add(tf.keras.layers.Dense(units=lstm_units, activation='relu'))

    return encoder

# Create the label-encoder model from the constants defined above.
lstm_model = build_lstm_model(VOCAB_SIZE, EMBEDDING_DIM, LSTM_UNITS)

# Summary of the model to verify shapes
# NOTE(review): the Sequential model is created without an Input layer, so the
# summary may report unbuilt layers until the model is first called - confirm.
lstm_model.summary()

In [10]:
IMG_CHANNELS = 3
NOISE_DIM = 100  # Dimension of the noise vector
# The condition vector is the output of the label encoder, whose last Dense
# layer has LSTM_UNITS units; derive it so the two can never drift apart.
CONDITION_DIM = LSTM_UNITS  # Dimension of LSTM output (encoding the condition)

# Step 1: Define the Generator Model
def build_generator(noise_dim, condition_dim):
    """Build the conditional generator.

    The encoded label is projected to ``noise_dim`` features, concatenated with
    the noise vector, expanded to an 8x8x256 feature map and upsampled by three
    stride-2 transposed convolutions into a 3-channel tanh image.

    Args:
        noise_dim: Length of the noise input vector.
        condition_dim: Length of the encoded-label input vector.

    Returns:
        A ``tf.keras`` model taking ``[noise, condition]`` and returning images.
    """
    layers = tf.keras.layers

    z = layers.Input(shape=(noise_dim,))
    condition = layers.Input(shape=(condition_dim,))

    # Bring the condition to the same width as the noise before merging.
    projected_condition = layers.Dense(noise_dim, activation="relu")(condition)
    features = layers.Concatenate()([z, projected_condition])

    # Seed feature map for the upsampling stack.
    features = layers.Dense(8 * 8 * 256, use_bias=False)(features)
    features = layers.Reshape((8, 8, 256))(features)

    # Two upsampling stages: transposed conv -> batch norm -> ReLU.
    for filters in (128, 64):
        features = layers.Conv2DTranspose(
            filters, (5, 5), strides=(2, 2), padding='same', use_bias=False
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU()(features)

    # Final upsampling stage producing the RGB image in the tanh range.
    image = layers.Conv2DTranspose(
        3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'
    )(features)

    return tf.keras.models.Model([z, condition], image)

# Step 2: Define the Discriminator Model
def build_discriminator():
    """Build the conditional discriminator.

    Takes an image of shape (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS) and an
    encoded label of length CONDITION_DIM, and returns a sigmoid probability
    per sample.

    Returns:
        A ``tf.keras.Model`` named "discriminator" with inputs
        ``[image, condition]``.
    """
    layers = tf.keras.layers

    image = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    condition = layers.Input(shape=(CONDITION_DIM,))

    # Strided convolutions downsample the image into a feature map.
    features = image
    for filters in (64, 128, 256):
        features = layers.Conv2D(
            filters, (5, 5), strides=(2, 2), padding="same", activation="relu"
        )(features)
    features = layers.Flatten()(features)

    # Append the label encoding to the flattened image features.
    merged = layers.Concatenate()([features, condition])

    # Small classification head ending in a real/fake probability.
    hidden = layers.Dense(10, activation="relu")(merged)
    probability = layers.Dense(1, activation="sigmoid")(hidden)

    return tf.keras.Model([image, condition], probability, name="discriminator")

# Step 3: Define the GAN Model (combining generator and discriminator)
def build_gan(generator, discriminator):
    """Chain generator and discriminator into one model.

    Args:
        generator: Model mapping ``[noise, condition]`` to an image.
        discriminator: Model mapping ``[image, condition]`` to a probability.

    Returns:
        A ``tf.keras.Model`` named "gan" mapping ``[noise, condition]`` to the
        discriminator's verdict on the generated image.
    """
    z = tf.keras.layers.Input(shape=(NOISE_DIM,))
    condition = tf.keras.layers.Input(shape=(CONDITION_DIM,))

    # The same condition drives the generator and is shown to the discriminator.
    fake_image = generator([z, condition])
    verdict = discriminator([fake_image, condition])

    return tf.keras.Model([z, condition], verdict, name="gan")

# Create the models: generator, discriminator, and the combined model that
# feeds the generator's output into the discriminator.
generator = build_generator(NOISE_DIM, CONDITION_DIM)
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

# Summaries for each model (layer shapes and parameter counts)
generator.summary()
discriminator.summary()
gan.summary()

In [11]:
# Shared binary cross-entropy used by both losses below. from_logits=False
# because the discriminator's last layer already applies a sigmoid.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def encode_labels(labels):
    """Encode a batch of class labels with the LSTM label encoder.

    Each label becomes a length-1 sequence before being passed to
    ``lstm_model``.

    Args:
        labels: Batch of label ids; reshaped to (batch_size, 1).

    Returns:
        The output of ``lstm_model`` for the reshaped labels.
    """
    label_sequences = tf.reshape(labels, (-1, 1))
    return lstm_model(label_sequences)

# Define discriminative loss
def discriminator_loss(real_output, fake_output):
    """Sum of the cross-entropy on real samples (target 1) and fakes (target 0).

    Args:
        real_output: Discriminator predictions for real images.
        fake_output: Discriminator predictions for generated images.

    Returns:
        The combined discriminator loss.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

# Define generative loss
def generator_loss(fake_output):
    """Cross-entropy of the discriminator's fake predictions against target 1.

    The generator is rewarded when the discriminator labels its images as real.

    Args:
        fake_output: Discriminator predictions for generated images.

    Returns:
        The generator loss.
    """
    wanted_verdict = tf.ones_like(fake_output)
    return cross_entropy(wanted_verdict, fake_output)

# Optimizers
# One Adam instance updates the generator together with the LSTM label
# encoder; the discriminator has an Adam instance of its own.
gan_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Define constants
EPOCHS = 50  # number of epochs passed to train() at the bottom of this cell
NOISE_DIM = 100  # same value as the NOISE_DIM set next to the generator definition
NUM_EXAMPLES_TO_GENERATE = 16  # presumably the number of preview images to generate

def train_step(images, labels):
    """Run one discriminator update followed by one generator/encoder update.

    Args:
        images: Batch of real images, convertible to a float32 tensor.
        labels: Batch of integer class labels matching ``images``.

    Returns:
        Tuple ``(gen_loss, disc_loss)`` of scalar loss tensors for this step.
    """
    images = tf.convert_to_tensor(images, dtype=tf.float32)

    # Size the noise from the actual batch: the last batch of an epoch can be
    # smaller than BATCH_SIZE, and a fixed-size noise tensor would then not
    # line up with the encoded labels.
    batch_size = tf.shape(images)[0]
    noise = tf.random.normal([batch_size, NOISE_DIM])

    # ----> Train Discriminator <----
    # Only discriminator variables are updated here, so the label encoding and
    # the generated images act as constants for this step.
    with tf.GradientTape() as disc_tape:
        encoded_labels = encode_labels(labels)
        generated_images = generator([noise, encoded_labels], training=True)
        real_output = discriminator([images, encoded_labels], training=True)
        fake_output = discriminator([generated_images, encoded_labels], training=True)

        disc_loss = discriminator_loss(real_output, fake_output)

    discriminator_variables = discriminator.trainable_variables
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator_variables)
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator_variables))

    # ----> Train Generator + LSTM label encoder <----
    # The labels are encoded inside the tape so the encoder's forward pass is
    # recorded; otherwise its gradients are None and it never trains. The
    # discriminator needs no freezing: its variables are simply not passed to
    # this optimizer.
    with tf.GradientTape() as gen_tape:
        encoded_labels = encode_labels(labels)
        generated_images = generator([noise, encoded_labels], training=True)
        fake_output = discriminator([generated_images, encoded_labels], training=True)
        gen_loss = generator_loss(fake_output)

    all_trainable_variables = generator.trainable_variables + lstm_model.trainable_variables
    gradients_of_generator = gen_tape.gradient(gen_loss, all_trainable_variables)
    gan_optimizer.apply_gradients(zip(gradients_of_generator, all_trainable_variables))

    return gen_loss, disc_loss

# Define function to generate and save images
def generate_and_save_images(model, epoch, test_noise, test_labels):
    """Generate a grid of sample images and save it as a PNG.

    Args:
        model: Generator called as ``model([test_noise, test_labels])``.
        epoch: Epoch number, used in the file name ``image_at_epoch_XXXX.png``.
        test_noise: Batch of noise vectors.
        test_labels: Batch of encoded labels matching ``test_noise``.
    """
    predictions = model([test_noise, test_labels], training=False)

    # Rescale from the tanh range [-1, 1] to [0, 255]; the clip guards the
    # uint8 cast against values just outside that range.
    pixels = np.clip(predictions.numpy() * 127.5 + 127.5, 0, 255).astype("uint8")

    # Smallest square grid that fits every sample (4x4 for 16 images), so
    # changing the number of preview samples does not break plotting.
    num_images = pixels.shape[0]
    grid_size = max(1, int(np.ceil(np.sqrt(num_images))))

    fig = plt.figure(figsize=(4, 4))
    for i in range(num_images):
        ax = fig.add_subplot(grid_size, grid_size, i + 1)
        ax.imshow(pixels[i])
        ax.axis('off')

    fig.savefig(f'image_at_epoch_{epoch:04d}.png')
    # Close this figure explicitly so repeated calls do not accumulate figures.
    plt.close(fig)

# Training loop
def train(dataset, epochs):
    """Train the conditional GAN and periodically save preview images.

    Args:
        dataset: Iterable yielding ``(image_batch, label_batch)`` pairs. If it
            supports ``len()`` (e.g. a Keras directory iterator), that many
            batches make up one epoch; otherwise it is consumed until
            exhausted each epoch.
        epochs: Number of epochs to run.
    """
    import itertools  # local import so this definition is self-contained

    # Keras image iterators yield batches forever, so a plain `for` over them
    # never finishes an epoch. Bound each epoch by the iterator's length.
    try:
        steps_per_epoch = len(dataset)
    except TypeError:
        steps_per_epoch = None  # unknown length: run until exhausted

    # Fixed noise and labels so previews are comparable across epochs.
    seed_noise = tf.random.normal([NUM_EXAMPLES_TO_GENERATE, NOISE_DIM])
    seed_labels = tf.range(NUM_EXAMPLES_TO_GENERATE) % VOCAB_SIZE

    def save_preview(epoch):
        # Re-encode the labels on every call: the LSTM encoder is trained with
        # the generator, so a cached encoding would go stale.
        generate_and_save_images(generator, epoch, seed_noise, encode_labels(seed_labels))

    for epoch in range(1, epochs + 1):
        gen_loss_list = []
        disc_loss_list = []

        for image_batch, label_batch in itertools.islice(dataset, steps_per_epoch):
            gen_loss, disc_loss = train_step(image_batch, label_batch)
            gen_loss_list.append(float(gen_loss))
            disc_loss_list.append(float(disc_loss))

        # Generate and save images on the first epoch and every 10 epochs
        if epoch % 10 == 0 or epoch == 1:
            save_preview(epoch)

        print(f'Epoch {epoch}, Gen Loss: {np.mean(gen_loss_list)}, Disc Loss: {np.mean(disc_loss_list)}')

    # Generate after the final epoch
    save_preview(epochs)

# Start training on the augmented image generator for EPOCHS epochs.
train(train_generator, EPOCHS)

