In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        pass
print("hello") 

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

hello


## importing libararies

In [5]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import time

## Configuration

In [7]:
tf.random.set_seed(42)
np.random.seed(42)

# Define improved parameters
BATCH_SIZE = 16  # Increased from 8
EPOCHS = 200     # Increased from 100
LATENT_DIM = 256  # Increased from 128
NUM_CLASSES = 7
IMAGE_SIZE = (128, 128)  # Reduced from 256x256 for faster training
CHANNELS = 3  # RGB images

# Dataset path (update this to your Kaggle dataset path)
DATA_DIR = "/kaggle/input/mangoleaf-dataset/dataset"  # Update this path

# Class names mapping
CLASS_NAMES = ['ANTHRACNOSE', 'BACTERIAL_CRANKER', 'DIEBACK', 
               'GALL_MILDGE', 'HEALTHY', 'MANGO_SOOTY', 'SOOTY_MOULD']

## Define instance normalization layer

In [8]:
class InstanceNormalization(layers.Layer):
    def __init__(self, epsilon=1e-5):
        super(InstanceNormalization, self).__init__()
        self.epsilon = epsilon

    def build(self, input_shape):
        self.scale = self.add_weight(
            name='scale',
            shape=input_shape[-1:],
            initializer='ones',
            trainable=True)
        self.offset = self.add_weight(
            name='offset',
            shape=input_shape[-1:],
            initializer='zeros',
            trainable=True)

    def call(self, x):
        mean, variance = tf.nn.moments(x, axes=[1, 2], keepdims=True)
        inv = tf.math.rsqrt(variance + self.epsilon)
        normalized = (x - mean) * inv
        return self.scale * normalized + self.offset

def preprocess_data():
    """Load and preprocess the dataset with improved augmentation."""
    print("Loading and preprocessing data...")
    
    # Enhanced data augmentation for more variety
    datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2,
        rotation_range=10,      # Small rotations
        width_shift_range=0.1,  # Small horizontal shifts
        height_shift_range=0.1, # Small vertical shifts
        brightness_range=[0.9, 1.1],  # Small brightness variations
        zoom_range=0.1,         # Small zoom changes
        horizontal_flip=True    # Horizontal flips
    )
    
    # Load training data
    train_generator = datagen.flow_from_directory(
        DATA_DIR,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        subset='training',
        shuffle=True
    )
    
    # Load validation data
    val_generator = datagen.flow_from_directory(
        DATA_DIR,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        subset='validation',
        shuffle=True
    )
    
    return train_generator, val_generator

## Generator

In [9]:
def build_generator():
    """Build an improved generator model with residual connections and instance normalization."""
    # Input for latent vector
    latent_input = layers.Input(shape=(LATENT_DIM,))
    
    # Input for class label (one-hot encoded)
    label_input = layers.Input(shape=(NUM_CLASSES,))
    
    # Concatenate with wider initial layer
    x = layers.Concatenate()([latent_input, label_input])
    x = layers.Dense(16 * 16 * 256)(x)  # Start with larger initial feature map
    x = InstanceNormalization()(x)  # Use Instance Normalization instead of BatchNorm
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Reshape((16, 16, 256))(x)
    
    # Define residual block with instance normalization
    def residual_block(x, filters):
        shortcut = x
        x = layers.Conv2D(filters, 3, padding='same')(x)
        x = InstanceNormalization()(x)
        x = layers.LeakyReLU(0.2)(x)
        x = layers.Conv2D(filters, 3, padding='same')(x)
        x = InstanceNormalization()(x)
        # Add skip connection
        if shortcut.shape[-1] != filters:
            shortcut = layers.Conv2D(filters, 1, padding='same')(shortcut)
        x = layers.Add()([shortcut, x])
        return x
    
    # Upsampling with residual blocks
    x = layers.Conv2DTranspose(128, 4, strides=2, padding='same')(x)  # 32x32
    x = InstanceNormalization()(x)
    x = layers.LeakyReLU(0.2)(x)
    x = residual_block(x, 128)
    
    x = layers.Conv2DTranspose(64, 4, strides=2, padding='same')(x)   # 64x64
    x = InstanceNormalization()(x)
    x = layers.LeakyReLU(0.2)(x)
    x = residual_block(x, 64)
    
    x = layers.Conv2DTranspose(32, 4, strides=2, padding='same')(x)   # 128x128
    x = InstanceNormalization()(x)
    x = layers.LeakyReLU(0.2)(x)
    x = residual_block(x, 32)
    
    # Output layer with tanh activation
    output = layers.Conv2D(CHANNELS, 3, padding='same', activation='tanh')(x)
    
    model = models.Model([latent_input, label_input], output, name='generator')
    return model

## Discriminator

In [10]:
def build_discriminator():
    """Build an improved discriminator model with spectral normalization and feature outputs."""
    # Input for image
    image_input = layers.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], CHANNELS))
    
    # Input for class label (one-hot encoded)
    label_input = layers.Input(shape=(NUM_CLASSES,))
    
    # Embed label to match image dimensions
    label_embedding = layers.Dense(IMAGE_SIZE[0] * IMAGE_SIZE[1])(label_input)
    label_embedding = layers.Reshape((IMAGE_SIZE[0], IMAGE_SIZE[1], 1))(label_embedding)
    
    # Concatenate image and label
    x = layers.Concatenate()([image_input, label_embedding])
    
    # Spectral normalization wrapper for Conv2D layers
    def spectral_norm_conv(x, filters, kernel_size=4, strides=2, padding='same'):
        # A simplified spectral normalization using layer normalization for stability
        x = layers.Conv2D(filters, kernel_size, strides=strides, padding=padding)(x)
        x = layers.LayerNormalization()(x)
        return x
    
    # Apply convolutional layers with spectral normalization
    x = spectral_norm_conv(x, 32)  # 64x64
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Dropout(0.25)(x)
    
    x = spectral_norm_conv(x, 64)  # 32x32
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Dropout(0.25)(x)
    
    x = spectral_norm_conv(x, 128)  # 16x16
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Dropout(0.25)(x)
    
    x = spectral_norm_conv(x, 256)  # 8x8
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Dropout(0.25)(x)
    
    # Add mini-batch discrimination
    def minibatch_stddev(x):
        # A simplified version of minibatch discrimination
        mean = tf.reduce_mean(x, axis=0, keepdims=True)
        mean_diff = tf.reduce_mean(tf.abs(x - mean), axis=-1, keepdims=True)
        mean_diff = tf.tile(mean_diff, [1, 1, 1, 1])  # Match feature dimensions
        return tf.concat([x, mean_diff], axis=-1)
    
    # Add minibatch discrimination layer
    x = minibatch_stddev(x)
    
    # Flatten and feature extraction
    x = layers.Flatten()(x)
    features = layers.Dense(512)(x)
    features = layers.LeakyReLU(0.2)(features)
    
    # Output layer
    output = layers.Dense(1, activation='sigmoid')(features)
    
    model = models.Model([image_input, label_input], [output, features], name='discriminator')
    return model

## custom data generator wrapper 

In [11]:
class TensorDataGenerator:
    def __init__(self, keras_generator):
        self.keras_generator = keras_generator
        self.n = len(keras_generator)
        self.batch_size = keras_generator.batch_size
        self.samples = keras_generator.samples
        self.class_indices = keras_generator.class_indices
    
    def __len__(self):
        return len(self.keras_generator)
    
    def __getitem__(self, idx):
        images, labels = self.keras_generator[idx]
        # Convert to TensorFlow tensors
        return tf.convert_to_tensor(images, dtype=tf.float32), tf.convert_to_tensor(labels, dtype=tf.float32)

## Gradient penalty function for WGAN-GP stability

In [12]:
def gradient_penalty(discriminator, real_images, fake_images, labels):
    batch_size = tf.shape(real_images)[0]
    alpha = tf.random.uniform([batch_size, 1, 1, 1], 0.0, 1.0)
    interpolated = real_images * alpha + fake_images * (1 - alpha)
    
    with tf.GradientTape() as gp_tape:
        gp_tape.watch(interpolated)
        pred, _ = discriminator([interpolated, labels], training=True)
    
    grads = gp_tape.gradient(pred, interpolated)
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

## define custom trainig loop

In [13]:
def train_gan(generator, discriminator, train_generator, val_generator, epochs, output_dir):
    """Custom training loop for the GAN with stability improvements."""
    # Convert to tensor generators
    train_gen = TensorDataGenerator(train_generator)
    val_gen = TensorDataGenerator(val_generator)
    
    # Create optimizers with different learning rates for generator and discriminator
    generator_lr = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.00005,  # Reduced learning rate for generator
        decay_steps=1000,
        decay_rate=0.95)
    
    discriminator_lr = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.0002,  # Higher learning rate for discriminator
        decay_steps=1000,
        decay_rate=0.95)
    
    generator_optimizer = optimizers.Adam(learning_rate=generator_lr, beta_1=0.5, beta_2=0.999)
    discriminator_optimizer = optimizers.Adam(learning_rate=discriminator_lr, beta_1=0.5, beta_2=0.999)
    
    # Loss functions
    bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    
    # Prepare for early stopping with higher patience
    best_g_loss = float('inf')
    patience = 25
    patience_counter = 0
    
    # Training metrics history
    history = {'d_loss': [], 'g_loss': [], 'val_d_loss': [], 'val_g_loss': []}
    
    # Fixed noise for generating sample images
    sample_per_class = 2
    fixed_noise = tf.random.normal(shape=(NUM_CLASSES * sample_per_class, LATENT_DIM))
    fixed_labels = np.zeros((NUM_CLASSES * sample_per_class, NUM_CLASSES))
    for i in range(NUM_CLASSES):
        for j in range(sample_per_class):
            fixed_labels[i * sample_per_class + j, i] = 1
    fixed_labels = tf.convert_to_tensor(fixed_labels, dtype=tf.float32)
    
    # Create output directory for samples
    samples_dir = os.path.join(output_dir, "samples")
    os.makedirs(samples_dir, exist_ok=True)
    
    # Lambda values for different loss components
    lambda_gp = 10.0        # Gradient penalty weight
    lambda_feat = 10.0      # Feature matching weight
    lambda_div = 0.1        # Diversity weight
    
    # Training loop
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        start_time = time.time()
        
        # Initialize metrics
        train_d_losses = []
        train_g_losses = []
        train_d_accs = []
        
        # Calculate noise standard deviation (decreases over time for instance noise)
        noise_std = max(0.0, 0.05 * (1.0 - epoch / (EPOCHS * 0.3)))
        
        # Training
        for batch_idx in range(len(train_gen)):
            # Get a batch of real images
            real_images, one_hot_labels = train_gen[batch_idx]
            batch_size = real_images.shape[0]
            
            # Skip batches with unexpected sizes (last batch might be smaller)
            if batch_size != BATCH_SIZE:
                continue
            
            # Generate random noise
            random_latent_vectors = tf.random.normal(shape=(batch_size, LATENT_DIM))
            
            # Generate fake images
            generated_images = generator([random_latent_vectors, one_hot_labels], training=True)
            
            # Add instance noise to both real and fake images (decreases over time)
            real_images_noisy = real_images + tf.random.normal(tf.shape(real_images), mean=0.0, stddev=noise_std)
            generated_images_noisy = generated_images + tf.random.normal(tf.shape(generated_images), mean=0.0, stddev=noise_std)
            
            # Use label smoothing for real labels (0.9 instead of 1.0)
            real_labels = tf.ones((batch_size, 1)) * 0.9
            fake_labels = tf.zeros((batch_size, 1))
            
            # Train discriminator
            with tf.GradientTape() as d_tape:
                # Discriminator on real images
                real_output, real_features = discriminator([real_images_noisy, one_hot_labels], training=True)
                # Discriminator on fake images
                fake_output, fake_features = discriminator([generated_images_noisy, one_hot_labels], training=True)
                
                # Calculate discriminator loss
                d_loss_real = bce_loss(real_labels, real_output)
                d_loss_fake = bce_loss(fake_labels, fake_output)
                d_loss = d_loss_real + d_loss_fake
                
                # Add gradient penalty
                gp = gradient_penalty(discriminator, real_images, generated_images, one_hot_labels)
                d_loss += lambda_gp * gp
            
            # Calculate discriminator accuracy
            d_acc = (tf.reduce_mean(tf.cast(real_output > 0.5, tf.float32)) * 0.5 + 
                     tf.reduce_mean(tf.cast(fake_output < 0.5, tf.float32)) * 0.5)
            train_d_accs.append(d_acc.numpy())
            
            # Calculate gradients and update discriminator weights
            d_gradients = d_tape.gradient(d_loss, discriminator.trainable_variables)
            # Clip gradients to prevent exploding gradients
            d_gradients, _ = tf.clip_by_global_norm(d_gradients, 1.0)
            discriminator_optimizer.apply_gradients(zip(d_gradients, discriminator.trainable_variables))
            
            # Store discriminator loss
            train_d_losses.append(d_loss.numpy())
            
            # Train generator every step (removing d_acc < 0.8 condition)
            # Generate two different noise vectors for diversity loss
            noise1 = tf.random.normal(shape=(batch_size, LATENT_DIM))
            noise2 = tf.random.normal(shape=(batch_size, LATENT_DIM))
            
            with tf.GradientTape() as g_tape:
                # Generate fake images from both noise vectors
                generated_images1 = generator([noise1, one_hot_labels], training=True)
                generated_images2 = generator([noise2, one_hot_labels], training=True)
                
                # Discriminator output for fake images
                fake_output1, fake_features1 = discriminator([generated_images1, one_hot_labels], training=True)
                _, fake_features2 = discriminator([generated_images2, one_hot_labels], training=True)
                
                # Basic generator loss (we want discriminator to classify fake images as real)
                g_loss = bce_loss(real_labels, fake_output1)
                
                # Add feature matching loss
                feature_matching_loss = tf.reduce_mean(tf.abs(fake_features1 - real_features))
                g_loss += lambda_feat * feature_matching_loss
                
                # Add diversity loss (mode seeking)
                img_distance = tf.reduce_mean(tf.abs(generated_images1 - generated_images2))
                noise_distance = tf.reduce_mean(tf.abs(noise1 - noise2))
                diversity_loss = 1.0 / (img_distance / noise_distance + 1e-6)
                g_loss += lambda_div * diversity_loss
                
                # Add L2 regularization to prevent extreme weights
                l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in generator.trainable_variables])
                g_loss += 0.0001 * l2_reg
            
            # Calculate gradients and update generator weights
            g_gradients = g_tape.gradient(g_loss, generator.trainable_variables)
            # Clip gradients to prevent exploding gradients
            g_gradients, _ = tf.clip_by_global_norm(g_gradients, 1.0)
            generator_optimizer.apply_gradients(zip(g_gradients, generator.trainable_variables))
            
            # Store generator loss
            train_g_losses.append(g_loss.numpy())
            
            # Break after one epoch
            if batch_idx >= len(train_gen) - 1:
                break
        
        # Validation
        val_d_losses = []
        val_g_losses = []
        
        for batch_idx in range(len(val_gen)):
            # Get a batch of real images
            real_images, one_hot_labels = val_gen[batch_idx]
            batch_size = real_images.shape[0]
            
            # Skip batches with unexpected sizes
            if batch_size != BATCH_SIZE:
                continue
            
            # Generate random noise
            random_latent_vectors = tf.random.normal(shape=(batch_size, LATENT_DIM))
            
            # Generate fake images
            generated_images = generator([random_latent_vectors, one_hot_labels], training=False)
            
            # Evaluate discriminator
            real_output, real_features = discriminator([real_images, one_hot_labels], training=False)
            fake_output, fake_features = discriminator([generated_images, one_hot_labels], training=False)
            
            # Calculate discriminator loss
            real_labels = tf.ones((batch_size, 1)) * 0.9
            fake_labels = tf.zeros((batch_size, 1))
            d_loss_real = bce_loss(real_labels, real_output)
            d_loss_fake = bce_loss(fake_labels, fake_output)
            d_loss = d_loss_real + d_loss_fake
            
            # Evaluate generator with feature matching
            g_loss = bce_loss(real_labels, fake_output)
            feature_matching_loss = tf.reduce_mean(tf.abs(fake_features - real_features))
            g_loss += lambda_feat * feature_matching_loss
            
            # Store losses
            val_d_losses.append(d_loss.numpy())
            val_g_losses.append(g_loss.numpy())
            
            # Break after one validation epoch
            if batch_idx >= len(val_gen) - 1:
                break
        
        # Calculate average losses
        avg_train_d_loss = np.mean(train_d_losses)
        avg_train_g_loss = np.mean(train_g_losses) if train_g_losses else 0
        avg_train_d_acc = np.mean(train_d_accs)
        avg_val_d_loss = np.mean(val_d_losses)
        avg_val_g_loss = np.mean(val_g_losses)
        
        # Update history
        history['d_loss'].append(avg_train_d_loss)
        history['g_loss'].append(avg_train_g_loss)
        history['val_d_loss'].append(avg_val_d_loss)
        history['val_g_loss'].append(avg_val_g_loss)
        
        # Print progress
        time_taken = time.time() - start_time
        print(f"d_loss: {avg_train_d_loss:.4f}, g_loss: {avg_train_g_loss:.4f}, d_acc: {avg_train_d_acc:.4f}")
        print(f"val_d_loss: {avg_val_d_loss:.4f}, val_g_loss: {avg_val_g_loss:.4f}, time: {time_taken:.2f}s")
        
        # Check for early stopping
        if avg_val_g_loss < best_g_loss:
            best_g_loss = avg_val_g_loss
            patience_counter = 0
            # Save best models
            generator.save(os.path.join(output_dir, "best_generator_model.keras"))
            discriminator.save(os.path.join(output_dir, "best_discriminator_model.keras"))
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
        
        # Generate and save sample images every 5 epochs
        if epoch % 5 == 0 or epoch == epochs - 1:
            generated_images = generator([fixed_noise, fixed_labels], training=False)
            generated_images = (generated_images * 0.5 + 0.5) * 255  # Rescale to [0, 255]
            generated_images = generated_images.numpy().astype(np.uint8)
            
            # Create a figure to display images
            plt.figure(figsize=(15, 5))
            for i in range(NUM_CLASSES * sample_per_class):
                plt.subplot(sample_per_class, NUM_CLASSES, i + 1)
                plt.imshow(generated_images[i])
                class_idx = i // sample_per_class
                plt.title(CLASS_NAMES[class_idx])
                plt.axis('off')
            
            # Save figure
            plt.tight_layout()
            plt.savefig(f"{samples_dir}/generated_epoch_{epoch+1}.png")
            plt.close()
    
    # Load best models if early stopping occurred
    if patience_counter >= patience:
        generator = tf.keras.models.load_model(os.path.join(output_dir, "best_generator_model.keras"))
        discriminator = tf.keras.models.load_model(os.path.join(output_dir, "best_discriminator_model.keras"))
    
    return generator, discriminator, history


## generate images

In [15]:
def generate_images(generator, num_samples_per_class=10, output_dir="generated_images"):
    """Generate synthetic images for each class and save them."""
    print(f"Generating {num_samples_per_class} images per class...")
    
    # Create output directories
    for class_name in CLASS_NAMES:
        os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)
    
    for i, class_name in enumerate(CLASS_NAMES):
        # Create one-hot encoded labels for this class
        labels = np.zeros((num_samples_per_class, NUM_CLASSES))
        labels[:, i] = 1
        # Convert to tensor
        labels = tf.convert_to_tensor(labels, dtype=tf.float32)
        
        # Generate random noise
        noise = tf.random.normal([num_samples_per_class, LATENT_DIM])
        
        # Generate images
        generated_images = generator([noise, labels], training=False)
        
        # Rescale images from [-1, 1] to [0, 255]
        generated_images = (generated_images * 0.5 + 0.5) * 255
        generated_images = generated_images.numpy().astype(np.uint8)
        
        # Save images
        for j in range(num_samples_per_class):
            output_path = os.path.join(output_dir, class_name, f"synthetic_{j}.png")
            plt.imsave(output_path, generated_images[j])
            
    print(f"Images saved to {output_dir}")

## writing a main function

In [16]:
def main():
    """Main function to execute the entire training pipeline."""
    print("TensorFlow version:", tf.__version__)
    print("GPU available:", tf.config.list_physical_devices('GPU'))
    
    # Create output directory
    OUTPUT_DIR = "/kaggle/working/mango_gan_output"
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    
    # Load data
    train_generator, val_generator = preprocess_data()
    
    # Print dataset info
    print(f"Found {train_generator.samples} training images")
    print(f"Found {val_generator.samples} validation images")
    print(f"Class names: {train_generator.class_indices}")
    
    # Build models
    generator = build_generator()
    discriminator = build_discriminator()
    
    # Print model summaries
    generator.summary()
    discriminator.summary()
    
    # Train the model using custom training loop
    start_time = time.time()
    generator, discriminator, history = train_gan(
        generator, 
        discriminator, 
        train_generator, 
        val_generator, 
        EPOCHS, 
        OUTPUT_DIR
    )
    training_time = time.time() - start_time
    print(f"Training completed in {training_time/60:.2f} minutes")
    
    # Plot training history
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(history['d_loss'], label='Train Discriminator Loss')
    plt.plot(history['val_d_loss'], label='Val Discriminator Loss')
    plt.title('Discriminator Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(history['g_loss'], label='Train Generator Loss')
    plt.plot(history['val_g_loss'], label='Val Generator Loss')
    plt.title('Generator Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, "training_history.png"))
    
    # Save final models
    generator.save(os.path.join(OUTPUT_DIR, "final_generator_model.keras"))
    discriminator.save(os.path.join(OUTPUT_DIR, "final_discriminator_model.keras"))
    
    # Generate a set of synthetic images
    GENERATED_DIR = os.path.join(OUTPUT_DIR, "generated_dataset")
    generate_images(generator, num_samples_per_class=20, output_dir=GENERATED_DIR)
    
    print("Done!")

In [17]:
if __name__ == "__main__":
    # Execute main function
    main()

TensorFlow version: 2.18.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Loading and preprocessing data...
Found 1438 images belonging to 7 classes.
Found 357 images belonging to 7 classes.
Found 1438 training images
Found 357 validation images
Class names: {'ANTHRACNOSE': 0, 'BACTERIAL CRANKER': 1, 'DIEBACK': 2, 'GALL MILDGE': 3, 'HEALTHY': 4, 'MANGO SOOTY': 5, 'SOOTY MOULD': 6}


I0000 00:00:1745029867.655413      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


ValueError: Exception encountered when calling InstanceNormalization.call().

[1mCould not automatically infer the output shape / dtype of 'instance_normalization' (of type InstanceNormalization). Either the `InstanceNormalization.call()` method is incorrect, or you need to implement the `InstanceNormalization.compute_output_spec() / compute_output_shape()` method. Error encountered:

Invalid reduction dimension 2 for input with 2 dimensions. for '{{node moments/mean}} = Mean[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=true](Placeholder, moments/mean/reduction_indices)' with input shapes: [?,65536], [2] and with computed input tensors: input[1] = <1 2>.[0m

Arguments received by InstanceNormalization.call():
  • args=('<KerasTensor shape=(None, 65536), dtype=float32, sparse=False, name=keras_tensor_3>',)
  • kwargs=<class 'inspect._empty'>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import numpy as np
import uuid

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_classes = 7
latent_dim = 128  # Increased from 100 to 128
image_size = 256  # Resized for computational feasibility
channels = 3
ngf = 64  # Generator feature maps
ndf = 64  # Discriminator feature maps
num_epochs = 100
batch_size = 32
lr = 0.0002
beta1 = 0.5
patience = 15  # Early stopping patience
min_delta = 0.001  # Minimum improvement for early stopping

# Class labels
class_names = ['ANTHRACNOSE', 'BACTERIAL_CRANKER', 'DIEBACK', 'GALL_MILDGE', 'HEALTHY', 'MANGO_SOOTY', 'SOOTY_MOULD']

# Data transforms
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])  # Normalize to [-1, 1]
])

# Load dataset
dataset = ImageFolder(root='/kaggle/input/mangoleaf-dataset/dataset', transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# Generator
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.label_emb = nn.Embedding(num_classes, num_classes)
        
        self.model = nn.Sequential(
            nn.ConvTranspose2d(latent_dim + num_classes, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf, ngf // 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf // 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf // 2, ngf // 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf // 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf // 4, channels, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, z, labels):
        label_emb = self.label_emb(labels).view(labels.size(0), num_classes, 1, 1)
        z = z.view(z.size(0), latent_dim, 1, 1)
        input = torch.cat([z, label_emb], dim=1)
        return self.model(input)

# Discriminator
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.label_emb = nn.Embedding(num_classes, num_classes)
        
        self.model = nn.Sequential(
            nn.Conv2d(channels + num_classes, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 8, ndf * 16, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 16),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 16, 1, 8, 1, 0, bias=False),  # Outputs 1x1
            nn.Sigmoid()
        )

    def forward(self, img, labels):
        label_emb = self.label_emb(labels).view(labels.size(0), num_classes, 1, 1)
        label_emb = label_emb.repeat(1, 1, img.size(2), img.size(3))
        input = torch.cat([img, label_emb], dim=1)
        output = self.model(input)
        return output.view(-1)  # Flatten to [batch_size]

# Initialize models
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Load pre-trained weights (if available)
# Example: generator.load_state_dict(torch.load('pretrained_generator.pth'))

# Loss and optimizers
adversarial_loss = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, 0.999))

# Early stopping variables
best_g_loss = float('inf')
epochs_no_improve = 0
early_stop = False

# Training loop
for epoch in range(num_epochs):
    if early_stop:
        print(f"Early stopping triggered at epoch {epoch}")
        break
    
    epoch_g_loss = 0.0
    epoch_d_loss = 0.0
    batches = 0
    
    for i, (imgs, labels) in enumerate(dataloader):
        batch_size = imgs.size(0)
        imgs = imgs.to(device)
        labels = labels.to(device)
        
        # Ground truth
        real_label = torch.ones(batch_size).to(device)
        fake_label = torch.zeros(batch_size).to(device)
        
        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()
        
        # Real images
        real_validity = discriminator(imgs, labels)
        d_real_loss = adversarial_loss(real_validity, real_label)
        
        # Fake images
        z = torch.randn(batch_size, latent_dim).to(device)
        gen_labels = torch.randint(0, num_classes, (batch_size,)).to(device)
        gen_imgs = generator(z, gen_labels)
        fake_validity = discriminator(gen_imgs.detach(), gen_labels)
        d_fake_loss = adversarial_loss(fake_validity, fake_label)
        
        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()
        
        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()
        
        # Generate images
        fake_validity = discriminator(gen_imgs, gen_labels)
        g_loss = adversarial_loss(fake_validity, real_label)
        
        g_loss.backward()
        optimizer_G.step()
        
        # Accumulate losses for epoch
        epoch_g_loss += g_loss.item()
        epoch_d_loss += d_loss.item()
        batches += 1
        
        # Print progress
        if i % 10 == 0:
            print(f"[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(dataloader)}] "
                  f"D_loss: {d_loss.item():.4f}, G_loss: {g_loss.item():.4f}")
    
    # Average losses for the epoch
    epoch_g_loss /= batches
    epoch_d_loss /= batches
    
    # Early stopping check
    if epoch_g_loss < best_g_loss - min_delta:
        print(f"New best generator loss: {epoch_g_loss:.4f}. Saving models...")
        best_g_loss = epoch_g_loss
        epochs_no_improve = 0
        # Save best model weights
        torch.save(generator.state_dict(), 'best_generator_mangoleaf.pth')
        torch.save(discriminator.state_dict(), 'best_discriminator_mangoleaf.pth')
    else:
        epochs_no_improve += 1
        print(f"No improvement in generator loss. Epochs without improvement: {epochs_no_improve}/{patience}")
    
    if epochs_no_improve >= patience:
        early_stop = True
    
    # Save generated images for each class
    if epoch % 5 == 0:
        with torch.no_grad():
            for class_idx, class_name in enumerate(class_names):
                z = torch.randn(1, latent_dim).to(device)
                gen_label = torch.tensor([class_idx]).to(device)
                gen_img = generator(z, gen_label)
                torchvision.utils.save_image(gen_img, f'generated_{class_name}_epoch_{epoch}.png', normalize=True)

# Save final models
torch.save(generator.state_dict(), 'final_generator_mangoleaf.pth')
torch.save(discriminator.state_dict(), 'final_discriminator_mangoleaf.pth')

[Epoch 0/100] [Batch 0/57] D_loss: 0.7255, G_loss: 4.8431
[Epoch 0/100] [Batch 10/57] D_loss: 0.1601, G_loss: 5.3553
[Epoch 0/100] [Batch 20/57] D_loss: 0.1215, G_loss: 7.0801
[Epoch 0/100] [Batch 30/57] D_loss: 0.2218, G_loss: 5.2242
[Epoch 0/100] [Batch 40/57] D_loss: 0.0229, G_loss: 5.2852
[Epoch 0/100] [Batch 50/57] D_loss: 0.1213, G_loss: 3.6710
New best generator loss: 5.1927. Saving models...
[Epoch 1/100] [Batch 0/57] D_loss: 3.9512, G_loss: 4.9021
[Epoch 1/100] [Batch 10/57] D_loss: 0.1717, G_loss: 3.6316
[Epoch 1/100] [Batch 20/57] D_loss: 0.0120, G_loss: 5.7335
[Epoch 1/100] [Batch 30/57] D_loss: 0.6060, G_loss: 2.4729
[Epoch 1/100] [Batch 40/57] D_loss: 0.0258, G_loss: 6.0825
[Epoch 1/100] [Batch 50/57] D_loss: 0.3320, G_loss: 6.3974
New best generator loss: 4.0937. Saving models...
[Epoch 2/100] [Batch 0/57] D_loss: 0.2047, G_loss: 5.7458
[Epoch 2/100] [Batch 10/57] D_loss: 0.1932, G_loss: 3.9486
[Epoch 2/100] [Batch 20/57] D_loss: 0.0290, G_loss: 3.9508
[Epoch 2/100] [Bat