<a href="https://colab.research.google.com/github/bytebuster21/AI-project/blob/main/Configure_the_Learning_Process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop # Import common optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger # Import useful callbacks
import matplotlib.pyplot as plt
import os
import shutil

# --- 1. Mount Google Drive (if your data is there) ---
# Mounting is only needed when the dataset lives on Google Drive.
# The google.colab package is not importable outside a Colab runtime, so the
# import is guarded: elsewhere the mount is skipped instead of aborting the
# whole script with an ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")

# --- 2. Define your dataset paths ---
# The defaults are plain /content paths, i.e. NOT inside the Drive mount
# point (/content/drive). If the images live on Google Drive, point both
# variables at the Drive folders instead, for example:
#   trainpath = '/content/drive/MyDrive/YourDatasetFolder/train'
#   testpath = '/content/drive/MyDrive/YourDatasetFolder/test'
trainpath = "/content/train"  # root folder of the training images
testpath = "/content/test"    # root folder of the test images

# --- 3. (Optional) Create Dummy Data for Demonstration ---
# When `trainpath` does not exist yet, a synthetic three-class dataset of
# solid-colour PNG images is written to `trainpath` and `testpath` so the rest
# of the script can run without a real dataset.
# NOTE(review): only `trainpath` is checked. If it exists while `testpath`
# does not, nothing is created here for the test split.
if os.path.exists(trainpath):
    print(f"Using existing dataset at {trainpath} and {testpath}")
else:
    print("Creating dummy dataset for demonstration...")

    # One sub-folder per class under each split root.
    for split_root in (trainpath, testpath):
        for class_letter in ('A', 'B', 'C'):
            os.makedirs(os.path.join(split_root, f'class_{class_letter}'), exist_ok=True)

    from PIL import Image

    def create_dummy_image(path, color, size=(299, 299)):
        """Write a single-colour RGB image of `size` to `path`."""
        Image.new('RGB', size, color=color).save(path)

    # (split root, file-name prefix, images per class, class letter -> RGB fill)
    # Training: 116 * 3 classes = 348 images; testing: 51 * 3 classes = 153 images.
    dummy_splits = (
        (trainpath, 'img', 116, {
            'A': (255, 100, 100),
            'B': (100, 255, 100),
            'C': (100, 100, 255),
        }),
        (testpath, 'test', 51, {
            'A': (200, 50, 50),
            'B': (50, 200, 50),
            'C': (50, 50, 200),
        }),
    )
    for split_root, file_prefix, images_per_class, palette in dummy_splits:
        for i in range(images_per_class):
            for class_letter, fill_color in palette.items():
                create_dummy_image(
                    os.path.join(
                        split_root,
                        f'class_{class_letter}',
                        f'{file_prefix}{class_letter}_{i:03d}.png',
                    ),
                    fill_color,
                )

    print(f"Dummy dataset created at {trainpath} and {testpath}")


# --- 4. Configure ImageDataGenerator instances ---
# Image size and batch size used when the generators read from disk.
TARGET_SIZE = (299, 299)
BATCH_SIZE = 20

# NOTE(review): both generators scale pixels by 1/255. The Keras VGG16
# documentation describes `vgg16.preprocess_input` as the expected input
# preprocessing for the ImageNet weights - confirm that plain rescaling is
# intended for the VGG16 base used later in this file.

# Test Data Generator (ONLY rescaling, no augmentation)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Training Data Generator (rescaling plus random augmentation)
augmentation_settings = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation_settings)

# --- 5. Flow Images from Directories ---
print("\nLoading images using ImageDataGenerator...")

# Training batches, read from the class sub-folders of `trainpath`.
# class_mode='categorical' matches the softmax / categorical_crossentropy
# setup used for the model in this file.
train_generator = train_datagen.flow_from_directory(
    trainpath,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Test batches; shuffling is disabled for this generator.
test_generator = test_datagen.flow_from_directory(
    testpath,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# The training generator defines the class count and class names used to
# size the model's output layer.
num_classes = train_generator.num_classes
class_names = list(train_generator.class_indices)

# Each split reports its OWN class count, so a mismatch between the two
# directories is visible instead of being masked by the training count.
print(f"Found {train_generator.samples} training images belonging to {num_classes} classes.")
print(f"Found {test_generator.samples} test images belonging to {test_generator.num_classes} classes.")
print("Class Names:", class_names)

# Labels are only comparable when both splits map class names to the same
# indices; flag any difference early.
if test_generator.class_indices != train_generator.class_indices:
    print(
        "WARNING: train and test directories do not define the same classes: "
        f"train={train_generator.class_indices}, test={test_generator.class_indices}"
    )


# --- 6. Build the Model using VGG16 as a Pre-trained Base (Feature Extractor) and Add Custom Layers ---

# VGG16 with ImageNet weights and without its original classifier top
# (include_top=False); the input is TARGET_SIZE with 3 colour channels.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(*TARGET_SIZE, 3),
)

# Freeze the pre-trained base so only the layers added below are trainable.
base_model.trainable = False

# Custom classification head: flatten the base output, two dropout-regularised
# dense layers, then a softmax over `num_classes` outputs.
classifier_head = [
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(num_classes, activation='softmax'),
]

model = Sequential([base_model, *classifier_head])


# --- 7. Configure the learning process: optimizer, loss and metrics ---

# Metric reported alongside the loss.
chosen_metrics = ['accuracy']

# Loss paired with the softmax output and class_mode='categorical' labels.
chosen_loss_function = 'categorical_crossentropy'

# Adam with a learning rate of 1e-4.
chosen_optimizer = Adam(learning_rate=1e-4)

# Compile the model with the configuration chosen above.
model.compile(
    loss=chosen_loss_function,
    optimizer=chosen_optimizer,
    metrics=chosen_metrics,
)

# Show the layer-by-layer summary of the assembled model.
print("\n--- Complete Model Summary (VGG16 with Custom Dense Layers) ---")
model.summary()


# --- 8. Training callbacks ---
# NOTE(review): the callbacks monitor 'val_loss' / 'val_accuracy'. No fit()
# call is visible in this cell - confirm that validation data is supplied
# wherever training is actually started.

# 8.1 EarlyStopping: stop once 'val_loss' has not improved for 5 epochs and
# restore the weights from the best epoch. verbose=1 prints a message when the
# callback takes action (it is not a progress bar).
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# 8.2 ModelCheckpoint: save the model only when 'val_accuracy' improves
# (mode='max', higher is better).
checkpoint_dir = 'model_checkpoints'
checkpoint_filepath = os.path.join(checkpoint_dir, 'best_model_vgg16.h5')
os.makedirs(checkpoint_dir, exist_ok=True)  # make sure the target folder exists
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# 8.3 ReduceLROnPlateau: multiply the learning rate by `factor` after 3 epochs
# without 'val_loss' improvement, never going below `min_lr`.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# 8.4 CSVLogger: stream per-epoch results to a CSV file; append=True keeps
# the rows already present in an existing log file.
csv_logger = CSVLogger('training_log.csv', append=True)

# All callbacks collected into one list.
callbacks_list = [early_stopping, model_checkpoint, reduce_lr, csv_logger]

# Banner only: no training call follows in this cell.
print("\n--- Starting Model Training (Uncomment to run) ---")


Mounted at /content/drive
Google Drive mounted successfully!
Creating dummy dataset for demonstration...
Dummy dataset created at /content/train and /content/test

Loading images using ImageDataGenerator...
Found 348 images belonging to 3 classes.
Found 153 images belonging to 3 classes.
Found 348 training images belonging to 3 classes.
Found 153 test images belonging to 3 classes.
Class Names: ['class_A', 'class_B', 'class_C']
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

--- Complete Model Summary (VGG16 with Custom Dense Layers) ---



--- Starting Model Training (Uncomment to run) ---
