# Facial Expression Recognition

## Import Essential Libraries

In [None]:
import os
import matplotlib.pyplot as plt
import shutil
import math
from PIL import Image
import cv2
import tensorflow as tf
import random
import pandas as pd
import seaborn as sns

from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess_input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

## Random Seed

In [None]:
# Set all random seeds (Python, NumPy, and TensorFlow)
tf.keras.utils.set_random_seed(42)

random.seed(42)

## Config Tensorflow to use GPU

In [None]:
import tensorflow as tf

# List all physical devices (CPUs and GPUs) that TensorFlow can see
physical_devices = tf.config.list_physical_devices()
print(f"Physical devices detected: {physical_devices}")

# Specifically list GPUs
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    print(f"\nNumber of GPUs available: {len(gpu_devices)}")
    for i, gpu in enumerate(gpu_devices):
        print(f"  GPU {i}: {gpu}")
    print("\nTensorFlow will automatically use the GPU if available.")
else:
    print("\nNo GPU devices found. TensorFlow will run on CPU.")

# You can also check if a random tensor is placed on GPU by default
# This should show GPU if one is available and being used
test_tensor = tf.constant([1.0, 2.0, 3.0])
print(f"\nDefault device for a tensor: {test_tensor.device}")

## Global Variables

In [None]:
# List of emotions
emotions = ['anger', 'contempt', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

In [None]:
# Mapping from integer class ID to emotion label
emotion_labels = {
    0: 'anger',
    1: 'contempt',
    2: 'disgust',
    3: 'fear',
    4: 'happy',
    5: 'neutral',
    6: 'sad',
    7: 'surprise'
}

## Define Global URLs

In [None]:
# Input related URLs
input_base_url = '/kaggle/input/'

train_images_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/train/images')
valid_images_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/valid/images')
test_images_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/test/images')

train_labels_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/train/labels')
valid_labels_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/valid/labels')
test_labels_url = os.path.join(input_base_url, 'affectnet-yolo-format/YOLO_format/test/labels')

In [None]:
# Output related URLs
output_base_url = '/kaggle/working/'

organized_base_dir = os.path.join(output_base_url, 'organized_images')

organized_train_images = os.path.join(organized_base_dir, 'train')
organized_valid_images = os.path.join(organized_base_dir, 'valid')
organized_test_images = os.path.join(organized_base_dir, 'test')

resized_base_dir = os.path.join(output_base_url, 'resized_images')

resized_train_images = os.path.join(resized_base_dir, 'train')
resized_valid_images = os.path.join(resized_base_dir, 'valid')
resized_test_images = os.path.join(resized_base_dir, 'test')

## Create Directories

In [None]:
# Create directories
os.makedirs(organized_base_dir, exist_ok=True)
os.makedirs(resized_base_dir, exist_ok=True)

## Split Images into Emotion Folders

In [None]:
def _read_first_class_id(label_path):
    """
    Return the class ID found on the first non-blank line of a YOLO label file.

    Returns:
        int | None: The integer in the first whitespace-separated field, or None
        when the file has no non-blank line.

    Raises:
        ValueError: If the first field is not an integer.
        OSError: If the file cannot be opened or read.
    """
    with open(label_path, 'r') as label_file:
        for line in label_file:
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if fields:
                return int(fields[0])
    return None


def reorganize_dataset(source_images_dir, source_labels_dir, destination_base_dir, emotion_map):
    """
    Reorganizes image files into emotion-specific subfolders based on YOLO-format label files.

    For every image in ``source_images_dir`` the label file with the same base name
    (``<name>.txt``) is looked up in ``source_labels_dir``. The class ID on its first
    non-blank line selects the emotion subfolder the image is copied into. Images with
    a missing, empty, or unparsable label, or with a class ID that is not in
    ``emotion_map``, are reported and skipped. Only the first labelled object of each
    image is used.

    Args:
        source_images_dir (str): Path to the directory containing image files (e.g., train/images).
        source_labels_dir (str): Path to the directory containing label .txt files (e.g., train/labels).
        destination_base_dir (str): Path to the root directory where reorganized data will be saved.
                                    (e.g., /kaggle/working/processed_train)
        emotion_map (dict): A dictionary mapping integer class IDs to emotion names (e.g., {0: 'anger'}).

    Raises:
        FileNotFoundError: If ``source_images_dir`` does not exist (raised by ``os.listdir``).
    """
    # The split name is the parent folder of the images directory (".../train/images" -> "train").
    # normpath makes this independent of a trailing path separator.
    split_name = os.path.basename(os.path.dirname(os.path.normpath(source_images_dir)))
    print(f"--- Reorganizing: {split_name} set ---")

    # Create one destination subfolder per emotion name.
    for emotion_name in emotion_map.values():
        class_folder = os.path.join(destination_base_dir, emotion_name)
        os.makedirs(class_folder, exist_ok=True)  # exist_ok=True prevents error if folder already exists
        print(f"  Created directory: {class_folder}")

    image_files = [f for f in os.listdir(source_images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    print(f"  Found {len(image_files)} image files in {source_images_dir}")

    processed_count = 0
    skipped_count = 0

    for img_filename in image_files:
        img_path = os.path.join(source_images_dir, img_filename)

        # The label file shares the image's base name, with a .txt extension.
        label_filename = os.path.splitext(img_filename)[0] + '.txt'
        label_path = os.path.join(source_labels_dir, label_filename)

        if not os.path.exists(label_path):
            print(f"    Warning: Label file not found for {img_filename} at {label_path}. Skipping.")
            skipped_count += 1
            continue

        try:
            class_id = _read_first_class_id(label_path)
            if class_id is None:
                print(f"    Warning: Label file {label_filename} is empty. Skipping {img_filename}.")
                skipped_count += 1
                continue

            emotion_name = emotion_map.get(class_id)
            if emotion_name is None:
                print(f"    Warning: Unknown class_id {class_id} for {img_filename}. Skipping.")
                skipped_count += 1
                continue

            destination_path = os.path.join(destination_base_dir, emotion_name, img_filename)
            shutil.copy(img_path, destination_path)
        except (OSError, ValueError) as e:
            # OSError: unreadable label or failed copy; ValueError: non-integer class ID
            # or undecodable label text. One bad file must not abort the whole split.
            print(f"    Error processing {img_filename} or {label_filename}: {e}. Skipping.")
            skipped_count += 1
            continue

        processed_count += 1
        if processed_count % 1000 == 0:
            print(f"    Processed {processed_count} images for {split_name} set...")

    print(f"--- Finished reorganizing {split_name} set. Processed: {processed_count}, Skipped: {skipped_count} ---")
    print(f"Reorganized data is in: {destination_base_dir}\n")

In [None]:
# Reorganize the training data
reorganize_dataset(train_images_url, train_labels_url, organized_train_images, emotion_labels)

In [None]:
os.makedirs(organized_valid_images, exist_ok=True)

# Reorganize the validation data
reorganize_dataset(valid_images_url, valid_labels_url, organized_valid_images, emotion_labels)

In [None]:
os.makedirs(organized_test_images, exist_ok=True)

# Reorganize the test data
reorganize_dataset(test_images_url, test_labels_url, organized_test_images, emotion_labels)

## Exploratory Data Analysis

### Verify Organized Images Directories

In [None]:
# Define a function to count images in the reorganized structure
def count_images_in_organized_dirs(base_dir, emotion_list):
    """
    Count the files stored in each emotion subfolder of an organized dataset split.

    Every regular file directly inside an emotion subfolder is counted, whatever its
    extension; nested directories are ignored. An emotion whose subfolder is missing
    is reported and recorded with a count of 0.

    Args:
        base_dir (str): Root directory of the organized split
                        (e.g., '/kaggle/working/organized_images/train').
        emotion_list (list): Emotion names, which are also the subfolder names.

    Returns:
        tuple: ``(counts, total)`` where ``counts`` maps each emotion name to its file
        count and ``total`` is the sum over all emotions. When ``base_dir`` does not
        exist the result is ``({}, 0)``.
    """
    split_label = base_dir.split('/')[-1].capitalize()
    print(f"--- Checking: {split_label} Set ---")

    per_emotion = {}
    running_total = 0

    if not os.path.exists(base_dir):
        print(f"  Warning: Base directory not found: {base_dir}. Skipping.")
        return per_emotion, 0

    for emotion in emotion_list:
        folder = os.path.join(base_dir, emotion)

        # Guard clause: a missing (or non-directory) entry is recorded as zero.
        if not os.path.isdir(folder):
            print(f"  Warning: Emotion directory not found for {emotion} in {base_dir}.")
            per_emotion[emotion] = 0
            continue

        # Only regular files are counted; subdirectories are skipped.
        file_count = sum(
            1 for entry in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, entry))
        )
        per_emotion[emotion] = file_count
        running_total += file_count
        print(f"  {emotion.capitalize()}: {file_count} images")

    print(f"  Total images in {split_label} Set: {running_total}\n")
    return per_emotion, running_total

In [None]:
# Get counts for Training Set
train_counts, total_train = count_images_in_organized_dirs(organized_train_images, emotions)

In [None]:
# Get counts for Validation Set
valid_counts, total_valid = count_images_in_organized_dirs(organized_valid_images, emotions)

In [None]:
# Get counts for Test Set
test_counts, total_test = count_images_in_organized_dirs(organized_test_images, emotions)

In [None]:
print("\n--- Overall Summary of Reorganized Dataset ---")
print(f"Total images across all sets: {total_train + total_valid + total_test}")
print(f"Train Set Total: {total_train}")
print(f"Valid Set Total: {total_valid}")
print(f"Test Set Total: {total_test}")

In [None]:
# Store counts in global variables for future use (e.g., plotting in the next step)
globals()['train_counts'] = train_counts
globals()['valid_counts'] = valid_counts
globals()['test_counts'] = test_counts

### Class Distribution of Train, Valid and Test

In [None]:
print("Preparing DataFrames for class distribution plots...")

# Create DataFrames for each split
train_df = pd.DataFrame(list(train_counts.items()), columns=['Emotion', 'Count'])
train_df['Set'] = 'Train' # Add a 'Set' column (useful if we wanted a combined plot later)

valid_df = pd.DataFrame(list(valid_counts.items()), columns=['Emotion', 'Count'])
valid_df['Set'] = 'Validation'

test_df = pd.DataFrame(list(test_counts.items()), columns=['Emotion', 'Count'])
test_df['Set'] = 'Test'

print("DataFrames (train_df, valid_df, test_df) created.")

In [None]:
def plot_class_distribution(df, title):
    """
    Draw a bar chart of the number of images per emotion for one dataset split.

    Args:
        df (pd.DataFrame): DataFrame containing 'Emotion' and 'Count' columns.
        title (str): The title for the plot.
    """
    axis_label_size = 12
    tick_label_size = 10

    plt.figure(figsize=(10, 6))
    sns.barplot(data=df, x='Emotion', y='Count', palette='viridis')

    plt.title(title, fontsize=16)
    for set_axis_label, text in ((plt.xlabel, 'Emotion'), (plt.ylabel, 'Number of Images')):
        set_axis_label(text, fontsize=axis_label_size)

    # Slant the emotion names so they remain readable.
    plt.xticks(rotation=45, ha='right', fontsize=tick_label_size)
    plt.yticks(fontsize=tick_label_size)

    # Horizontal guide lines only.
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    plt.tight_layout()  # keep labels from overlapping the figure edges
    plt.show()

In [None]:
# Plot train directory class distribution
plot_class_distribution(train_df, 'Training Set Class Distribution')

In [None]:
# Plot valid directory class distribution
plot_class_distribution(valid_df, 'Validation Set Class Distribution')

In [None]:
# Plot test directory class distribution
plot_class_distribution(test_df, 'Test Set Class Distribution')

### Visualize Images

In [None]:
def plot_emotion_samples_organized(dataset_split_base_dir: str, emotion_name: str, num_images_to_plot: int, images_per_row: int = 10):
    """
    Plots a specified number of sample images for a given emotion from an organized dataset split.

    Images are drawn with ``random.sample`` from the sorted list of image files, so the
    selection is reproducible for a given ``random`` seed. Each image file is closed as
    soon as it has been handed to matplotlib. An image that cannot be loaded is
    reported and its grid cell shows a red 'Error' marker instead.

    Args:
        dataset_split_base_dir (str): The root directory of the organized dataset split (e.g., organized_train_images).
        emotion_name (str): The name of the emotion (e.g., 'happy', 'sad'). This should match the subfolder name.
        num_images_to_plot (int): The maximum number of images to plot for this emotion.
                                  Zero or negative values plot nothing.
        images_per_row (int, optional): How many images to display horizontally in each row.
                                        Defaults to 10.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    emotion_path = os.path.join(dataset_split_base_dir, emotion_name)

    if not os.path.exists(emotion_path):
        print(f"Error: Directory not found for '{emotion_name}' at '{emotion_path}'. Skipping plot.")
        return

    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # random.sample below pick the same images on every run.
    image_files = sorted(f for f in os.listdir(emotion_path) if f.lower().endswith(('.jpg', '.jpeg', '.png')))

    if not image_files:
        print(f"No images found in '{emotion_path}' for emotion '{emotion_name}'. Skipping plot.")
        return

    # Clamp at zero: random.sample raises ValueError for a negative sample size.
    sample_size = min(len(image_files), max(num_images_to_plot, 0))
    images_to_display = random.sample(image_files, sample_size)

    num_images_actual = len(images_to_display)
    if num_images_actual == 0:
        print(f"No images selected for display for '{emotion_name}'. Skipping plot.")
        return

    num_rows = math.ceil(num_images_actual / images_per_row)

    # Figure size scales with the grid; the per-cell factors were chosen by the
    # original author for small thumbnails.
    fig_width = images_per_row * 1.5
    fig_height = num_rows * 1.8
    plt.figure(figsize=(fig_width, fig_height))

    # Last path component (e.g., 'train'); normpath tolerates a trailing separator.
    split_name = os.path.basename(os.path.normpath(dataset_split_base_dir)).capitalize()
    plt.suptitle(f"Sample Images: {emotion_name.capitalize()} ({split_name} Set)", fontsize=18, y=1.02)

    for i, img_filename in enumerate(images_to_display):
        img_path = os.path.join(emotion_path, img_filename)

        # Create the cell up front so the grid stays intact even when loading fails.
        ax = plt.subplot(num_rows, images_per_row, i + 1)

        try:
            # The context manager closes the file handle once the image is drawn.
            with Image.open(img_path) as img:
                ax.imshow(img)
            ax.set_title(f"#{i+1}", fontsize=8)  # Small title for image number
        except Exception as e:
            # Deliberately broad: plotting is best-effort and one unreadable
            # image must not abort the whole grid.
            print(f"  Warning: Could not load image '{img_filename}' from '{emotion_path}': {e}. Skipping.")
            ax.text(0.5, 0.5, 'Error', ha='center', va='center', fontsize=12, color='red')

        ax.axis('off')  # Hide axes

    plt.tight_layout(rect=[0, 0.03, 1, 0.98])  # Adjust layout to make space for suptitle
    plt.show()
    print(f"Finished plotting {num_images_actual} images for '{emotion_name}' in the {split_name} set.\n")

In [None]:
print("--- Visualizing Training Set Images ---")
NUM_IMAGES = 100
for emotion in emotions:
    plot_emotion_samples_organized(organized_train_images, emotion, NUM_IMAGES)

In [None]:
print("--- Visualizing Validation Set Images ---")
NUM_IMAGES = 100
for emotion in emotions:
    plot_emotion_samples_organized(organized_valid_images, emotion, NUM_IMAGES)

In [None]:
print("--- Visualizing Test Set Images ---")
NUM_IMAGES = 100
for emotion in emotions:
    plot_emotion_samples_organized(organized_test_images, emotion, NUM_IMAGES)

## Resize Images to 224x224

In [None]:
def resize_and_save_dataset(source_base_dir, destination_base_dir, target_size=(224, 224)):
    """
    Resizes all images in emotion subfolders from a source directory and saves them
    to a new destination directory, maintaining the folder structure.

    Each image is converted to single-channel grayscale (PIL mode 'L'), resized with
    the LANCZOS filter, and written under the same file name. Images that fail to
    load, convert, or save are reported and skipped; processing then continues.

    Args:
        source_base_dir (str): Path to the root directory of the already organized dataset split
                               (e.g., '/kaggle/working/organized_images/train').
        destination_base_dir (str): Path to the root directory where resized images will be saved.
                                    (e.g., '/kaggle/working/resized_images/train')
        target_size (tuple): A tuple (width, height) for the new image size.

    Returns:
        None. Progress and a final summary are printed.
    """
    # Last path component (e.g., 'train'); normpath tolerates a trailing separator.
    split_label = os.path.basename(os.path.normpath(source_base_dir)).capitalize()
    print(f"--- Resizing and saving images for: {split_label} set to {target_size} ---")

    if not os.path.exists(source_base_dir):
        print(f"  Error: Source directory not found: {source_base_dir}. Skipping.")
        return

    os.makedirs(destination_base_dir, exist_ok=True)

    # Every immediate subdirectory of the source is treated as one emotion class.
    emotions_in_source = [d for d in os.listdir(source_base_dir) if os.path.isdir(os.path.join(source_base_dir, d))]

    if not emotions_in_source:
        print(f"  No emotion subfolders found in {source_base_dir}. Skipping.")
        return

    total_resized_count = 0
    total_skipped_count = 0

    for emotion_folder in emotions_in_source:
        source_emotion_path = os.path.join(source_base_dir, emotion_folder)
        destination_emotion_path = os.path.join(destination_base_dir, emotion_folder)
        os.makedirs(destination_emotion_path, exist_ok=True)

        image_files = [f for f in os.listdir(source_emotion_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

        resized_count_per_emotion = 0  # drives the per-folder progress message only

        for img_filename in image_files:
            source_img_path = os.path.join(source_emotion_path, img_filename)
            destination_img_path = os.path.join(destination_emotion_path, img_filename)

            try:
                # The context manager closes the source file handle promptly; without it
                # handles stay open until garbage collection.
                with Image.open(source_img_path) as source_img:
                    resized_img = source_img.convert('L').resize(target_size, Image.LANCZOS)
                resized_img.save(destination_img_path)
            except Exception as e:
                # Deliberately broad: the conversion is best-effort and one bad
                # file must not abort the whole split.
                print(f"    Error resizing {img_filename} from {source_emotion_path}: {e}. Skipping.")
                total_skipped_count += 1
                continue

            resized_count_per_emotion += 1
            total_resized_count += 1

            if resized_count_per_emotion % 1000 == 0:
                print(f"      Processed {resized_count_per_emotion} images in {emotion_folder} for {split_label} set...")

    print(f"--- Finished resizing and saving {split_label} set. Total Resized: {total_resized_count}, Total Skipped: {total_skipped_count} ---")
    print(f"Resized data saved to: {destination_base_dir}\n")

In [None]:
IMAGE_SIZE = (224, 224)

In [None]:
resize_and_save_dataset(organized_train_images, resized_train_images, target_size=IMAGE_SIZE)

In [None]:
resize_and_save_dataset(organized_valid_images, resized_valid_images, target_size=IMAGE_SIZE)

In [None]:
resize_and_save_dataset(organized_test_images, resized_test_images, target_size=IMAGE_SIZE)

## Model

In [None]:
# Model input image size and batch size used when loading the datasets
IMAGE_SIZE = (224, 224) # Same size the images were resized to; also used for the ResNet50 input_shape
BATCH_SIZE = 64 # Number of images per batch produced by the dataset loaders

# Number of output classes, taken from the emotion_labels dictionary
NUM_CLASSES = len(emotion_labels) # emotion_labels is defined in the "Global Variables" section

print(f"Preparing datasets from resized images with size {IMAGE_SIZE} and batch size {BATCH_SIZE}...")
print(f"Number of emotion classes: {NUM_CLASSES}")

In [None]:
# Define the data augmentation layers
# These transformations will be applied randomly to training images on each epoch.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"), # Randomly flip images horizontally
    tf.keras.layers.RandomRotation(0.1), # Randomly rotate images by +/- 10% (36 degrees)
    tf.keras.layers.RandomZoom(0.1), # Randomly zoom in/out by +/- 10%
    # tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1), # Optional: Randomly shift images
    # tf.keras.layers.RandomContrast(0.1), # Optional: Randomly adjust contrast
    # tf.keras.layers.RandomBrightness(0.1), # Optional: Randomly adjust brightness
], name="data_augmentation_layer") # Name the sequential layer for clarity

In [None]:
# Adapter that lets the ResNet50 preprocessing be mapped over (image, label) dataset elements.
def apply_resnet_preprocessing(image, label):
    """
    Run ``resnet_preprocess_input`` on the image and pass the label through unchanged.

    NOTE(review): according to the Keras documentation, the ResNet50
    ``preprocess_input`` converts RGB to BGR and zero-centers each channel with the
    ImageNet means, without scaling, so it does not map pixels to [-1, 1].
    Confirm against the installed Keras version.
    """
    return resnet_preprocess_input(image), label

In [None]:
# 1. Load the three splits with tf.keras.utils.image_dataset_from_directory.
#    The images were already resized in the previous step; image_size is passed so
#    every loader yields the same shape.
#    Integer labels are inferred from the emotion subfolder names.

# Arguments shared by the training, validation and test loaders.
_shared_loader_args = dict(
    labels='inferred',
    label_mode='int',
    image_size=IMAGE_SIZE,
    interpolation='bilinear',
    batch_size=BATCH_SIZE,
    seed=42,  # fixed seed for reproducibility
)

# Training split: shuffled.
train_ds = tf.keras.utils.image_dataset_from_directory(
    resized_train_images,
    shuffle=True,
    **_shared_loader_args,
)

# Validation split: kept in directory order.
valid_ds = tf.keras.utils.image_dataset_from_directory(
    resized_valid_images,
    shuffle=False,
    **_shared_loader_args,
)

# Test split: kept in directory order.
test_ds = tf.keras.utils.image_dataset_from_directory(
    resized_test_images,
    shuffle=False,
    **_shared_loader_args,
)

In [None]:
# Verify the class names inferred by Keras (they should match your emotion folder names alphabetically).
# class_names must be read here: the attribute is not available on the mapped datasets below.
keras_inferred_class_names = train_ds.class_names
print(f"\nKeras inferred class names (alphabetical order from folders): {keras_inferred_class_names}")

# 2. Apply ResNet50 preprocessing and optimize dataset loading.
#
# Random augmentation is intentionally NOT mapped over the dataset here:
#   * the model applies the `data_augmentation` layer itself (active only while
#     training), so augmenting in the pipeline as well would transform every
#     training image twice;
#   * a `.cache()` placed after a random augmentation stores the augmented images
#     from the first epoch and replays them afterwards, so later epochs would see
#     no new variations.
#
# The training split is not cached at all: `train_ds` is already shuffled and
# batched, so caching it would also replay the first epoch's batches in the same
# order instead of reshuffling each epoch.
train_ds = (
    train_ds
    .map(apply_resnet_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation and test data are deterministic, so caching the preprocessed batches is safe.
valid_ds = valid_ds.map(apply_resnet_preprocessing, num_parallel_calls=tf.data.AUTOTUNE).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.map(apply_resnet_preprocessing, num_parallel_calls=tf.data.AUTOTUNE).cache().prefetch(buffer_size=tf.data.AUTOTUNE)

print("Datasets prepared, preprocessed, and optimized (training augmentation is applied inside the model).")

In [None]:
print("Defining ResNet50 model with Data Augmentation and Dropout layers...")

# Load the ResNet50 convolutional base with ImageNet weights and without its classification top.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

# Freeze the entire base model for the first phase of training (feature extraction):
# only the new classification head is trained.
base_model.trainable = False

# Model layout:
# 1. Input layer (receives batches that the tf.data pipeline has already passed
#    through apply_resnet_preprocessing)
# 2. Data augmentation layer
# 3. Base model (ResNet50 convolutional layers)
# 4. Custom classification head (GlobalAveragePooling, Dropout, Dense)
#
# NOTE(review): augmentation lives inside the model, so the tf.data pipeline should
# not apply data_augmentation as well - otherwise each training image is augmented twice.

inputs = tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
x = data_augmentation(inputs) # Random flip / rotation / zoom
x = base_model(x, training=False) # training=False keeps the base in inference mode (BatchNorm statistics stay fixed)
x = GlobalAveragePooling2D()(x) # Collapse the spatial dimensions to one vector per image
x = Dropout(0.5)(x) # 50% dropout on the pooled features to reduce overfitting
outputs = Dense(NUM_CLASSES, activation='softmax')(x) # One softmax probability per emotion class

model = Model(inputs=inputs, outputs=outputs)

# Compile for the first phase of training (feature extraction).
# SparseCategoricalCrossentropy matches the integer labels (label_mode='int') of the datasets.
model.compile(optimizer=Adam(learning_rate=0.001), # Learning rate for the initial phase
              loss=SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

print("ResNet50 model (with augmentation and dropout) defined and compiled for initial feature extraction.")

In [None]:
print("Displaying model architecture summary (initial phase - frozen base)...")
model.summary()
print("\nModel architecture summary displayed. Note the 'data_augmentation_layer' and 'Dropout' layer.")
print("Also, ResNet50 layers are mostly 'Non-trainable params' in this phase.")

In [None]:
# Assuming 'model' is defined and compiled from Cell 23

INITIAL_EPOCHS = 10 # You can adjust this based on how quickly validation loss plateaus.

print(f"Starting initial training phase (feature extraction) for {INITIAL_EPOCHS} epochs...")

callbacks_initial = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )
]

history_initial = model.fit(
    train_ds,
    epochs=INITIAL_EPOCHS,
    validation_data=valid_ds,
    callbacks=callbacks_initial
)

print("\nInitial model training (feature extraction) complete.")
print(f"Training stopped after {len(history_initial.history['loss'])} epochs.")

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

print("Preparing model for fine-tuning phase...")

# Partial unfreezing of the ResNet50 base.
# Layers whose name contains 'conv5_block' are made trainable; every other layer of
# the base stays frozen. Inspect model.summary() / base_model.layers for the names.

# Mark the base model itself as trainable first; otherwise the per-layer flags
# set below would have no effect.
base_model.trainable = True

# Early layers stay frozen; only the 'conv5_block' layers are fine-tuned.
for layer in base_model.layers:
    layer.trainable = 'conv5_block' in layer.name

# Recompile so the new trainable flags take effect, using a much smaller learning
# rate (1e-5) than in the first phase.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

print("Base model partially unfrozen and model recompiled for fine-tuning with a lower learning rate.")

In [None]:
print("Displaying model architecture summary (fine-tuning phase - partially unfrozen base)...")
# Note the trainable parameters should be higher than initial phase, but lower than full unfreeze.
model.summary()
print("\nModel architecture summary displayed. Observe 'Trainable params' for partial unfreezing.")

In [None]:
# Requires 'history_initial' from the initial training phase.

FINE_TUNE_EPOCHS = 30 # Maximum number of additional epochs for the fine-tuning phase

# Number of epochs the first phase actually ran. EarlyStopping may have ended it
# before INITIAL_EPOCHS, so the count is taken from the history rather than the constant.
fine_tune_start_epoch = len(history_initial.epoch)

# `epochs` in model.fit is the index of the final epoch, not a count. Deriving it
# from the real starting epoch keeps the fine-tuning budget at exactly
# FINE_TUNE_EPOCHS, even when the first phase stopped early.
TOTAL_EPOCHS_CUMULATIVE = fine_tune_start_epoch + FINE_TUNE_EPOCHS

print(f"Starting fine-tuning phase for {FINE_TUNE_EPOCHS} additional epochs (Total cumulative epochs: {TOTAL_EPOCHS_CUMULATIVE})...")

callbacks_fine_tune = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5, # More patience than the initial phase (3)
        restore_best_weights=True
    )
]

history_fine_tune = model.fit(
    train_ds,
    epochs=TOTAL_EPOCHS_CUMULATIVE,
    initial_epoch=fine_tune_start_epoch, # Continue epoch numbering from where initial training left off
    validation_data=valid_ds,
    callbacks=callbacks_fine_tune
)

print("\nModel fine-tuning complete.")

In [None]:
import matplotlib.pyplot as plt # Ensure matplotlib.pyplot is imported

print("Plotting cumulative training and validation accuracy and loss...")

# Number of epochs run in the initial phase. The marker below is placed on the
# last of these epochs (index initial_epochs_run - 1).
initial_epochs_run = len(history_initial.history['loss'])

# Concatenate into NEW lists. Using list.extend() on history_initial.history[...]
# would mutate the History object in place: the phase boundary computed from it
# would then land on the last epoch, and re-running this cell would keep
# appending the fine-tuning values.
acc = history_initial.history['accuracy'] + history_fine_tune.history['accuracy']
val_acc = history_initial.history['val_accuracy'] + history_fine_tune.history['val_accuracy']
loss = history_initial.history['loss'] + history_fine_tune.history['loss']
val_loss = history_initial.history['val_loss'] + history_fine_tune.history['val_loss']

epochs_range = range(len(acc)) # x-axis covering all combined epochs
fine_tune_boundary = initial_epochs_run - 1 # index of the last initial-phase epoch

plt.figure(figsize=(12, 5))

# Subplot 1: Accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.axvline(x=fine_tune_boundary, color='r', linestyle='--', label='Start Fine-tuning')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy (Cumulative)', fontsize=14)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.grid(True)

# Subplot 2: Loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.axvline(x=fine_tune_boundary, color='r', linestyle='--', label='Start Fine-tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss (Cumulative)', fontsize=14)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.grid(True)

plt.tight_layout()
plt.show()

print("\nCumulative Accuracy and Loss plots displayed.")

In [None]:
print("Evaluating the fine-tuned model on the test set...")

loss, accuracy = model.evaluate(test_ds)

print(f"\nTest Loss (after fine-tuning): {loss:.4f}")
print(f"Test Accuracy (after fine-tuning): {accuracy:.4f}")

print("\nModel testing complete.")