# VGG16 fine-tuning for defect detection

This section builds a VGG16 model with ImageNet weights. You can choose any valid input shape (H x W x 3) and, optionally, fine-tune only the last N layers of the base network that you specify.

In [2]:
import os
import sys
import numpy as np
from keras import Model
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.applications import VGG16
from sklearn.model_selection import train_test_split
from keras.layers import BatchNormalization, Dense, Dropout, GlobalAveragePooling2D, Input

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from SRModels.loading_methods import load_defects_dataset_as_patches
from SRModels.data_augmentation import AdvancedAugmentGenerator
from SRModels.constants import VGG_PATCH_SIZE, VGG_STRIDE, RANDOM_SEED

In [3]:
def build_vgg16_finetune(
    input_shape=(128, 128, 3),
    num_classes=2,
    train_last_n_layers=4,
    base_trainable=False,
    dropout_rate=0.2,
    l2_reg=0.0,
    learning_rate=1e-3,
):
    """
    Build a VGG16-based model with ImageNet weights and a custom classification head.

    The head is: global average pooling -> (dropout) -> Dense(256, relu) ->
    (dropout) -> Dense(num_classes, softmax).

    Parameters
    ----------
    input_shape : (H, W, 3)
        Model input shape. Must have 3 channels.
    num_classes : int
        Number of output classes (units of the softmax layer).
    train_last_n_layers : int
        Number of layers (from the end of the base model) to unfreeze for fine-tuning.
        Counted over ``base.layers``, so any layer without weights also counts
        toward N. Ignored unless ``base_trainable`` is True.
    base_trainable : bool
        If True, allow training on selected last N layers of VGG16.
        If False, the whole base stays frozen and only the head is trained.
    dropout_rate : float
        Dropout after the pooled features and after the hidden dense layer (0 disables).
    l2_reg : float
        L2 weight decay for the hidden dense layer of the head (0 disables).
    learning_rate : float
        Learning rate for the Adam optimizer. The default keeps the previous
        hard-coded value; a smaller value is usually advisable when base layers
        are unfrozen, to avoid destroying the pretrained weights.

    Returns
    -------
    keras.Model
        Compiled model ready to train (sparse categorical cross-entropy loss,
        accuracy metric), so labels are expected as integer class ids.

    Raises
    ------
    AssertionError
        If ``input_shape`` does not end with 3 channels.
    """

    assert input_shape[-1] == 3, "Input must have 3 channels (RGB)."

    # Load VGG16 base with ImageNet weights and no top
    base = VGG16(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
    )

    # Freeze all layers by default
    base.trainable = False

    # Optionally unfreeze last N layers.
    # The base itself must be marked trainable and the layers that should stay
    # fixed frozen afterwards (the pattern used by the Keras fine-tuning guide).
    # Flipping sublayers to trainable while the base container stays frozen can
    # leave the base reporting no trainable weights (Keras 2 / tf.keras), which
    # silently disables fine-tuning.
    if base_trainable and train_last_n_layers > 0:
        base.trainable = True
        first_trainable = max(len(base.layers) - train_last_n_layers, 0)
        for index, layer in enumerate(base.layers):
            # BatchNormalization layers are kept frozen even inside the unfrozen tail.
            layer.trainable = (
                index >= first_trainable
                and not isinstance(layer, BatchNormalization)
            )

    # Build head
    inputs = Input(shape=input_shape)
    # training=False runs the base in inference mode (matters for layers such as
    # BatchNormalization); it does not prevent unfrozen weights from being updated.
    x = base(inputs, training=False)
    x = GlobalAveragePooling2D(name="gap")(x)
    if dropout_rate > 0:
        x = Dropout(dropout_rate)(x)
    kernel_reg = l2(l2_reg) if l2_reg > 0 else None
    x = Dense(256, activation="relu", kernel_regularizer=kernel_reg)(x)
    if dropout_rate > 0:
        x = Dropout(dropout_rate)(x)

    outputs = Dense(num_classes, activation="softmax", name="predictions")(x)
    model = Model(inputs, outputs, name="vgg16_finetune")

    optimizer = Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    return model

In [4]:
# Dataset locations, resolved against the notebook's current working directory.
# os.path.abspath already anchors a relative path at os.getcwd().
HR_ROOT = os.path.abspath("../../data/images/HR")
CLASS_LABELS_PATH = os.path.abspath("../../data/images/class_labels_map.pkl")

In [5]:
# Load the dataset as patches:
#   X -> high-resolution patches (model input)
#   y -> class labels (target)
X, y = load_defects_dataset_as_patches(
    HR_ROOT,
    patch_size=VGG_PATCH_SIZE,
    stride=VGG_STRIDE,
    class_map_path=CLASS_LABELS_PATH,
)

# First split: hold out 20% of all patches as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=RANDOM_SEED,
)

# Second split: carve 10% of the remaining training patches out for validation
# (roughly 72% train / 8% validation / 20% test overall).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    shuffle=True,
    random_state=RANDOM_SEED,
)

In [6]:
# Report the shape of the full dataset and of every split, one line each.
print(
    f"X shape: {X.shape}, Y shape: {y.shape}",
    f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}",
    f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}",
    f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}",
    sep="\n",
)

# Count how many patches carry each label, to spot class imbalance.
unique, counts = np.unique(y, return_counts=True)
print(f"Class distribution: {dict(zip(unique, counts))}")

X shape: (1920, 96, 96, 3), Y shape: (1920,)
X_train shape: (1382, 96, 96, 3), y_train shape: (1382,)
X_val shape: (154, 96, 96, 3), y_val shape: (154,)
X_test shape: (384, 96, 96, 3), y_test shape: (384,)
Class distribution: {0: 1920}


In [7]:
# Instantiate the classifier: the input shape is taken from the patches and the
# number of output units from the distinct labels present in y.
# NOTE(review): the recorded class distribution shows a single label (0), which
# gives num_classes == 1; a one-unit softmax always outputs 1.0 — confirm that
# the labels are loaded as intended before training.
model = build_vgg16_finetune(
    input_shape=X.shape[1:],
    num_classes=len(np.unique(y)),
    train_last_n_layers=6,
    base_trainable=True,
    dropout_rate=0.3,
    l2_reg=1e-4,
)

In [8]:
# Print the layer-by-layer overview of the model (layer type, output shape, parameter count).
model.summary()

Model: "vgg16_finetune"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 96, 96, 3)]       0         
                                                                 
 vgg16 (Functional)          (None, 3, 3, 512)         14714688  
                                                                 
 gap (GlobalAveragePooling2D  (None, 512)              0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
_________________________________________________________________
 Layer (type)                Output Shape           