<a href="https://colab.research.google.com/github/WalterPHD/Ai-Data/blob/main/ResNet_and_VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Problem 1

My first version of the code was a standard U-Net, where I built both the encoder and decoder from scratch. All convolutional layers started with random weights, so the model had to learn everything—image features and segmentation boundaries—directly from the salt dataset. While this worked, it required more training data and time, and it was more likely to overfit.

The new version uses transfer learning. Instead of training the encoder from zero, it uses a pre-trained backbone like ResNet50 or VGG16, originally trained on ImageNet. These networks already know how to detect general features such as edges, textures, and shapes. The U-Net decoder is then added on top, using upsampling and skip connections to generate the segmentation mask.

I understood the it works more like...
Load a pre-trained encoder (e.g., ResNet or VGG) with weights='imagenet' and include_top=False—this keeps the convolutional layers for feature extraction but removes the classification layers.

Use intermediate feature maps from the encoder as skip connections in the U-Net.

Add a decoder with upsampling and convolution layers to rebuild the segmentation mask.

Train only the decoder at first while keeping the encoder frozen, and later fine-tune some encoder layers if needed.

The key difference is that my first model had to learn all features from scratch, while the new one reuses knowledge from ImageNet. This makes training faster, reduces overfitting, and gives better accuracy, even with a smaller dataset.

#Problem 2

In [1]:


import os
import numpy as np
import tensorflow as tf

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, UpSampling2D, MaxPooling2D, concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras import backend as K

# Make GPU memory growth safer
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for g in gpus:
            tf.config.experimental.set_memory_growth(g, True)
    except Exception as e:
        print("GPU config warning:", e)

def dice_coef(y_true, y_pred, smooth=1e-6):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(K.cast(y_pred > 0.5, 'float32'))
    inter = K.sum(y_true_f * y_pred_f)
    return (2. * inter + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def iou_coef(y_true, y_pred, smooth=1e-6):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(K.cast(y_pred > 0.5, 'float32'))
    inter = K.sum(y_true_f * y_pred_f)
    union = K.sum(y_true_f) + K.sum(y_pred_f) - inter
    return (inter + smooth) / (union + smooth)

def conv_block(x, n_filters):
    x = Conv2D(n_filters, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(n_filters, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return x

def build_unet_backbone(backbone='resnet50', input_shape=(256, 256, 3), freeze_encoder=True):
    """
    backbone: 'resnet50' or 'vgg16'
    input_shape must be 3-channel (RGB) because we load ImageNet weights.
    """
    inputs = Input(shape=input_shape)

    if backbone.lower() == 'resnet50':
        base = ResNet50(weights='imagenet', include_top=False, input_tensor=inputs)
        # Skip features (from shallow to deep)
        skip1 = base.get_layer('conv1_relu').output         # ~1/2
        skip2 = base.get_layer('conv2_block3_out').output   # ~1/4
        skip3 = base.get_layer('conv3_block4_out').output   # ~1/8
        skip4 = base.get_layer('conv4_block6_out').output   # ~1/16
        bottleneck = base.get_layer('conv5_block3_out').output  # ~1/32
    elif backbone.lower() == 'vgg16':
        base = VGG16(weights='imagenet', include_top=False, input_tensor=inputs)
        # VGG blocks
        skip1 = base.get_layer('block1_conv2').output  # ~1/2
        skip2 = base.get_layer('block2_conv2').output  # ~1/4
        skip3 = base.get_layer('block3_conv3').output  # ~1/8
        skip4 = base.get_layer('block4_conv3').output  # ~1/16
        bottleneck = base.get_layer('block5_conv3').output  # ~1/32
    else:
        raise ValueError("backbone must be 'resnet50' or 'vgg16'")

    if freeze_encoder:
        for l in base.layers:
            l.trainable = False


    x = UpSampling2D((2, 2))(bottleneck)        # 1/16
    x = concatenate([x, skip4])
    x = conv_block(x, 512)

    x = UpSampling2D((2, 2))(x)                  # 1/8
    x = concatenate([x, skip3])
    x = conv_block(x, 256)

    x = UpSampling2D((2, 2))(x)                  # 1/4
    x = concatenate([x, skip2])
    x = conv_block(x, 128)

    x = UpSampling2D((2, 2))(x)                  # 1/2
    x = concatenate([x, skip1])
    x = conv_block(x, 64)

    x = UpSampling2D((2, 2))(x)                  # 1/1
    x = conv_block(x, 32)

    outputs = Conv2D(1, 1, activation='sigmoid')(x)
    model = Model(inputs, outputs, name=f'unet_{backbone}')
    return model

# ========= Data pipeline (pairs RGB image + 1ch mask) =========
def make_generators(train_dir,
                    img_sub='image',
                    mask_sub='label',
                    target_size=(256, 256),
                    batch_size=4,
                    val_split=0.1,
                    seed=42,
                    augment=True):

    if augment:
        common_args = dict(
            rescale=1./255,
            rotation_range=10,
            width_shift_range=0.05,
            height_shift_range=0.05,
            shear_range=0.05,
            zoom_range=0.05,
            horizontal_flip=True,
            fill_mode='nearest',
            validation_split=val_split
        )
    else:
        common_args = dict(rescale=1./255, validation_split=val_split)

    img_gen = ImageDataGenerator(**common_args)
    msk_gen = ImageDataGenerator(**common_args)

    img_train = img_gen.flow_from_directory(
        train_dir, classes=[img_sub], class_mode=None,
        target_size=target_size, color_mode='rgb',
        batch_size=batch_size, subset='training', seed=seed, shuffle=True)

    msk_train = msk_gen.flow_from_directory(
        train_dir, classes=[mask_sub], class_mode=None,
        target_size=target_size, color_mode='grayscale',
        batch_size=batch_size, subset='training', seed=seed, shuffle=True)

    img_val = img_gen.flow_from_directory(
        train_dir, classes=[img_sub], class_mode=None,
        target_size=target_size, color_mode='rgb',
        batch_size=batch_size, subset='validation', seed=seed, shuffle=False)

    msk_val = msk_gen.flow_from_directory(
        train_dir, classes=[mask_sub], class_mode=None,
        target_size=target_size, color_mode='grayscale',
        batch_size=batch_size, subset='validation', seed=seed, shuffle=False)

    def pair_gen(a, b):
        while True:
            X = next(a)
            y = next(b)
            y = (y > 0.5).astype('float32')
            yield X, y

    train_pairs = pair_gen(img_train, msk_train)
    val_pairs = pair_gen(img_val, msk_val)

    steps_train = len(img_train)
    steps_val = len(img_val)
    return train_pairs, val_pairs, steps_train, steps_val


def train_and_eval(backbone, train_dir='data/membrane/train', input_size=(256, 256), batch_size=4,  epochs=10, freeze_encoder=True, out_path=None):
    train_gen, val_gen, steps_tr, steps_va = make_generators( train_dir=train_dir, target_size=input_size, batch_size=batch_size, val_split=0.1, augment=True)
    model = build_unet_backbone(backbone=backbone,input_shape=(input_size[0], input_size[1], 3), freeze_encoder=freeze_encoder)

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy', dice_coef, iou_coef])

    if out_path is None:
        out_path = f'unet_{backbone}.keras'

    callbacks = [
        ModelCheckpoint(out_path, monitor='val_iou_coef', mode='max',
                        save_best_only=True, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
        EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1)
    ]

    history = model.fit(train_gen, steps_per_epoch=steps_tr, validation_data=val_gen, validation_steps=steps_va, epochs=epochs, callbacks=callbacks, verbose=1)

    # Final validation evaluation
    val_scores = model.evaluate(val_gen, steps=steps_va, verbose=0)
    scores = dict(zip(model.metrics_names, val_scores))
    return model, history, scores


def save_sample_preds(model, val_gen, out_dir='pred_samples', n=4):
    os.makedirs(out_dir, exist_ok=True)
    X, y = next(val_gen)
    preds = model.predict(X, verbose=0)
    for i in range(min(n, X.shape[0])):
        tf.keras.preprocessing.image.save_img(os.path.join(out_dir, f'img_{i}.png'), X[i])
        tf.keras.preprocessing.image.save_img(os.path.join(out_dir, f'mask_{i}.png'), y[i])
        tf.keras.preprocessing.image.save_img(os.path.join(out_dir, f'pred_{i}.png'),
                                              (preds[i] > 0.5).astype('float32'))


#Problem 3


In [None]:
TRAIN_DIR = 'data/membrane/train'

INPUT_SIZE = (128, 128)
BATCH_SIZE = 8
EPOCHS = 1
FREEZE_ENCODER = True

resnet_model, resnet_hist, resnet_scores = train_and_eval(
    backbone='resnet50',
    train_dir=TRAIN_DIR,
    input_size=INPUT_SIZE,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    freeze_encoder=FREEZE_ENCODER,
    out_path='unet_resnet50.keras'
)

vgg_model, vgg_hist, vgg_scores = train_and_eval(
    backbone='vgg16',
    train_dir=TRAIN_DIR,
    input_size=INPUT_SIZE,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    freeze_encoder=FREEZE_ENCODER,
    out_path='unet_vgg16.keras'
)

def short_scores(name, s):
    return f"{name}: val_loss={s.get('loss',None):.4f} | val_acc={s.get('accuracy',None):.4f} | val_dice={s.get('dice_coef',None):.4f} | val_iou={s.get('iou_coef',None):.4f}"

print(short_scores("ResNet50", resnet_scores))
print(short_scores("VGG16   ", vgg_scores))

_, val_gen_r, _, _ = make_generators(TRAIN_DIR, target_size=INPUT_SIZE, batch_size=BATCH_SIZE, val_split=0.1, augment=False)
save_sample_preds(resnet_model, val_gen_r, out_dir='pred_samples_resnet', n=4)

_, val_gen_v, _, _ = make_generators(TRAIN_DIR, target_size=INPUT_SIZE, batch_size=BATCH_SIZE, val_split=0.1, augment=False)
save_sample_preds(vgg_model, val_gen_v, out_dir='pred_samples_vgg', n=4)


Found 0 images belonging to 1 classes.
Found 0 images belonging to 1 classes.
Found 0 images belonging to 1 classes.
Found 0 images belonging to 1 classes.
   1127/Unknown [1m311s[0m 261ms/step - accuracy: 0.0000e+00 - dice_coef: 1.0000 - iou_coef: 1.0000 - loss: 0.0000e+00