Feature extraction model

Importing libraries

In [None]:
import tensorflow as tf
from keras import layers, models
import os
import tensorflow as tf
import json as js
import numpy as np
from keras.callbacks import ModelCheckpoint #type: ignore

In [39]:
def inception_block(x, filters):
    """Run ``x`` through four parallel branches and fuse them on the last axis.

    Branches (every convolution uses ``filters`` kernels, 'same' padding and
    a ReLU activation):
      * 1x1 convolution
      * 1x1 convolution followed by a 3x3 convolution
      * 1x1 convolution followed by a 5x5 convolution
      * 3x3 max-pooling with stride 1, followed by a 1x1 convolution

    Args:
        x: Input tensor fed to every branch.
        filters: Number of filters for each convolution in the block.

    Returns:
        The batch-normalized concatenation (``axis=-1``) of the four branch
        outputs.
    """
    def conv(kernel, tensor):
        # Shared recipe for every convolution in this block.
        return layers.Conv2D(filters, kernel, padding='same', activation='relu')(tensor)

    # Inner calls are evaluated first, so layers are still created in
    # branch order (1x1 reduce before the 3x3 / 5x5 convolution).
    path_1x1 = conv((1, 1), x)
    path_3x3 = conv((3, 3), conv((1, 1), x))
    path_5x5 = conv((5, 5), conv((1, 1), x))

    pooled = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    path_pool = conv((1, 1), pooled)

    merged = layers.concatenate([path_1x1, path_3x3, path_5x5, path_pool], axis=-1)
    return layers.BatchNormalization()(merged)

def mbconv_block(x, filters, kernel_size, strides=(1, 1), expand_ratio=6):
    """Inverted-residual block: 1x1 expand -> depthwise conv -> 1x1 project.

    Steps:
      1. 1x1 convolution to ``in_channels * expand_ratio`` channels, then
         batch norm and ReLU.
      2. Depthwise convolution with ``kernel_size`` / ``strides``, then
         batch norm and ReLU.
      3. 1x1 convolution to ``filters`` channels, then batch norm (no
         activation).
      4. A residual add with the block input, only when the block keeps
         both the spatial size (stride 1) and the channel count.

    Args:
        x: Input tensor; its last dimension is read as the channel count.
        filters: Number of output channels.
        kernel_size: Kernel size for the depthwise convolution.
        strides: Stride for the depthwise convolution. May be an int or a
            2-element tuple/list, as Keras accepts all three forms.
        expand_ratio: Channel multiplier for the expansion convolution.

    Returns:
        The block output tensor.
    """
    input_tensor = x
    in_channels = x.shape[-1]

    # Normalise ``strides`` to a tuple. Without this, ``strides=1`` or
    # ``strides=[1, 1]`` would fail the ``== (1, 1)`` test below and the
    # skip connection would be silently dropped.
    if isinstance(strides, int):
        strides = (strides, strides)
    else:
        strides = tuple(strides)

    # Expansion.
    x = layers.Conv2D(in_channels * expand_ratio, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Depthwise spatial filtering (this is where any downsampling happens).
    x = layers.DepthwiseConv2D(kernel_size, strides=strides, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Linear projection to the requested channel count.
    x = layers.Conv2D(filters, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    # Shapes only match for an element-wise add when nothing was resized.
    if strides == (1, 1) and in_channels == filters:
        x = layers.add([x, input_tensor])
    return x

def efficientnet_encoder(input_shape=(128, 128, 3)):
    """Build the convolutional backbone as a Keras ``Model``.

    The network is a stride-2 3x3 stem convolution (32 filters, batch norm,
    ReLU) followed by five ``mbconv_block`` stages. Two of those stages use
    stride 2, so the spatial resolution is halved three times in total.
    The model is built from scratch here; no pretrained weights are loaded.

    Args:
        input_shape: Shape of a single input image, without the batch axis.

    Returns:
        A ``models.Model`` mapping the input image to the last stage's
        feature map.
    """
    # (output channels, strides, expand_ratio) for each MBConv stage, in order.
    stage_specs = [
        (64, (1, 1), 1),
        (128, (2, 2), 6),
        (128, (1, 1), 6),
        (256, (2, 2), 6),
        (256, (1, 1), 6),
    ]

    inputs = layers.Input(shape=input_shape)

    # Stem.
    stem = layers.Conv2D(
        32,
        (3, 3),
        strides=(2, 2),
        padding="same",
        use_bias=False,
        kernel_initializer='he_normal',
    )
    features = stem(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # MBConv stages, all with 3x3 depthwise kernels.
    for out_channels, stage_strides, ratio in stage_specs:
        features = mbconv_block(
            features, out_channels, (3, 3), strides=stage_strides, expand_ratio=ratio
        )

    return models.Model(inputs, features)

def detection_decoder(input_tensor, num_classes):
    """Attach the detection head to a backbone feature map.

    The head stacks four ``inception_block`` stages (32, 64, 128 and 256
    filters), global-average-pools the result, and passes it through a
    512-unit ReLU dense layer with batch norm and 50% dropout. Two output
    layers branch off that shared vector.

    Args:
        input_tensor: Feature map produced by the encoder.
        num_classes: Number of units in the classification output.

    Returns:
        A ``(bbox_output, class_output)`` tuple: a 4-unit sigmoid layer named
        ``'bbox'`` and a ``num_classes``-unit softmax layer named ``'class'``.
    """
    features = input_tensor
    for width in (32, 64, 128, 256):
        features = inception_block(features, width)

    # Collapse the spatial dimensions into one vector per image.
    pooled = layers.GlobalAveragePooling2D()(features)

    hidden = layers.Dense(512, activation='relu')(pooled)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.5)(hidden)

    # The layer names double as the keys of the loss / metric / label dicts.
    bbox_output = layers.Dense(4, activation='sigmoid', name='bbox')(hidden)
    class_output = layers.Dense(num_classes, activation='softmax', name='class')(hidden)
    return bbox_output, class_output

def custom_detection_model(input_shape=(128, 128, 3), num_classes=80):
    """Assemble and compile the two-headed detection model.

    The encoder from ``efficientnet_encoder`` feeds ``detection_decoder``,
    giving one model with a ``'bbox'`` output and a ``'class'`` output.
    It is compiled with Adam (learning rate 1e-4), mean squared error on the
    box head and categorical cross-entropy on the class head.

    Args:
        input_shape: Shape of a single input image, without the batch axis.
        num_classes: Number of classes predicted by the ``'class'`` head.

    Returns:
        The compiled ``models.Model``.
    """
    backbone = efficientnet_encoder(input_shape)
    bbox_output, class_output = detection_decoder(backbone.output, num_classes)
    detector = models.Model(inputs=backbone.input, outputs=[bbox_output, class_output])

    # Keys must match the output layer names set in detection_decoder.
    per_head_losses = {
        'bbox': 'mean_squared_error',
        'class': 'categorical_crossentropy',
    }
    per_head_metrics = {
        'bbox': 'mse',
        'class': 'accuracy',
    }

    detector.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss=per_head_losses,
        metrics=per_head_metrics,
    )
    return detector

Loading paths from the configuration file

In [40]:
# Load the JSON config that maps logical names to filesystem paths. Later
# cells read the keys "Preprocessed_Train_json", "Train_resized" and
# "Trained_model". The file name is relative, so it is resolved against the
# kernel's current working directory.
with open("config_CNN.json",'r') as file:
    paths = js.load(file)

In [41]:
# Build the category_id -> class-index mapping used for the one-hot labels.
json_path = paths["Preprocessed_Train_json"]
with open(json_path, "r") as f:
    annotations = js.load(f)

# Sort the distinct ids before enumerating them. Iterating a bare set has no
# guaranteed order (string hashes are randomised per process), so without the
# sort a class index could mean a different category in another kernel
# session, e.g. when the saved model is reloaded for inference.
category_list = sorted({item["category_id"] for item in annotations})
label2idx = {name: idx for idx, name in enumerate(category_list)}

In [42]:
# Side length (in pixels) that images are resized to and boxes normalised by.
IMG_SIZE = 128

# Training-set locations, taken from the config loaded earlier.
train_dir = paths["Train_resized"]
json_path = paths["Preprocessed_Train_json"]

with open(json_path, "r") as f:
    annotations = js.load(f)

# img_id -> (category_id, bbox). If an img_id occurs more than once, only its
# last record survives in the dict.
annotation_dict = {
    record["img_id"]: (record["category_id"], record["bbox"])
    for record in annotations
}

def convert_bbox(bbox, img_size=None):
    """Convert a corner-format box to normalised centre format.

    Args:
        bbox: Sequence ``[x_min, y_min, width, height]``.
        img_size: Side length to divide every coordinate by. When ``None``
            (the default) the module-level ``IMG_SIZE`` is used, which keeps
            existing single-argument calls working unchanged.

    Returns:
        ``[x_center, y_center, width, height]``, each divided by ``img_size``.
    """
    if img_size is None:
        # Resolved at call time so a later change to IMG_SIZE is picked up.
        img_size = IMG_SIZE

    x_min, y_min, width, height = bbox
    x_center = x_min + width / 2
    y_center = y_min + height / 2

    return [
        x_center / img_size,
        y_center / img_size,
        width / img_size,
        height / img_size,
    ]

def load_image_and_label(image_path, num_classes=80):
    """Load one JPEG and look up its training targets.

    The image is decoded to 3 channels, converted to ``float32`` and resized
    to ``IMG_SIZE`` x ``IMG_SIZE``. The file name (last path component) is
    the key into ``annotation_dict``. The lookup runs in Python through
    ``tf.py_function`` because the annotations live in a plain dict.

    Args:
        image_path: Scalar string tensor holding the path of a JPEG file.
        num_classes: Depth of the one-hot class vector. Defaults to 80, the
            value that used to be hard-coded, so ``dataset.map`` calls with a
            single argument behave as before. It should equal the number of
            units in the model's ``'class'`` output.

    Returns:
        ``(image, {"bbox": bbox, "class": one_hot_label})`` where ``bbox`` has
        shape ``[4]`` and ``one_hot_label`` has shape ``[num_classes]``.

    Raises:
        A ``KeyError`` inside the ``py_function`` when the file name is
        missing from ``annotation_dict`` (or its category from ``label2idx``).
        NOTE(review): how TensorFlow surfaces that error to the caller is not
        visible here.
    """
    filename = tf.strings.split(image_path, os.sep)[-1]
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])

    def get_label_and_bbox(f):
        # Runs eagerly: ``f`` is an EagerTensor holding the file name bytes.
        f_decoded = f.numpy().decode("utf-8")
        category_id, bbox = annotation_dict[f_decoded]
        label_idx = label2idx[category_id]
        bbox = convert_bbox(bbox)
        return np.int32(label_idx), np.array(bbox, dtype=np.float32)

    label, bbox = tf.py_function(
        func=get_label_and_bbox,
        inp=[filename],
        Tout=(tf.int32, tf.float32)
    )

    # py_function outputs carry no static shape; restore it so batching and
    # the model see fully defined shapes.
    label.set_shape([])
    bbox.set_shape([4])

    # NOTE(review): a label index >= num_classes is expected to encode as an
    # all-zero row rather than raise - confirm against the tf.one_hot docs.
    one_hot_label = tf.one_hot(label, depth=num_classes)
    return image, {"bbox": bbox, "class": one_hot_label}

def create_dataset(image_dir, batch_size=32, shuffle=True):
    """Build a batched ``tf.data`` pipeline over the ``*.jpg`` files in a folder.

    Args:
        image_dir: Directory that is globbed for ``*.jpg`` files.
        batch_size: Number of samples per batch.
        shuffle: When true, the file listing is shuffled and the decoded
            samples are shuffled again through a 500-element buffer.

    Returns:
        A dataset yielding batches of the ``(image, targets)`` pairs produced
        by ``load_image_and_label``, with prefetching enabled.
    """
    pattern = os.path.join(image_dir, "*.jpg")
    files = tf.data.Dataset.list_files(pattern, shuffle=shuffle)

    # Decode images and look up labels in parallel.
    samples = files.map(load_image_and_label, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        samples = samples.shuffle(500)

    batched = samples.batch(batch_size)
    return batched.prefetch(tf.data.AUTOTUNE)

# Training pipeline with create_dataset's defaults: batches of 32, shuffled.
train_ds = create_dataset(train_dir)

In [45]:
# Build and compile the detector with its default input shape and class count.
model = custom_detection_model()

# Save the full model (not just the weights) whenever the training-set bbox
# MSE reaches a new minimum. No validation data is passed to fit(), so the
# monitored value is the training metric.
checkpoint_cb = ModelCheckpoint(
    filepath=paths["Trained_model"],
    monitor='bbox_mse',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

history = model.fit(train_ds, epochs=25, callbacks=[checkpoint_cb])


Epoch 1/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - bbox_loss: 0.1031 - bbox_mse: 0.1031 - class_accuracy: 0.0529 - class_loss: 4.7317 - loss: 4.8349
Epoch 1: bbox_mse improved from inf to 0.09069, saving model to /home/utkarsh/Desktop/Sem-2/Deep Learning/2024PGCSDS14_Utkarsh Saxena_DeepLearning/DLProject/best_model_bbox.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 110ms/step - bbox_loss: 0.1031 - bbox_mse: 0.1031 - class_accuracy: 0.0529 - class_loss: 4.7316 - loss: 4.8347
Epoch 2/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - bbox_loss: 0.0685 - bbox_mse: 0.0685 - class_accuracy: 0.1494 - class_loss: 3.7617 - loss: 3.8302
Epoch 2: bbox_mse improved from 0.09069 to 0.06431, saving model to /home/utkarsh/Desktop/Sem-2/Deep Learning/2024PGCSDS14_Utkarsh Saxena_DeepLearning/DLProject/best_model_bbox.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 109ms/s