Libraries Used

In [45]:
import json as js
import cv2
import tensorflow as tf
import pandas as pd
import numpy as np
from os import listdir as ld
from os import path
from keras.models import Model #type: ignore
from keras import layers,models #type: ignore
from tqdm import tqdm
from keras.utils import to_categorical #type: ignore

Loading Paths

In [46]:
with open("config_CNN.json",'r') as file:
    paths = js.load(file)

Creating Datasets

In [47]:
train_img_dir = paths['Train_resized']
train_annotation = paths['Preprocessed_Train_json']
val_img_dir = paths['Validation_resized']
val_annotation = paths['Preprocessed_Validation_json']
NUM_CLASSES = 80

with open(train_annotation, 'r') as f:
    train_annotations = js.load(f)
    
with open(val_annotation, 'r') as f:
    val_annotations = js.load(f)

train_data = []
val_data = []

print("Processing Training Data")
for info in tqdm(train_annotations):
    bbox = info['bbox']
    label = info['category_id']
    image_path = path.join(train_img_dir, info["img_id"])
    train_data.append((image_path, bbox, label))

print("Processing Validation Data")
for info in tqdm(val_annotations):
    bbox = info['bbox']
    label = info['category_id']
    image_path = path.join(val_img_dir, info["img_id"])
    val_data.append((image_path, bbox, label))

def preprocess_example(image_path, bbox, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    bbox_tensor = tf.stack(bbox)
    class_tensor = tf.one_hot(label, depth=NUM_CLASSES)
    return image, {
        "class_output": class_tensor,
        "bbox_output": bbox_tensor
    }

def create_dataset(data, batch_size=32, shuffle=True):
    paths, bboxes, labels = zip(*data)
    dataset = tf.data.Dataset.from_tensor_slices((list(paths), list(bboxes), list(labels)))

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(data))

    dataset = dataset.map(lambda p, b, l: preprocess_example(p, b, l), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

train_dataset = create_dataset(train_data)
val_dataset = create_dataset(val_data, shuffle=False)


Processing Training Data


100%|██████████| 117266/117266 [00:00<00:00, 626986.07it/s]


Processing Validation Data


100%|██████████| 4952/4952 [00:00<00:00, 394390.73it/s]


Defining model

In [None]:
def inception_block(x, filters):
    branch1x1 = layers.Conv2D(filters, (1, 1), padding='same', activation='relu')(x)
    branch3x3 = layers.Conv2D(filters, (1, 1), padding='same', activation='relu')(x)
    branch3x3 = layers.Conv2D(filters, (3, 3), padding='same', activation='relu')(branch3x3)
    branch5x5 = layers.Conv2D(filters, (1, 1), padding='same', activation='relu')(x)
    branch5x5 = layers.Conv2D(filters, (5, 5), padding='same', activation='relu')(branch5x5)
    branch_pool = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = layers.Conv2D(filters, (1, 1), padding='same', activation='relu')(branch_pool)
    x = layers.concatenate([branch1x1, branch3x3, branch5x5, branch_pool], axis=-1)
    x = layers.BatchNormalization()(x)
    return x

def mbconv_block(x, filters, kernel_size, strides=(1, 1), expand_ratio=6):
    input_tensor = x
    in_channels = x.shape[-1]
    x = layers.Conv2D(in_channels * expand_ratio, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.DepthwiseConv2D(kernel_size, strides=strides, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    if strides == (1, 1) and in_channels == filters:
        x = layers.add([x, input_tensor])
    return x

def efficientnet_encoder(input_shape=(256, 256, 3)):
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(32, (3, 3), strides=(2, 2), padding="same", use_bias=False, kernel_initializer='he_normal')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = mbconv_block(x, 64, (3, 3), strides=(1, 1), expand_ratio=1)
    x = mbconv_block(x, 128, (3, 3), strides=(2, 2), expand_ratio=6)
    x = mbconv_block(x, 128, (3, 3), strides=(1, 1), expand_ratio=6)
    x = mbconv_block(x, 256, (3, 3), strides=(2, 2), expand_ratio=6)
    x = mbconv_block(x, 256, (3, 3), strides=(1, 1), expand_ratio=6)
    x = mbconv_block(x, 512, (3, 3), strides=(2, 2), expand_ratio=6)
    x = mbconv_block(x, 512, (3, 3), strides=(1, 1), expand_ratio=6)
    x = mbconv_block(x, 1024, (3, 3), strides=(2, 2), expand_ratio=6)
    return models.Model(inputs, x, name="efficientnet_encoder")

def inception_decoder(input_tensor, num_classes=80):
    x = inception_block(input_tensor, 32)
    x = inception_block(x, 64)
    x = inception_block(x, 128)
    x = inception_block(x, 256)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    bbox_output = layers.Dense(4, activation='sigmoid', name="bbox_output")(x)  
    class_output = layers.Dense(num_classes, activation='softmax', name="class_output")(x)
    return models.Model(input_tensor, {
    "bbox_output": bbox_output,
    "class_output": class_output
    }, name="inception_decoder")

def custom_detection_model(input_shape=(256, 256, 3), num_classes=80):
    encoder = efficientnet_encoder(input_shape)
    image_input = encoder.input
    x = encoder.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Reshape((1, 1, 1024))(x)
    x = layers.Conv2D(1024, (1, 1), activation='relu')(x)
    x = layers.BatchNormalization()(x)
    decoder = inception_decoder(input_tensor=x, num_classes=num_classes)
    named_outputs = decoder(x)
    model = models.Model(inputs=image_input, outputs={
    "class_output": named_outputs["class_output"],
    "bbox_output": named_outputs["bbox_output"]
})

    # Compile model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-4),
        loss={
            "class_output": "categorical_crossentropy",
            "bbox_output": "mse"
        },
        metrics={
            "class_output": "accuracy",
            "bbox_output": "mae" 
        }
    )

    return model

In [55]:
print(model.output_names)


ListWrapper(['inception_decoder', 'inception_decoder'])


Model training 

In [49]:
model = custom_detection_model(input_shape=(256, 256, 3), num_classes=80)
model.fit(
    train_dataset,       
    epochs=10,
    validation_data=val_dataset
)

Epoch 1/10


ValueError: In the dict argument `metrics`, key 'class_output' does not correspond to any model output. Received:
metrics={'class_output': 'accuracy', 'bbox_output': 'mae'}