<a href="https://colab.research.google.com/github/bdilpreet66/CNN/blob/main/COCO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pycocotools

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!wget http://images.cocodataset.org/zips/train2017.zip
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip

--2023-04-28 03:25:56--  http://images.cocodataset.org/zips/train2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 3.5.25.205, 3.5.9.11, 54.231.169.89, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|3.5.25.205|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘train2017.zip’


In [None]:
!unzip train2017.zip -d train2017
!unzip annotations_trainval2017.zip -d annotations

In [None]:
from pycocotools.coco import COCO

# Initialize the COCO API for instance annotations
coco = COCO('annotations/instances_train2017.json')

# Load the categories in the dataset
categories = coco.loadCats(coco.getCatIds())

# Load the annotations for the dataset
annotations = coco.loadAnns(coco.getAnnIds())

In [6]:
import tensorflow as tf

In [7]:


# Define the input shape and number of classes for the NiN model
input_shape = (None, None, 3)
num_classes = 80

# Define the NiN model
def nin_model(input_shape, num_classes):
    model = tf.keras.Sequential([
        # Convolutional layer 1 with 192 filters and 5x5 kernel
        tf.keras.layers.Conv2D(filters=192, kernel_size=5, padding='same', activation='relu', input_shape=input_shape),
        # NiN block 1 with 192 filters and 1x1 kernel
        tf.keras.layers.Conv2D(filters=160, kernel_size=1, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=96, kernel_size=1, padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
        # NiN block 2 with 192 filters and 1x1 kernel
        tf.keras.layers.Conv2D(filters=192, kernel_size=5, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=192, kernel_size=1, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=192, kernel_size=1, padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
        # NiN block 3 with 192 filters and 1x1 kernel
        tf.keras.layers.Conv2D(filters=192, kernel_size=3, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=192, kernel_size=1, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=num_classes, kernel_size=1, padding='same', activation='softmax'),
        # Global average pooling
        tf.keras.layers.GlobalAveragePooling2D(),
    ])
    return model

# Create the NiN model
model = nin_model(input_shape, num_classes)

# Compile the model
model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.1), loss='categorical_crossentropy', metrics=['accuracy'])




In [8]:
# Define the function to read the TFRecord files and parse the data
def parse_tfrecord(serialized_example):
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(serialized_example, feature_description)
    image = tf.image.decode_jpeg(example['image/encoded'], channels=3)
    height = tf.cast(example['image/height'], tf.float32)
    width = tf.cast(example['image/width'], tf.float32)
    xmin = tf.sparse.to_dense(example['image/object/bbox/xmin'])
    ymin = tf.sparse.to_dense(example['image/object/bbox/ymin'])
    xmax = tf.sparse.to_dense(example['image/object/bbox/xmax'])
    ymax = tf.sparse.to_dense(example['image/object/bbox/ymax'])
    label = tf.sparse.to_dense(example['image/object/class/label'])
    # Normalize the image
    image = tf.cast(image, tf.float32) / 255.0
    # Concatenate the bounding boxes and labels into a single tensor
    boxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    labels = tf.one_hot(label - 1, num_classes)
    return image, boxes, labels, height, width

# Define the function to create a TFRecord dataset from a list of TFRecord files
def create_dataset(file_list):
    dataset = tf.data.TFRecordDataset(file_list)
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [9]:
# Define the paths to the train and validation TFRecord files
train_files = ['train.record']
val_files = ['val.record']

# Create the train and validation datasets
batch_size = 16
train_dataset = create_dataset(train_files)
val_dataset = create_dataset(val_files)

# Define the callbacks for the model training
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True, save_weights_only=True)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(patience=10)

# Train the model
epochs = 100
steps_per_epoch = 1000
validation_steps = 100
model.fit(train_dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, validation_data=val_dataset, validation_steps=validation_steps, callbacks=[checkpoint_callback, early_stopping_callback])

Epoch 1/100


ValueError: ignored