<a href="https://colab.research.google.com/github/aloofzebra03/Ielektron-Internship/blob/main/Project/preparing_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; every dataset and weight path used in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
import xml.etree.ElementTree as ET
import os
import numpy as np
from PIL import Image
import pickle

In [3]:
# Weight files stored on Drive.
# NOTE(review): neither of these two names is referenced again in the cells visible
# here — confirm whether they are still needed.
weights_source_path = '/content/drive/MyDrive/IDD_lite/VGG_ILSVRC_16_layers_fc_reduced.h5'

# Same path string as `pretrained_model_path`, which is defined in a later cell.
weights_destination_path = '/content/drive/MyDrive/IDD_lite/VGG_coco_SSD_300x300_iter_400000_subsampled_34_classes.h5'

In [4]:
# Function to parse XML files
def parse_annotation(xml_file, class_map):
    """Extract bounding boxes and class ids from an XML annotation file.

    The file is expected to contain ``object`` elements, each with a ``name``
    child and a ``bndbox`` child holding ``xmin``/``ymin``/``xmax``/``ymax``.
    Objects whose name is not a key of ``class_map`` (or that have no ``name``
    element at all) are ignored.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.
        class_map: Mapping from class name to class id.

    Returns:
        A ``(boxes, labels)`` tuple of NumPy arrays. ``boxes`` has shape
        ``(N, 4)`` with rows ``[xmin, ymin, xmax, ymax]`` and ``labels`` has
        shape ``(N,)``. When no object matches, the arrays are empty integer
        arrays of shape ``(0, 4)`` and ``(0,)``.

    Raises:
        ValueError: If a coordinate cannot be read as a number.
        AttributeError: If a matching object lacks ``bndbox`` or one of its
            coordinate children.
    """
    root = ET.parse(xml_file).getroot()
    boxes = []
    labels = []
    for obj in root.findall('object'):
        # findtext returns None when the name element is absent, so such
        # objects simply fail the membership test instead of raising.
        class_name = obj.findtext('name')
        if class_name not in class_map:
            continue
        bbox = obj.find('bndbox')
        # Go through float() first: some annotation tools write "12.0" or
        # "12.6", which a bare int() rejects. Plain integers are unaffected.
        boxes.append([
            int(round(float(bbox.find(tag).text)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ])
        labels.append(class_map[class_name])

    if not boxes:
        # Keep a stable rank and integer dtype for the "nothing found" case so
        # callers can index or stack the result like any other.
        return np.zeros((0, 4), dtype=int), np.zeros((0,), dtype=int)
    return np.array(boxes), np.array(labels)

In [5]:
# Function to find all image and annotation files in the given directory
def find_files(directory, image_extension, annotation_extension):
    """Recursively collect image and annotation paths below ``directory``.

    Directories and files are visited in sorted order, so the returned lists
    come out in the same order on every run regardless of how the underlying
    filesystem enumerates entries.

    Args:
        directory: Root directory to walk.
        image_extension: Filename suffix identifying images, e.g. ``'.jpg'``.
        annotation_extension: Filename suffix identifying annotations, e.g.
            ``'.xml'``. Only checked for files that did not match
            ``image_extension``.

    Returns:
        ``(image_files, annotation_files)``: two lists of paths built with
        ``os.path.join``. Both are empty when ``directory`` does not exist,
        because ``os.walk`` ignores listing errors by default.
    """
    image_files = []
    annotation_files = []
    for root, dirnames, filenames in os.walk(directory):
        # Sorting in place steers the order in which os.walk (top-down by
        # default) descends into subdirectories.
        dirnames.sort()
        for filename in sorted(filenames):
            path = os.path.join(root, filename)
            if filename.endswith(image_extension):
                image_files.append(path)
            elif filename.endswith(annotation_extension):
                annotation_files.append(path)
    return image_files, annotation_files


In [6]:
# Function to match image files with their corresponding XML annotation files
def match_files(image_files, annotation_files):
    """Pair each image with the annotation file that shares its base name.

    Two files match when their basenames are equal after the extension is
    removed; the directory part of the path is not compared. If several
    annotation files share a base name, the one that appears first in
    ``annotation_files`` wins. Images without a matching annotation are left
    out of the result.

    Args:
        image_files: Iterable of image paths.
        annotation_files: Iterable of annotation paths.

    Returns:
        Dict mapping image path -> annotation path, in ``image_files`` order.
    """
    # Index annotations by stem once so each image is a single dict lookup
    # instead of a scan over every annotation file.
    annotation_by_stem = {}
    for annotation_file in annotation_files:
        stem = os.path.splitext(os.path.basename(annotation_file))[0]
        # setdefault keeps the first annotation seen for a given stem.
        annotation_by_stem.setdefault(stem, annotation_file)

    image_to_annotation = {}
    for image_file in image_files:
        stem = os.path.splitext(os.path.basename(image_file))[0]
        if stem in annotation_by_stem:
            image_to_annotation[image_file] = annotation_by_stem[stem]
    return image_to_annotation

In [7]:
# Model configuration: weight file location, class count and input size.
# Nothing is built or loaded in this cell; these values are consumed by later cells.
pretrained_model_path = '/content/drive/MyDrive/IDD_lite/VGG_coco_SSD_300x300_iter_400000_subsampled_34_classes.h5'
num_classes = 10  # Number of object classes; the class_map defined in a later cell has 10 entries
input_shape = (300, 300, 3)

In [8]:
# Create the backbone: MobileNetV2 without its classification head.
# NOTE(review): the weights downloaded for this call are the 224-input variant
# (see this cell's output) while input_shape is 300x300 — confirm this is intended.
base_model = tf.keras.applications.MobileNetV2(input_shape=input_shape, include_top=False)
x = base_model.output



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [9]:
# Add the detection head on top of the backbone feature map:
#   3x3 conv (256 filters, ReLU) -> 1x1 conv emitting num_classes + 4 values per
#   spatial location -> reshape so each location becomes one row of num_classes + 4 values.
# NOTE(review): presumably the 4 extra channels are box coordinates — confirm
# against the loss used for training.
x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
x = Conv2D(num_classes + 4, (1, 1))(x)
x = Reshape((-1, num_classes + 4))(x)

In [10]:
# The reshaped head output is the model's prediction tensor.
predictions = x

model = tf.keras.models.Model(inputs=base_model.input, outputs=predictions)

# Load pre-trained weights by layer name, skipping layers whose weights do not fit.
# NOTE(review): the weight file name refers to a VGG-based SSD, while this model is
# built on MobileNetV2. With by_name=True only layers with identical names receive
# weights, so this call may load little or nothing — verify what actually gets loaded.
model.load_weights(pretrained_model_path, by_name=True, skip_mismatch=True)

# Prepare the dataset
# Class name -> integer id used as the label (10 entries, ids 0-9).
class_map = {'person': 0, 'rider': 1, 'motorcycle': 2, 'bicycle': 3, 'autorickshaw': 4, 'car': 5, 'truck': 6, 'bus': 7, 'train': 8, 'traffic light':9}

In [11]:
# Dataset folders on Drive, one entry per folder.
# Training data comes from IDD_lite/train.
train_dirs = [
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-03-22_17-39-26_2_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-03-22_17-39-26_3_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-04-16_15-24-27_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-04-16_15-44-27_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-04-16_15-54-27_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-04-16_16-04-27_frontFar',
    '/content/drive/MyDrive/IDD_lite/train/BLR-2018-04-16_16-14-27_frontFar',
]
# Validation data comes from the folders under IDD_lite/test.
val_dirs = [
    '/content/drive/MyDrive/IDD_lite/test/BLR-2018-04-19_17-06-55_frontFar',
    '/content/drive/MyDrive/IDD_lite/test/BLR-2018-04-19_17-16-55_frontFar',
    '/content/drive/MyDrive/IDD_lite/test/BLR-2018-04-19_17-26-55_frontFar',
]

In [18]:
def load_dataset(dirs, class_map, target_size=(300, 300), rescale_boxes=True):
    """Build image, box and label arrays from folders of annotated images.

    Every directory in ``dirs`` is searched recursively for ``.jpg`` images and
    ``.xml`` annotations, which are paired by file stem. Images whose
    annotation yields no box for a class in ``class_map`` are skipped.

    Args:
        dirs: Iterable of directory paths to scan.
        class_map: Mapping from class name to class id, forwarded to
            ``parse_annotation``.
        target_size: ``(width, height)`` that every image is resized to.
        rescale_boxes: When True (default), box coordinates are multiplied by
            the same horizontal/vertical factors as the image, so they refer
            to the resized image. This assumes the annotation coordinates are
            pixel positions in the original image. Pass False to keep the
            coordinates exactly as they appear in the annotation file.

    Returns:
        A tuple ``(images, boxes, labels)``:
          * images: uint8 array of shape ``(N, height, width, 3)``.
          * boxes: float array of shape ``(N, max_boxes, 4)``, zero padded.
          * labels: float array of shape ``(N, max_boxes)``, zero padded.
        ``max_boxes`` is the largest box count of any kept image.
    """
    target_width, target_height = target_size
    images = []
    boxes = []
    labels = []

    for directory in dirs:
        image_files, annotation_files = find_files(directory, '.jpg', '.xml')
        image_to_annotation = match_files(image_files, annotation_files)
        for image_file, annotation_file in image_to_annotation.items():
            # Parse the annotation first so images that will be skipped are
            # never decoded or resized.
            box, label = parse_annotation(annotation_file, class_map)
            if len(box) == 0:  # Skip images with no bounding boxes
                continue

            with Image.open(image_file) as img:
                orig_width, orig_height = img.size
                # Force 3 channels: a grayscale or CMYK JPEG would otherwise
                # give an array of a different shape and break stacking below.
                image = np.array(img.convert('RGB').resize((target_width, target_height)))

            box = np.asarray(box, dtype=float)
            if rescale_boxes:
                # The image was just stretched to target_size, so the boxes
                # must be stretched identically to keep pointing at the same
                # objects. Column order is [xmin, ymin, xmax, ymax].
                scale_x = target_width / orig_width
                scale_y = target_height / orig_height
                box = box * np.array([scale_x, scale_y, scale_x, scale_y])

            images.append(image)
            boxes.append(box)
            labels.append(label)

    # NOTE: a dataset pickled before box rescaling existed still holds unscaled
    # boxes; delete the cached .pkl files to rebuild with the current behavior.

    # Pad boxes and labels so every image has the same number of rows.
    # NOTE(review): padded label slots hold 0, which is also a real class id
    # whenever class_map assigns id 0 to a class; padded rows can still be
    # recognized by their all-zero box.
    max_boxes = max((len(b) for b in boxes), default=0)
    padded_boxes = np.zeros((len(images), max_boxes, 4))
    padded_labels = np.zeros((len(images), max_boxes))

    for i, (box, label) in enumerate(zip(boxes, labels)):
        padded_boxes[i, :len(box)] = box
        padded_labels[i, :len(label)] = label

    if not images:
        # Keep the image array 4-D even when nothing was loaded.
        return (np.zeros((0, target_height, target_width, 3), dtype=np.uint8),
                padded_boxes, padded_labels)
    return np.array(images), padded_boxes, padded_labels

In [16]:
def save_dataset(images, boxes, labels, file_path):
    """Pickle the ``(images, boxes, labels)`` tuple to ``file_path``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    bundle = (images, boxes, labels)
    with open(file_path, mode='wb') as sink:
        pickle.dump(bundle, sink)

def load_dataset_from_file(file_path):
    """Return the object that was pickled at ``file_path``.

    Unpickling can execute arbitrary code, so only use this on files you
    created yourself.
    """
    with open(file_path, mode='rb') as source:
        payload = pickle.load(source)
    return payload

# Cache files on Drive: the prepared train/validation datasets are pickled here
# and read back from here on later runs.
train_dataset_file = '/content/drive/MyDrive/IDD_lite/train_dataset.pkl'
val_dataset_file = '/content/drive/MyDrive/IDD_lite/val_dataset.pkl'

In [19]:
# Training set: build it from the raw folders and cache it when no cache file
# exists yet; otherwise reuse the cached copy.
if not os.path.exists(train_dataset_file):
    train_images, train_boxes, train_labels = load_dataset(train_dirs, class_map)
    save_dataset(train_images, train_boxes, train_labels, train_dataset_file)
else:
    train_images, train_boxes, train_labels = load_dataset_from_file(train_dataset_file)

In [20]:
# Validation set: build it from the raw folders and cache it when no cache file
# exists yet; otherwise reuse the cached copy.
if not os.path.exists(val_dataset_file):
    val_images, val_boxes, val_labels = load_dataset(val_dirs, class_map)
    save_dataset(val_images, val_boxes, val_labels, val_dataset_file)
else:
    val_images, val_boxes, val_labels = load_dataset_from_file(val_dataset_file)