In [2]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np

In [3]:
def parse_annotation(xml_file):
    """Extract bounding boxes and class names from a VOC-style XML annotation.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A ``(boxes, labels)`` pair of parallel lists. Each box is
        ``[xmin, ymin, xmax, ymax]`` as ints; each label is the text of the
        object's ``name`` element.
    """
    # Coordinates are looked up on the object's own <bndbox> child, in this order.
    coordinate_paths = ('bndbox/xmin', 'bndbox/ymin', 'bndbox/xmax', 'bndbox/ymax')

    boxes, labels = [], []
    for node in ET.parse(xml_file).getroot().findall('object'):
        name = node.find('name').text
        # Values go through float() first so fractional coordinates are truncated
        # instead of raising in int().
        box = [int(float(node.find(path).text)) for path in coordinate_paths]
        boxes.append(box)
        labels.append(name)
    return boxes, labels


In [4]:
def preprocess_voc2012_dataset(dataset_path, output_path, target_size=(224, 224)):
    """Resize every VOC2012 JPEG, and its segmentation masks if present, into ``output_path``.

    Args:
        dataset_path: Root of the dataset tree. ``JPEGImages`` and
            ``Annotations`` are read for every image; ``SegmentationObject``
            and ``SegmentationClass`` are read only where a matching PNG exists.
        output_path: Directory receiving the resized files (created if missing).
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Side effects:
        Writes ``<name>.jpg`` and, when the source mask exists,
        ``<name>_segmentation_object.png`` / ``<name>_segmentation_class.png``.
        Files that OpenCV cannot decode are skipped with a warning.

    Raises:
        Whatever ``parse_annotation`` raises when an image's XML annotation is
        missing or malformed.
    """
    import warnings

    image_dir = os.path.join(dataset_path, 'JPEGImages')
    annotation_dir = os.path.join(dataset_path, 'Annotations')
    # (source directory, output file suffix) for each optional mask type.
    mask_sources = (
        (os.path.join(dataset_path, 'SegmentationObject'), 'segmentation_object'),
        (os.path.join(dataset_path, 'SegmentationClass'), 'segmentation_class'),
    )

    os.makedirs(output_path, exist_ok=True)

    for filename in os.listdir(image_dir):
        if not filename.endswith('.jpg'):
            continue
        stem = os.path.splitext(filename)[0]

        image_path = os.path.join(image_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None;
            # passing that to cv2.resize would fail with an opaque error.
            warnings.warn(f"Skipping unreadable image: {image_path}")
            continue

        # Parsed so that a missing or malformed annotation still fails fast.
        # NOTE(review): the boxes/labels are not persisted anywhere, and they
        # are in original-image pixels, so they would need rescaling to
        # target_size before being usable with the resized output.
        parse_annotation(os.path.join(annotation_dir, stem + '.xml'))

        image = cv2.resize(image, target_size)

        # Optionally normalize pixel values
        # image = image.astype(np.float32) / 255.0

        cv2.imwrite(os.path.join(output_path, filename), image)

        for mask_dir, suffix in mask_sources:
            mask_path = os.path.join(mask_dir, stem + '.png')
            if not os.path.exists(mask_path):
                continue
            # NOTE(review): VOC masks are presumably palette PNGs; grayscale
            # decoding would then yield palette luminance rather than class
            # indices -- confirm this is the intended encoding.
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                warnings.warn(f"Skipping unreadable mask: {mask_path}")
                continue
            # Nearest-neighbour keeps mask values discrete; the default
            # bilinear interpolation would blend neighbouring labels into
            # values that correspond to no label at all.
            mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
            cv2.imwrite(os.path.join(output_path, f"{stem}_{suffix}.png"), mask)

In [5]:
# Example usage: run the preprocessing step on the paths below.
# Both names are notebook-level globals; `dataset_path` is reassigned by a
# later cell to point at the preprocessed output instead of the raw dataset.
dataset_path = '/kaggle/input/voc2012/VOCdevkit/VOC2012'
output_path = '/kaggle/working/Preprocessed'

# Preprocess the VOC2012 dataset: resized images (and masks, where present)
# are written under `output_path`. The call returns nothing.
preprocess_voc2012_dataset(dataset_path, output_path)

In [6]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

2024-04-18 14:56:10.273978: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-18 14:56:10.274145: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-18 14:56:10.478689: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
# Function to load and preprocess images
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Load one image file and return it as a ResNet50-preprocessed array.

    Args:
        image_path: Path of the image to load.
        target_size: Size forwarded to ``load_img``.

    Returns:
        The result of ``preprocess_input`` applied to the image array.
    """
    pil_image = load_img(image_path, target_size=target_size)
    # Convert to an array first, then apply the ResNet50 input preprocessing.
    return preprocess_input(img_to_array(pil_image))

In [8]:
# Function to load dataset and annotations
def load_dataset(dataset_path):
    """Load every ``.jpg`` in ``dataset_path`` and pair it with an annotation path.

    Args:
        dataset_path: Directory containing the preprocessed ``.jpg`` images.

    Returns:
        ``(images, annotations)`` where ``images`` is ``np.array`` of the
        per-image results of ``load_and_preprocess_image`` and ``annotations``
        is a list of ``<dataset_path>/<name>_annotation.txt`` paths in the same
        order. The paths are only constructed here, not opened or checked.
        Entries are ordered by filename.
    """
    images = []
    annotations = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded train/validation split) reproducible.
    for filename in sorted(os.listdir(dataset_path)):
        if not filename.endswith('.jpg'):
            continue
        stem = os.path.splitext(filename)[0]
        images.append(load_and_preprocess_image(os.path.join(dataset_path, filename)))
        # NOTE(review): assumes annotations live next to the image as
        # `<name>_annotation.txt`; confirm that something produces these files.
        annotations.append(os.path.join(dataset_path, f"{stem}_annotation.txt"))
    return np.array(images), annotations

In [9]:
def build_faster_rcnn_model(num_classes, weights_path='/kaggle/input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', num_anchors=None):
    """Build the three sub-models of a Faster R-CNN style detector.

    Args:
        num_classes: Number of classes predicted by the detection head.
        weights_path: Weights file loaded into the ResNet50 backbone by layer name.
        num_anchors: Anchors per feature-map location for the RPN outputs.
            When omitted, the notebook-level ``num_anchors`` global is used if
            it exists (the previous, implicit behaviour); otherwise 9.

    Returns:
        ``(backbone, rpn_model, model)``:

        * ``backbone`` maps an image to the output of ``base_model.layers[-4]``.
        * ``rpn_model`` maps a 2048-channel feature map to
          ``[rpn_cls_output, rpn_reg_output]``.
        * ``model`` maps ``[feature_map, rois]`` to ``[cls_output, reg_output]``.
          ``rois`` has shape ``(batch, num_rois, 4)`` and must hold normalised
          ``[y1, x1, y2, x2]`` boxes, the format ``tf.image.crop_and_resize``
          expects.
    """
    if num_anchors is None:
        # Backward compatibility: earlier revisions read this from a global
        # that a later notebook cell defines.
        num_anchors = globals().get('num_anchors', 9)

    base_model = ResNet50(include_top=False, input_shape=(None, None, 3), weights=None)
    base_model.load_weights(weights_path, by_name=True)
    # NOTE(review): layers[-4] is a fixed positional index; confirm it selects
    # the intended feature layer and that it emits the 2048 channels assumed
    # by `input_feature_map` below.
    backbone = models.Model(inputs=base_model.input, outputs=base_model.layers[-4].output)

    # Region proposal network: shared 3x3 conv followed by two 1x1 heads.
    input_feature_map = layers.Input(shape=(None, None, 2048))
    rpn = layers.Conv2D(512, (3, 3), padding='same', activation='relu')(input_feature_map)
    rpn_cls_output = layers.Conv2D(num_anchors * 2, (1, 1), activation='softmax', name='rpn_cls_output')(rpn)
    rpn_reg_output = layers.Conv2D(num_anchors * 4, (1, 1), activation='linear', name='rpn_reg_output')(rpn)

    rpn_model = models.Model(inputs=input_feature_map, outputs=[rpn_cls_output, rpn_reg_output])

    pool_size = 7

    def _roi_pool(inputs):
        # Crop each ROI from its own image's feature map and resize to pool_size x pool_size.
        feature_map, rois = inputs
        batch_size = tf.shape(rois)[0]
        rois_per_image = tf.shape(rois)[1]
        flat_boxes = tf.reshape(rois, (-1, 4))
        # Box i*rois_per_image + j belongs to batch element i.
        box_indices = tf.repeat(tf.range(batch_size), rois_per_image)
        crops = tf.image.crop_and_resize(feature_map, flat_boxes, box_indices, (pool_size, pool_size))
        return tf.reshape(
            crops,
            (batch_size, rois_per_image, pool_size, pool_size, feature_map.shape[-1]),
        )

    roi_input = layers.Input(shape=(None, 4))
    # tf.keras.layers provides no `RoiPoolingConv`; pool ROIs with
    # tf.image.crop_and_resize wrapped in a Lambda layer instead.
    pooled_regions = layers.Lambda(_roi_pool, name='roi_pooling')([input_feature_map, roi_input])
    x = layers.TimeDistributed(layers.Flatten())(pooled_regions)
    x = layers.TimeDistributed(layers.Dense(1024, activation='relu'))(x)
    x = layers.TimeDistributed(layers.Dense(1024, activation='relu'))(x)

    # Per-ROI heads: class scores and one 4-value box regression per class.
    cls_output = layers.TimeDistributed(layers.Dense(num_classes, activation='softmax', kernel_initializer='zero'))(x)
    reg_output = layers.TimeDistributed(layers.Dense(num_classes * 4, activation='linear', kernel_initializer='zero'))(x)

    model = models.Model(inputs=[input_feature_map, roi_input], outputs=[cls_output, reg_output])
    return backbone, rpn_model, model


In [10]:
# Load the preprocessed images and their (constructed) annotation paths.
# This rebinds the notebook-level `dataset_path` to the preprocessing output.
dataset_path = '/kaggle/working/Preprocessed'
images, annotations = load_dataset(dataset_path)

# Number of classes passed to the model builder.
num_classes = 20  # Assuming VOC dataset has 20 classes

# Encode class labels as integers.
# NOTE(review): each label is the part of the annotation file name before the
# first underscore, not anything read from the annotation itself. For names
# like `2008_000008_annotation.txt` that would be the `2008` prefix rather
# than an object class -- confirm this is intended.
label_encoder = LabelEncoder()
labels = [os.path.basename(annotation_path).split('_')[0] for annotation_path in annotations]
labels_encoded = label_encoder.fit_transform(labels)

In [None]:
# Split images and annotation paths into training and validation sets
# (80/20, fixed seed).
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size=0.2, random_state=42)

# Anchors per feature-map location. build_faster_rcnn_model reads this
# notebook-level name, so it must be defined before the builder is called.
num_anchors = 9  # You can adjust this value based on your requirements
# Build Faster R-CNN model (backbone, region proposal network, detection head)

backbone, rpn_model, model = build_faster_rcnn_model(num_classes)

# Compile the detection head: one loss per output, in the order
# [class scores, box regression].
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss=['categorical_crossentropy', 'mse'])

# Train the model.
# NOTE(review): `train_labels`, `train_bboxes`, `val_labels` and `val_bboxes`
# are not defined anywhere in the visible notebook, so this call is expected
# to raise NameError as written.
# NOTE(review): `model` is declared with a 2048-channel feature map and an ROI
# tensor as inputs, but it is fed preprocessed images and a list of annotation
# file paths here -- confirm the intended inputs (e.g. backbone features and
# ROI boxes).
model.fit([train_images, train_annotations], [train_labels, train_bboxes], validation_data=([val_images, val_annotations], [val_labels, val_bboxes]), batch_size=16, epochs=10)
