In [1]:
import os
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from PIL import Image
import yaml
import numpy as np

In [3]:
# Define your custom dataset class
class CustomObjectDetectionDataset(torch.utils.data.Dataset):
    def __init__(self, root, split, transform=None):
        self.root = root
        self.split = split
        self.transform = transform
        self.images_folder = os.path.join(root, split, 'images')
        self.labels_folder = os.path.join(root, split, 'labels')
        self.images = os.listdir(self.images_folder)
        self.classes = self.load_classes()

    def load_classes(self):
        # Load class names from data.yaml
        data_yaml_path = os.path.join(self.root, 'data.yaml')
        with open(data_yaml_path, 'r') as f:
            data = yaml.safe_load(f)
        return data['names']

    def load_annotations(self, image_name):
        annotation_file = os.path.splitext(image_name)[0] + '.txt'
        annotation_path = os.path.join(self.labels_folder, annotation_file)

        with open(annotation_path, 'r') as f:
            lines = f.read().splitlines()

        # Parse lines to get bounding boxes and class labels
        boxes = []
        labels = []
        for line in lines:
            parts = line.split()
            if len(parts) == 5:  # Expected format: class x_min y_min x_max y_max
                class_label = int(parts[0])
                x_min, y_min, x_max, y_max = map(float, parts[1:])
                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(class_label)

        return {
            'image': image_name,
            'boxes': torch.tensor(boxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64),
        }

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_name = self.images[idx]
        image_path = os.path.join(self.images_folder, image_name)
        image = Image.open(image_path).convert("RGB")

        # Load the corresponding annotation (bounding boxes and labels) for this image
        annotation = self.load_annotations(image_name)

        # Pad annotations to make them the same size
        max_num_boxes = max(len(annotation['boxes']) for annotation in [annotation])
        for annotation in [annotation]:
            num_boxes = len(annotation['boxes'])
            if num_boxes < max_num_boxes:
                padding = torch.zeros(max_num_boxes - num_boxes, 4)
                annotation['boxes'] = torch.cat([annotation['boxes'], padding], dim=0)

        if self.transform is not None:
            image = self.transform(image)

        return image, annotation

In [4]:
# Define the path to your custom dataset root folder
custom_dataset_root = '/content/drive/MyDrive/CustomIndoorDataset'

In [5]:
# Define the dataset split (train, val, or test)
split = 'train'

In [6]:
#custom transformations
custom_transform = transforms.Compose([
    transforms.Resize((800, 800)),  # Resize your images as needed
    transforms.ToTensor(),
])

In [7]:
# Create a custom dataset for the specified split
custom_dataset = CustomObjectDetectionDataset(root=custom_dataset_root, split=split, transform=custom_transform)

In [8]:
# Create a data loader
data_loader = DataLoader(custom_dataset, batch_size=2, shuffle=True, num_workers=4)



In [9]:
# Define the model architecture (Faster R-CNN)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 240MB/s]


In [10]:
# Modify the classifier head for your custom number of classes
num_classes = len(custom_dataset.classes)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes
)

In [11]:
# Define optimizer and learning rate scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [12]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()

    for images, annotation in data_loader:
        # Prepare images and annotations
        images = list(image for image in images)

        # Forward pass, calculate loss, and optimize
        loss_dict = model(images, annotation)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    lr_scheduler.step()

RuntimeError: ignored

In [13]:
# Save the trained model
torch.save(model.state_dict(), 'custom_trained_model.pth')