In [6]:
import os
import xml.etree.ElementTree as ET
from PIL import Image, UnidentifiedImageError
import glob

# Define dataset root
dataset_dir = r"C:\Users\Sejal Hanmante\OneDrive\Desktop\idd detection\IDD_FGVD"

# Class mapping — update this as needed
class_map = {
    'car': 0,
    'truck': 1,
    'bus': 2,
    'autorickshaw': 3,
    'motorcycle': 4,
    'bicycle': 5,
    'mini-bus': 6,
    'scooter': 4
}

# Convert XML to YOLO without normalization
def xml_to_yolo(xml_path, image_path, label_path):
    try:
        img = Image.open(image_path)
        img_width, img_height = img.size
    except UnidentifiedImageError:
        print(f"⚠️ Unreadable image: {image_path}. Skipping.")
        return
    except FileNotFoundError:
        print(f"⚠️ Image not found: {image_path}. Skipping.")
        return

    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
    except Exception as e:
        print(f"❌ Error reading {xml_path}: {e}")
        return

    object_count = 0

    with open(label_path, 'w') as label_file:
        for obj in root.findall('object'):
            raw_class_name = obj.find('name').text.strip().lower()
            base_class_name = raw_class_name.split('_')[0]  # Use only the base class name

            if base_class_name not in class_map:
                print(f"⏭️ Skipping unknown class '{raw_class_name}' (base: '{base_class_name}') in {xml_path}")
                continue

            class_id = class_map[base_class_name]
            bbox = obj.find('bndbox')

            try:
                xmin = int(float(bbox.find('xmin').text))
                ymin = int(float(bbox.find('ymin').text))
                xmax = int(float(bbox.find('xmax').text))
                ymax = int(float(bbox.find('ymax').text))
            except Exception as e:
                print(f"⚠️ Invalid bbox in {xml_path}: {e}")
                continue

            # Directly store the bounding box without normalization
            label_file.write(f"{class_id} {xmin} {ymin} {xmax} {ymax}\n")
            object_count += 1

    if object_count == 0:
        print(f"🚫 No valid objects in {xml_path}, label file left empty.")

# Process splits
for split in ['train', 'val', 'test']:
    image_dir = os.path.join(dataset_dir, split, 'images')
    anno_dir = os.path.join(dataset_dir, split, 'annos')
    label_dir = os.path.join(dataset_dir, split, 'frcnnlabels')  # Change label folder name

    os.makedirs(label_dir, exist_ok=True)

    for image_filename in os.listdir(image_dir):
        if image_filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(image_dir, image_filename)
            xml_filename = os.path.splitext(image_filename)[0] + '.xml'
            xml_path = os.path.join(anno_dir, xml_filename)

            if os.path.exists(xml_path):
                label_path = os.path.join(label_dir, os.path.splitext(image_filename)[0] + '.txt')
                xml_to_yolo(xml_path, image_path, label_path)
            else:
                print(f"❌ Missing XML annotation: {xml_filename}")

# Optional: Remove all empty .txt files
for split in ['train', 'val', 'test']:
    label_dir = os.path.join(dataset_dir, split, 'frcnnlabels')  # Updated label folder name
    for txt_file in glob.glob(os.path.join(label_dir, '*.txt')):
        if os.path.getsize(txt_file) == 0:
            os.remove(txt_file)
            print(f"🗑️ Removed empty file: {txt_file}")

print("✅ All XML files processed and labels generated without normalization.")


⚠️ Unreadable image: C:\Users\Sejal Hanmante\OneDrive\Desktop\idd detection\IDD_FGVD\train\images\106.jpg. Skipping.
✅ All XML files processed and labels generated without normalization.


In [7]:
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [2]:
# Load the pre-trained Faster R-CNN model with a ResNet-50 backbone
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Number of classes (your dataset classes + 1 for background)
num_classes = 8  # For example, 2 classes + background

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the head of the model with a new one (for the number of classes in your dataset)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)



In [8]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import transforms as T
from torch.utils.data import Dataset
import os
from PIL import Image

# Define the FGVDataset as per previous structure
class FGVDataset(Dataset):
    def __init__(self, data_root, split='train', transforms=None):
        self.data_root = data_root
        self.split = split
        self.transforms = transforms
        
        self.img_dir = os.path.join(self.data_root, self.split, 'images')
        self.lbl_dir = os.path.join(self.data_root, self.split, 'frcnnlabels')
        
        self.image_files = [f for f in os.listdir(self.img_dir) if f.endswith('.jpg')]  # Adjust if needed
        
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.img_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        annotation_file = img_name.replace('.jpg', '.txt')  # Adjust for your file type
        annotation_path = os.path.join(self.lbl_dir, annotation_file)
        
        boxes = []
        labels = []
        
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                class_id = int(parts[0])
                x_min = float(parts[1])
                y_min = float(parts[2])
                x_max = float(parts[3])
                y_max = float(parts[4])

                # Ensure the bounding box is valid: width and height must be positive
                if x_max > x_min and y_max > y_min:
                    boxes.append([x_min, y_min, x_max, y_max])
                    labels.append(class_id)
                else:
                    print(f"Invalid box found: {line}. Skipping this box.")
                    continue  # You can also set boxes to a default value if you prefer

        # Handle case with no valid boxes
        if len(boxes) == 0:  
            # If no valid boxes, create a default box and label
            boxes = torch.tensor([[0.0, 0.0, 1.0, 1.0]], dtype=torch.float32)  # Dummy box
            labels = torch.tensor([0], dtype=torch.int64)  # Default label
        
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        
        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.image_files)




# Define transformations (e.g., resizing, normalization)
transform = T.Compose([
    T.Resize((800, 800)),  # Resize to a fixed size
    T.ToTensor(),  # Convert image to tensor
])

# Define paths for the train, val, and test folders
data_root = r'C:\Users\Sejal Hanmante\OneDrive\Desktop\idd detection\IDD_FGVD'  # Path to the folder containing 'train', 'test', 'val' directories

# Create the dataset for training, validation, and testing directly
train_dataset = FGVDataset(data_root=data_root, split='train', transforms=transform)
val_dataset = FGVDataset(data_root=data_root, split='val', transforms=transform)
test_dataset = FGVDataset(data_root=data_root, split='test', transforms=transform)

# Create data loaders for each dataset
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, 
                          collate_fn=lambda x: tuple(zip(*x)))  # Collate function for object detection
valid_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, 
                          collate_fn=lambda x: tuple(zip(*x)))  # Collate function for object detection
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, 
                         collate_fn=lambda x: tuple(zip(*x)))  # Collate function for object detection

# Example: Access first batch from train_loader
for images, targets in train_loader:
    print(f"Batch of images shape: {images[0].shape}")
    print(f"Bounding boxes for first image in batch: {targets[0]['boxes']}")
    break  # Only print for the first batch


Batch of images shape: torch.Size([3, 800, 800])
Bounding boxes for first image in batch: tensor([[ 148.,  648.,  402.,  977.],
        [ 910.,  562., 1090.,  707.],
        [1450.,  553., 1650.,  656.],
        [1295.,  542., 1415.,  618.]])


In [10]:
from tqdm import tqdm
import torch

# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Set up the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Small threshold for bounding box validity
MIN_BOX_SIZE = 1e-4  # Minimum box size threshold

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    print(f"\n🔁 Epoch {epoch + 1}/{num_epochs}")
    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch + 1}", leave=False):
        # Skip batches where images or targets are None
        if images is None or targets is None:
            continue

        # Filter invalid boxes (boxes with very small width or height)
        for target in targets:
            boxes = target["boxes"]
            valid_boxes = []
            valid_labels = []

            for i, box in enumerate(boxes):
                xmin, ymin, xmax, ymax = box
                if (xmax - xmin) > MIN_BOX_SIZE and (ymax - ymin) > MIN_BOX_SIZE:
                    valid_boxes.append(box)
                    valid_labels.append(target["labels"][i])

            # Replace with the valid boxes and labels
            target["boxes"] = torch.stack(valid_boxes) if valid_boxes else torch.empty(0, 4)
            target["labels"] = torch.tensor(valid_labels)

        # Move to device
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

        train_loss += losses.item()

    lr_scheduler.step()
    avg_loss = train_loss / len(train_loader)
    print(f"📉 Epoch [{epoch + 1}/{num_epochs}] - Avg Loss: {avg_loss:.4f}")

print("✅ Training complete!")



🔁 Epoch 1/10


Epoch 1:   0%|          | 0/884 [00:00<?, ?it/s]

                                                            

KeyboardInterrupt: 

In [None]:
# Set the model to evaluation mode
model.eval()
# Test on a new image
with torch.no_grad():
    for images, targets in valid_loader:
        images = list(img.to(device) for img in images)
        predictions = model(images)
        # Example: print the bounding boxes and labels for the first image
        print(predictions[0]['boxes'])
        print(predictions[0]['labels'])