In [2]:
import os
import shutil
import json
from sklearn.model_selection import train_test_split

def split_dataset_with_annotations(images_dir, annotations_file, output_dir, test_size=0.1, val_size=0.2, seed=42):
    # Create output directories
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)

    # Load image files
    image_files = [f for f in os.listdir(images_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    image_files.sort()

    # Load annotations
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

    # First split: train_val (90%) and test (10%)
    train_val_files, test_files = train_test_split(image_files, test_size=test_size, random_state=seed)

    # Second split: train (70%) and val (20%) from the remaining 90%
    train_files, val_files = train_test_split(train_val_files, test_size=val_size/(1 - test_size), random_state=seed)

    # Convert to sets for faster lookup
    train_set = set(train_files)
    val_set = set(val_files)
    test_set = set(test_files)

    def copy_images(files, split):
        for file in files:
            src_img = os.path.join(images_dir, file)
            dst_img = os.path.join(output_dir, split, 'images', file)
            shutil.copy(src_img, dst_img)

    def save_annotations(filtered_files, split):
        split_annotations = {
            "annotations": [],
            "images": [],
            "categories": annotations.get("categories", [])
        }

        # Filter annotations based on image filenames
        image_id_map = {}
        for img in annotations['images']:
            if img['file_name'] in filtered_files:
                split_annotations['images'].append(img)
                image_id_map[img['id']] = img['file_name']

        for ann in annotations['annotations']:
            if ann['image_id'] in image_id_map:
                split_annotations['annotations'].append(ann)

        # Save split-specific annotations
        out_file = os.path.join(output_dir, split, 'annotations.json')
        with open(out_file, 'w') as f:
            json.dump(split_annotations, f, indent=2)

    # Copy images and save filtered annotations for each split
    copy_images(train_files, 'train')
    save_annotations(train_files, 'train')

    copy_images(val_files, 'val')
    save_annotations(val_files, 'val')

    copy_images(test_files, 'test')
    save_annotations(test_files, 'test')

    # Summary
    print(f'''
✅ Dataset split complete:
- Train: {len(train_files)} images ({len(train_files)/len(image_files):.1%})
- Val: {len(val_files)} images ({len(val_files)/len(image_files):.1%})
- Test: {len(test_files)} images ({len(test_files)/len(image_files):.1%})
Output directory: {output_dir}
''')

# === Run the function with your setup ===

split_dataset_with_annotations(
    images_dir='images',
    annotations_file='annotations.json',
    output_dir='output',
    test_size=0.1,
    val_size=0.2,
    seed=42
)



✅ Dataset split complete:
- Train: 98 images (69.0%)
- Val: 29 images (20.4%)
- Test: 15 images (10.6%)
Output directory: output



In [7]:
import os
import time
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.datasets import CocoDetection
from torchvision.transforms import Compose, ToTensor, Resize, RandomHorizontalFlip
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.cuda.amp import GradScaler

# Dataset Wrapper
class CocoWrapper(CocoDetection):
    def __init__(self, root, annFile, transforms=None):
        super().__init__(root, annFile)
        self._transforms = transforms

    def __getitem__(self, idx):
        image, target = super().__getitem__(idx)
        boxes = []
        labels = []
        for obj in target:
            bbox = obj['bbox']
            # Convert [x, y, width, height] to [x_min, y_min, x_max, y_max]
            bbox = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            boxes.append(bbox)
            labels.append(obj['category_id'])  # Use original category_id
        target = {
            'boxes': torch.tensor(boxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64)
        }
        if self._transforms:
            image = self._transforms(image)
        return image, target

# Process targets to ensure tensors are moved to the correct device.
def process_targets(targets, device):
    """
    Processes targets for each image.
    Each input target is a dictionary with keys 'boxes' and 'labels'.
    This function moves the tensors to the given device.
    """
    processed_targets = []
    for t in targets:
        processed_t = {
            "boxes": t["boxes"].to(device),
            "labels": t["labels"].to(device)
        }
        processed_targets.append(processed_t)
    return processed_targets

# Training Function
def train_model(train_dir, val_dir, num_classes=7, ann_filename='annotations.json', epochs=100, output_dir='results'):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"🚀 Using device: {device}")
    os.makedirs(output_dir, exist_ok=True)

    train_transform = Compose([ToTensor(), RandomHorizontalFlip(0.5), Resize((800, 800))])
    val_transform = Compose([ToTensor(), Resize((800, 800))])

    train_dataset = CocoWrapper(
        root=os.path.join(train_dir, 'images'),
        annFile=os.path.join(train_dir, ann_filename),
        transforms=train_transform
    )
    val_dataset = CocoWrapper(
        root=os.path.join(val_dir, 'images'),
        annFile=os.path.join(val_dir, ann_filename),
        transforms=val_transform
    )

    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2,
                              collate_fn=lambda x: tuple(zip(*x)))
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1,
                            collate_fn=lambda x: tuple(zip(*x)))

    # Build Model: Create a Faster R-CNN model with a ResNet50 backbone.
    backbone = torchvision.models.resnet50(pretrained=True)
    backbone = torch.nn.Sequential(*list(backbone.children())[:-2])
    backbone.out_channels = 2048

    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),)
    )
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2
    )

    # For a dataset with 7 object classes, we need num_classes = 7 + 1 (background) = 8.
    model = FasterRCNN(
        backbone,
        num_classes=num_classes + 1,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler
    )
    model.to(device)

    # Training Setup
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2, eta_min=1e-5)
    scaler = GradScaler()
    best_loss = float('inf')

    # Train Loop
    for epoch in range(epochs):
        model.train()  # Ensure the model is in training mode for computing losses.
        total_train_loss = 0.0
        start = time.time()

        for images, targets in train_loader:
            images = [img.to(device) for img in images]
            targets = process_targets(targets, device)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_train_loss += loss.item()
            torch.cuda.empty_cache()

        # For validation, use training mode to obtain losses (but no gradients are computed)
        model.train()  # Force training mode so that loss dict is returned.
        total_val_loss = 0.0
        with torch.no_grad():
            for images, targets in val_loader:
                images = [img.to(device) for img in images]
                targets = process_targets(targets, device)
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())
                total_val_loss += loss.item()
                torch.cuda.empty_cache()

        scheduler.step()

        avg_train = total_train_loss / len(train_loader)
        avg_val = total_val_loss / len(val_loader)
        if avg_val < best_loss:
            best_loss = avg_val
            torch.save(model.state_dict(), os.path.join(output_dir, f'best_model_epoch{epoch+1}.pth'))
        print(f"[Epoch {epoch+1}/{epochs}] Train Loss: {avg_train:.4f} | Val Loss: {avg_val:.4f} | Time: {time.time()-start:.1f}s")

    torch.save(model.state_dict(), os.path.join(output_dir, 'final_model.pth'))
    print("✅ Training complete. Final model saved.")

# Example call:
if __name__ == "__main__":
    train_model(
        train_dir='output/train',
        val_dir='output/val',
        num_classes=7,  # 7 object classes (model internally uses 8 classes)
        ann_filename='annotations.json',
        epochs=100,
        output_dir='results'
    )


🚀 Using device: cuda
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


  scaler = GradScaler()
  with torch.cuda.amp.autocast():


[Epoch 1/100] Train Loss: 23.7547 | Val Loss: 18.0899 | Time: 15.0s
[Epoch 2/100] Train Loss: 18.7255 | Val Loss: 17.0047 | Time: 14.6s
[Epoch 3/100] Train Loss: 17.7383 | Val Loss: 16.5013 | Time: 14.6s
[Epoch 4/100] Train Loss: 17.9115 | Val Loss: 20.7800 | Time: 13.9s
[Epoch 5/100] Train Loss: 18.0711 | Val Loss: 14.0797 | Time: 14.6s
[Epoch 6/100] Train Loss: 16.7860 | Val Loss: 20.1513 | Time: 14.1s
[Epoch 7/100] Train Loss: 16.5155 | Val Loss: 14.5779 | Time: 13.8s
[Epoch 8/100] Train Loss: 16.7117 | Val Loss: 15.8922 | Time: 13.8s
[Epoch 9/100] Train Loss: 16.0404 | Val Loss: 17.0574 | Time: 13.7s
[Epoch 10/100] Train Loss: 16.3469 | Val Loss: 15.5376 | Time: 14.0s
[Epoch 11/100] Train Loss: 16.2715 | Val Loss: 14.3248 | Time: 14.0s
[Epoch 12/100] Train Loss: 16.1148 | Val Loss: 15.8393 | Time: 14.1s
[Epoch 13/100] Train Loss: 15.8851 | Val Loss: 14.5019 | Time: 14.0s
[Epoch 14/100] Train Loss: 15.7413 | Val Loss: 13.9528 | Time: 14.4s
[Epoch 15/100] Train Loss: 15.7146 | Val Lo