# Testfile - to be edited

Labels are in the yolo format:<br />
In this format, each line represents a bounding box with the following values: <br/><ul>
    <li>class</li>
    <li>center_x</li>
    <li>center_y</li>
    <li>width</li>
    <li>height</li>
</ul> All coordinates are normalized relative to the image dimensions.

pip install

In [107]:
!pip install torch torchvision



imports

In [108]:
import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

Image & Label directories

In [109]:
img_root = '_pytorch_bland/images'
ann_root = '_pytorch_bland/labels'

Class ImageDataset

In [111]:
class SolarPanelDataset(Dataset):
    def __init__(self, img_root, ann_root, transform=None):
        self.img_root = img_root
        self.ann_root = ann_root
        self.transform = transform
        self.image_files = sorted([os.path.join(img_root, file) for file in os.listdir(img_root) if file.endswith('.jpg')])
        self.ann_files = sorted([os.path.join(ann_root, file) for file in os.listdir(ann_root) if file.endswith('.txt')])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        img = Image.open(img_path).convert('RGB')

        # Load and process the annotations for this image
        ann_path = self.ann_files[idx]
        boxes = []  # List of bounding boxes
        labels = []  # List of labels (1 for solar panel)

        # Load and process the annotations (YOLO format)
        with open(ann_path, 'r') as f:
            for line in f:
                label, cx, cy, w, h = map(float, line.strip().split())
                x1 = (cx - w / 2) * img.width
                y1 = (cy - h / 2) * img.height
                x2 = (cx + w / 2) * img.width
                y2 = (cy + h / 2) * img.height
                boxes.append([x1, y1, x2, y2])
                # Add 1 to the label to ensure it starts from 1
                adjusted_label = int(label) + 1
                labels.append(adjusted_label)

        # Handle cases where there are no bounding boxes
        if len(boxes) == 0:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
            iscrowd = torch.zeros((0,), dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
            iscrowd = torch.zeros(len(boxes), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx]),
            'iscrowd': iscrowd
        }

        if self.transform:
            img = self.transform(img)

        return img, target

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = SolarPanelDataset(img_root, ann_root, transform=transform)

Split the data: We build a data & training dataset

In [112]:
from torch.utils.data import random_split

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

Create data loaders for training and validation

In [114]:
# Create data loaders for the training and validation sets
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=4)

Model architecture<br />
According to Medium (https://medium.com/ibm-data-ai/faster-r-cnn-vs-yolo-vs-ssd-object-detection-algorithms-18badb0e02dc) we should use a faster R-CNN for Object Detection.

In [115]:
# Create the Faster R-CNN model
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

num_classes = 6  # 5 classes + 1 for background
model = FasterRCNN(backbone, num_classes=num_classes, rpn_anchor_generator=anchor_generator)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to C:\Users\Lukas/.cache\torch\hub\checkpoints\mobilenet_v2-b0353104.pth
100.0%


In [116]:
import torchvision.models as models
import torch.optim as optim

def get_faster_rcnn_model(num_classes):
    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    return model

num_classes = 2  # Background class (0) and solar panel class (1)
model = get_faster_rcnn_model(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (images, targets) in enumerate(train_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

        running_loss += losses.item()

    print(f"Epoch {epoch + 1}, Loss: {running_loss / (i + 1)}")


RuntimeError: DataLoader worker (pid(s) 23512, 14492, 21508, 16672) exited unexpectedly