In [1]:

# PPE Detection with DETR

## Imports and Setup
import json
import os

import matplotlib.pyplot as plt
import torch
import torchvision.transforms as T
from PIL import Image
from pycocotools.coco import COCO
from scipy.optimize import linear_sum_assignment
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet50

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing applied to every image: resize to a fixed 800x800, then
# convert to a tensor.
_preprocessing_steps = [
    T.Resize((800, 800)),
    T.ToTensor(),
]
transform = T.Compose(_preprocessing_steps)

In [2]:
## Custom Dataset
class PPECustomDataset(Dataset):
    """COCO-annotated PPE images served as ``(image, target)`` pairs.

    Args:
        images_dir: Directory that contains the image files named in the
            annotation file.
        annotations_file: Path to a COCO-format JSON annotation file.
        transform: Optional callable applied to the loaded RGB PIL image.

    Each item is ``(image, target)`` where ``target`` is a dict with:

    * ``'boxes'``: float32 tensor of shape ``(N, 4)`` holding
      ``(cx, cy, w, h)`` normalised to ``[0, 1]`` by the *original* image
      width/height. Normalised boxes stay valid when ``transform`` resizes
      the image and are on the same scale as sigmoid box predictions. A
      transform that crops or flips would still have to adjust the boxes
      itself.
    * ``'labels'``: int64 tensor of shape ``(N,)`` holding the raw COCO
      ``category_id`` of each box.

    Images without annotations yield ``(0, 4)`` boxes and ``(0,)`` labels.
    """

    def __init__(self, images_dir, annotations_file, transform=None):
        self.images_dir = images_dir
        self.coco = COCO(annotations_file)
        self.image_ids = self.coco.getImgIds()
        self.transform = transform

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.image_ids)

    @staticmethod
    def _build_target(anns, img_width, img_height):
        """Convert COCO annotation dicts into the target dict for one image.

        Args:
            anns: Iterable of annotation dicts with a ``'bbox'`` entry in
                COCO ``[x_min, y_min, width, height]`` pixel format and a
                ``'category_id'`` entry.
            img_width: Width in pixels of the image the boxes refer to.
            img_height: Height in pixels of the image the boxes refer to.

        Returns:
            Dict with ``'boxes'`` (float32, ``(N, 4)``, normalised
            ``cx, cy, w, h``) and ``'labels'`` (int64, ``(N,)``).
        """
        # reshape(-1, 4) keeps the box tensor two-dimensional even when the
        # image has no annotations (otherwise its shape would be (0,)).
        boxes = torch.tensor(
            [ann['bbox'] for ann in anns], dtype=torch.float32
        ).reshape(-1, 4)
        labels = torch.tensor(
            [ann['category_id'] for ann in anns], dtype=torch.int64
        )

        x_min, y_min, width, height = boxes.unbind(dim=1)
        boxes = torch.stack(
            [
                (x_min + width / 2) / img_width,
                (y_min + height / 2) / img_height,
                width / img_width,
                height / img_height,
            ],
            dim=1,
        )
        return {'boxes': boxes, 'labels': labels}

    def __getitem__(self, idx):
        """Load the ``idx``-th image and its targets (see class docstring)."""
        image_id = self.image_ids[idx]
        image_info = self.coco.loadImgs(image_id)[0]
        path = os.path.join(self.images_dir, image_info['file_name'])

        # Close the file handle as soon as the pixels have been decoded.
        with Image.open(path) as raw_image:
            image = raw_image.convert('RGB')
        # Record the size before any transform so that the boxes are
        # normalised against the coordinate system they were annotated in.
        img_width, img_height = image.size

        ann_ids = self.coco.getAnnIds(imgIds=image_id)
        anns = self.coco.loadAnns(ann_ids)
        target = self._build_target(anns, img_width, img_height)

        if self.transform:
            image = self.transform(image)

        return image, target

In [15]:
# Paths
# Update these two paths to point at your copy of the dataset.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d" being passed through unchanged.
images_dir = r"E:\dataset\images"  # Change to the correct directory
annotations_file = "E:/dataset/annotations.json"  # Change to the correct file path


def collate_detection_batch(batch):
    """Batch ``(image, target)`` samples without merging the targets.

    The number of boxes differs from image to image, so the per-image target
    dicts cannot be stacked by the default collate function. Images are
    stacked into one tensor (they must all share one shape, which the
    fixed-size resize in ``transform`` provides) and targets are returned as
    a list with one dict per image.

    Args:
        batch: List of ``(image_tensor, target_dict)`` pairs.

    Returns:
        Tuple ``(images, targets)``: a stacked image tensor and a list of the
        target dicts in the same order.
    """
    images, targets = zip(*batch)
    return torch.stack(images), list(targets)


dataset = PPECustomDataset(images_dir, annotations_file, transform=transform)
data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=collate_detection_batch,
)

## DETR Model
class DETRPPE(nn.Module):
    """Minimal DETR-style detector: CNN backbone + transformer + two heads.

    Args:
        num_classes: Number of object classes. The classification head has
            ``num_classes + 1`` outputs; the extra last index is meant for
            "no object".
        hidden_dim: Transformer model width. Must be even, because the
            positional encoding concatenates two ``hidden_dim // 2`` halves.
        nheads: Number of attention heads.
        num_encoder_layers: Number of transformer encoder layers.
        num_decoder_layers: Number of transformer decoder layers.
        backbone: Optional feature extractor exposing the ResNet attributes
            ``conv1, bn1, relu, maxpool, layer1..layer4`` and producing 2048
            output channels. Defaults to a freshly initialised ``resnet50()``.
            If it has an ``fc`` attribute, that attribute is deleted from the
            passed object because the classification head is never used.
    """

    def __init__(self, num_classes, hidden_dim=256, nheads=8, num_encoder_layers=6, num_decoder_layers=6,
                 backbone=None):
        super().__init__()
        self.backbone = resnet50() if backbone is None else backbone
        if hasattr(self.backbone, 'fc'):
            del self.backbone.fc
        # 1x1 convolution projecting the 2048 backbone channels to hidden_dim.
        self.conv = nn.Conv2d(2048, hidden_dim, 1)
        self.transformer = nn.Transformer(hidden_dim, nheads, num_encoder_layers, num_decoder_layers)
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)
        # 100 learned object queries -> 100 predictions per image.
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))
        # Learned positional tables; they cap the feature map at 50 x 50.
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))

    def forward(self, inputs):
        """Run detection on a batch of images.

        Args:
            inputs: Image batch of shape ``(B, C, H, W)`` accepted by the
                backbone.

        Returns:
            Dict with ``'pred_logits'`` of shape ``(B, 100, num_classes + 1)``
            and ``'pred_boxes'`` of shape ``(B, 100, 4)`` with values in
            ``[0, 1]`` (sigmoid output).

        Raises:
            ValueError: If the backbone feature map is larger than the
                positional embedding tables.
        """
        x = self.backbone.conv1(inputs)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)
        h = self.conv(x)

        batch_size = h.shape[0]
        H, W = h.shape[-2:]
        max_rows, max_cols = self.row_embed.shape[0], self.col_embed.shape[0]
        if H > max_rows or W > max_cols:
            # Slicing the tables would silently truncate and then fail with
            # an opaque shape error, so fail early with a clear message.
            raise ValueError(
                f"Feature map of size {H}x{W} exceeds the positional "
                f"embedding capacity of {max_rows}x{max_cols}; use a smaller "
                f"input image."
            )

        # (H*W, 1, hidden_dim): column and row embeddings concatenated per
        # location, broadcast over the batch dimension below.
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)

        # nn.Transformer is sequence-first here: src is (H*W, B, hidden_dim).
        src = pos + 0.1 * h.flatten(2).permute(2, 0, 1)
        # The queries need one copy per batch element; nn.Transformer rejects
        # src/tgt whose batch sizes differ, so a batch of more than one image
        # would otherwise fail.
        tgt = self.query_pos.unsqueeze(1).repeat(1, batch_size, 1)
        h = self.transformer(src, tgt).transpose(0, 1)
        return {
            'pred_logits': self.linear_class(h),
            'pred_boxes': self.linear_bbox(h).sigmoid()
        }

# Initialize model
# Size the classifier by the largest category id rather than by the number of
# categories: COCO ids need not start at 0 or be contiguous, and the model
# reserves index `num_classes` for "no object", so every raw category id must
# be strictly smaller than `num_classes`.
num_classes = max(dataset.coco.getCatIds()) + 1
model = DETRPPE(num_classes).to(device)

## Training Loop
# Simplified DETR set-prediction loss: every ground-truth box is matched to
# exactly one query with the Hungarian algorithm; matched queries are trained
# on class + box, unmatched queries on "no object". The GIoU term of the
# original DETR loss is not included. Box targets are compared directly with
# the model's sigmoid outputs, so they must be normalised to [0, 1].
NUM_EPOCHS = 10
NO_OBJECT_WEIGHT = 0.1  # down-weights the dominant "no object" class (DETR default)
BBOX_LOSS_WEIGHT = 5.0  # weight of the L1 box term (DETR default)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

class_weights = torch.ones(num_classes + 1, device=device)
class_weights[-1] = NO_OBJECT_WEIGHT
criterion = nn.CrossEntropyLoss(weight=class_weights)


@torch.no_grad()
def match_predictions(pred_logits, pred_boxes, targets):
    """Match queries to ground-truth boxes, one image at a time.

    Args:
        pred_logits: Class logits, one ``(num_queries, num_classes + 1)``
            slice per image.
        pred_boxes: Predicted boxes, one ``(num_queries, 4)`` slice per image.
        targets: List of per-image dicts with ``'labels'`` and ``'boxes'``.

    Returns:
        List of ``(query_indices, target_indices)`` int64 tensor pairs, one
        per image; both are empty for an image without annotations.
    """
    matches = []
    for logits, boxes, target in zip(pred_logits, pred_boxes, targets):
        gt_labels = target['labels']
        gt_boxes = target['boxes'].reshape(-1, 4)
        if gt_labels.numel() == 0:
            no_match = torch.empty(0, dtype=torch.int64, device=logits.device)
            matches.append((no_match, no_match))
            continue
        # Cost is low when the query assigns high probability to the target
        # class and its box is close to the target box.
        class_cost = -logits.softmax(-1)[:, gt_labels]
        bbox_cost = torch.cdist(boxes, gt_boxes, p=1)
        cost = class_cost + BBOX_LOSS_WEIGHT * bbox_cost
        query_idx, target_idx = linear_sum_assignment(cost.cpu().numpy())
        matches.append((
            torch.as_tensor(query_idx, dtype=torch.int64, device=logits.device),
            torch.as_tensor(target_idx, dtype=torch.int64, device=logits.device),
        ))
    return matches


def detection_loss(outputs, targets):
    """Weighted cross-entropy + L1 box loss over Hungarian-matched pairs.

    Args:
        outputs: Model output dict with ``'pred_logits'`` and ``'pred_boxes'``.
        targets: List of per-image target dicts (``'labels'``, ``'boxes'``).

    Returns:
        Scalar loss tensor.
    """
    pred_logits = outputs['pred_logits']
    pred_boxes = outputs['pred_boxes']
    matches = match_predictions(pred_logits, pred_boxes, targets)

    # Every query defaults to "no object"; matched queries get the real label.
    target_classes = torch.full(
        pred_logits.shape[:2], num_classes, dtype=torch.int64, device=pred_logits.device
    )
    matched_pred_boxes, matched_gt_boxes = [], []
    for batch_idx, (query_idx, target_idx) in enumerate(matches):
        target_classes[batch_idx, query_idx] = targets[batch_idx]['labels'][target_idx]
        matched_pred_boxes.append(pred_boxes[batch_idx, query_idx])
        matched_gt_boxes.append(targets[batch_idx]['boxes'].reshape(-1, 4)[target_idx])

    class_loss = criterion(pred_logits.flatten(0, 1), target_classes.flatten())
    # Guard against division by zero when the batch has no annotations.
    num_matched = max(sum(len(query_idx) for query_idx, _ in matches), 1)
    bbox_loss = nn.functional.l1_loss(
        torch.cat(matched_pred_boxes), torch.cat(matched_gt_boxes), reduction='sum'
    ) / num_matched
    return class_loss + BBOX_LOSS_WEIGHT * bbox_loss


# Training
for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for images, targets in data_loader:
        images = images.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        outputs = model(images)
        loss = detection_loss(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch (0.0 if the loader was empty)
    # rather than whatever the last batch happened to produce.
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / max(num_batches, 1)}")

loading annotations into memory...


FileNotFoundError: [Errno 2] No such file or directory: 'E:/dataset/annotations.json'