In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader, Dataset
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import xml.etree.ElementTree as ET

  check_for_updates()


In [9]:
# Step 1: Configuration des hyperparamètres
BATCH_SIZE = 4
NUM_EPOCHS = 30
LEARNING_RATE = 0.005
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# Step 2: Définir le Dataset personnalisé
class PascalVOCDataset(Dataset):
    def __init__(self, root_dir, transforms=None):
        self.root_dir = root_dir
        self.transforms = transforms
        self.imgs = [f for f in os.listdir(root_dir) if f.endswith(".jpg")]

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.imgs[idx])
        annot_path = img_path.replace(".jpg", ".xml")

        # Chargement de l'image
        img = Image.open(img_path).convert("RGB")

        # Lecture des annotations
        tree = ET.parse(annot_path)
        root = tree.getroot()

        boxes = []
        labels = []

        for obj in root.findall("object"):
            bbox = obj.find("bndbox")
            xmin = int(bbox.find("xmin").text)
            ymin = int(bbox.find("ymin").text)
            xmax = int(bbox.find("xmax").text)
            ymax = int(bbox.find("ymax").text)
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(1)  # Assuming a single class of objects

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([idx])

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
        }

        if self.transforms:
            transformed = self.transforms(image=np.array(img))
            img = transformed["image"]

        return img, target

    def __len__(self):
        return len(self.imgs)

In [5]:
# Step 3: Data Augmentation avec Albumentations
train_transforms = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.ShiftScaleRotate(scale_limit=0.2, rotate_limit=15, p=0.5),
    A.Normalize(), # Apply normalization before converting to tensor
    ToTensorV2()
])

# Initialisation du dataset
train_dataset = PascalVOCDataset(root_dir="/content/drive/MyDrive/data/train", transforms=train_transforms)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))


In [6]:
# Step 4: Initialisation du modèle Faster R-CNN
model = fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 16  # Background + single object class
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model.to(DEVICE)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 124MB/s]


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [7]:
# Step 5: Définition de l'optimiseur et du scheduler
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [8]:
# Step 6: Fonction d'entraînement
def train_one_epoch(model, optimizer, data_loader, device):
    model.train()
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

In [10]:
# Step 7: Entraînement du modèle
for epoch in range(NUM_EPOCHS):
    train_one_epoch(model, optimizer, train_loader, DEVICE)
    lr_scheduler.step()
    print(f"Epoch {epoch+1} completed")

# Step 8: Évaluation (simple accuracy check)
model.eval()
# Evaluation logic could include IoU calculations or mAP evaluation here

torch.save(model.state_dict(), "faster_rcnn_trained.pth")
print("Training complete and model saved.")

Epoch 1 completed
Epoch 2 completed
Epoch 3 completed
Epoch 4 completed
Epoch 5 completed
Epoch 6 completed
Epoch 7 completed
Epoch 8 completed
Epoch 9 completed
Epoch 10 completed
Epoch 11 completed
Epoch 12 completed
Epoch 13 completed
Epoch 14 completed
Epoch 15 completed
Epoch 16 completed
Epoch 17 completed
Epoch 18 completed
Epoch 19 completed
Epoch 20 completed
Epoch 21 completed
Epoch 22 completed
Epoch 23 completed
Epoch 24 completed
Epoch 25 completed
Epoch 26 completed
Epoch 27 completed
Epoch 28 completed
Epoch 29 completed
Epoch 30 completed
Training complete and model saved.


In [12]:
# Step 8: Évaluation (simple accuracy check)
model.eval()
correct_predictions = 0
total_predictions = 0

with torch.no_grad():
  for images, targets in train_loader:
    images = list(img.to(DEVICE) for img in images)
    targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
    outputs = model(images)

    for output, target in zip(outputs, targets):
      # Get predicted labels and scores
      predicted_labels = output['labels'].cpu().numpy()
      predicted_scores = output['scores'].cpu().numpy()

      # Filter predictions based on a score threshold (e.g., 0.5)
      # This helps to reduce the number of false positive predictions
      score_threshold = 0.5
      filtered_indices = predicted_scores >= score_threshold
      predicted_labels = predicted_labels[filtered_indices]

      true_labels = target['labels'].cpu().numpy()

      # Now compare the filtered predicted labels with the true labels
      # Consider only the common elements for comparison to handle the shape mismatch
      num_common_elements = min(len(predicted_labels), len(true_labels))
      correct_predictions += np.sum(predicted_labels[:num_common_elements] == true_labels[:num_common_elements])
      total_predictions += len(true_labels)

accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

print(f"Accuracy: {accuracy:.4f}")
# You can add more evaluation metrics (e.g., mAP, IoU) here.

Accuracy: 0.5649
