In [None]:
import os
import json
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torchvision.transforms as transforms
from torchvision.ops import box_iou

In [None]:
import zipfile
import os
with zipfile.ZipFile('/Waste detection.v1i.createml.zip','r') as zip_ref:
    zip_ref.extractall('/content/')
print('done')

done


In [None]:
#!pip install ipdb



In [16]:
def custom_collate_fn(batch):
    """
    Fonction de collation pour gérer les tailles variables dans les annotations.
    """
    images = []
    labels = []
    coords = []

    for item in batch:
        images.append(item[0])  # Image tensor
        labels.append(item[1]['labels'])  # Labels
        coords.append(item[1]['coords'])  # Bounding boxes

    # Empile les images (taille fixe grâce à des transformations)
    images = torch.stack(images, dim=0)

    # Utilise des listes pour les labels et coords (tailles variables)
    return images, {'labels': labels, 'coords': coords}


In [25]:
import os
import json
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torchvision.transforms as transforms
from torchvision.ops import box_iou
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torch.nn.utils.rnn import pad_sequence


# Charger les annotations JSON
def load_annotations(json_path):
    with open(json_path, 'r') as f:
        annotations = json.load(f)
    return annotations


# Dataset pour la détection d'objets
class ObjectDetectionDataset(Dataset):
    def __init__(self, annotations, images_dir, transform=None, label_map=None):
        self.annotations = annotations
        self.images_dir = images_dir
        self.transform = transform

        if label_map is None:
            self.label_map = self.build_label_map()
        else:
            self.label_map = label_map


    def build_label_map(self):
        labels = set()
        for annotation in self.annotations:
            for obj in annotation["annotations"]:
                labels.add(obj["label"])
        print("classes détectées dans le json: ", labels)
        return {label: idx for idx, label in enumerate(sorted(labels))}


    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation = self.annotations[idx]
        image_name = annotation["image"]
        image_path = os.path.join(self.images_dir, image_name)

        # Charger l'image
        image = cv2.imread(image_path)
        print(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Charger les bounding boxes et labels
        coords = []
        labels = []
        for obj in annotation["annotations"]:
            x, y = obj["coordinates"]["x"], obj["coordinates"]["y"]
            w, h = obj["coordinates"]["width"], obj["coordinates"]["height"]
            coords.append([x, y, w, h])
            labels.append(self.label_map[obj["label"]])

        coords = torch.tensor(coords, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)


        # Appliquer des transformations
        if self.transform:
            image = self.transform(image)

        return (image, {'coords': coords, 'labels': labels})

# Préparer le transformateur pour les images
transform = transforms.Compose([
    transforms.ToTensor(),
    #transforms.Resize((300, 300)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Charger les annotations et initialiser le DataLoader
annotations_file = "/content/train/_annotations.createml.json"  # Remplacez par le chemin correct
images_dir = "/content/train"  # Remplacez par le dossier contenant les images

annotations = load_annotations(annotations_file)
label_map = {"plastic": 0, "cardboard": 1, "glass": 2, "metal": 3}
dataset = ObjectDetectionDataset(
    annotations,
    images_dir,
    transform=transform,
    label_map=label_map)


dataloader = DataLoader(dataset,
                        batch_size=4,
                        shuffle=True,
                        collate_fn=custom_collate_fn)

# Construire le modèle avec ResNet en backbone
class DetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(DetectionModel, self).__init__()
        backbone = models.resnet50(pretrained=True)
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-2])  # Supprime les couches FC

        self.conv = nn.Conv2d(2048, 1024, kernel_size=1)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(1024, num_classes)  # Classification
        self.regressor = nn.Linear(1024, 4)  # Régression des bounding boxes

    def forward(self, x):
        features = self.feature_extractor(x)
        features = self.conv(features)
        pooled_features = self.pool(features)
        pooled_features = pooled_features.flatten(start_dim=1)


        class_logits = self.classifier(pooled_features)
        coords = self.regressor(pooled_features)
        return class_logits, coords

# Initialiser le modèle, la perte et l'optimiseur
num_classes = len(label_map)

model = DetectionModel(num_classes)

criterion_class = nn.CrossEntropyLoss()
criterion_coords = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Entraîner le modèle
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(3):
    model.train()
    total_loss = 0
    for images, targets in dataloader:
        #import ipdb; ipdb.set_trace()
        images = images.to(device)
        #labels = torch.cat([t['labels'] for t in targets]).to(device)
        #coords = torch.cat([t['coords'] for t in targets]).to(device)

        labels = [torch.tensor(t).to(device) for t in targets['labels']]
        coords = [torch.tensor(t).to(device) for t in targets['coords']]

        # Forward pass
        class_logits, predicted_coords = model(images)

        # Calcul des pertes
        print(class_logits.shape)
        num_classes = len(labels)
        num_coords = len(coords)
        print(num_classes)
         # print(labels.shape)
        print(predicted_coords.shape)
        print(num_coords)


        """ loss_class = criterion_class(class_logits, labels)
        loss_coords = criterion_coords(predicted_coords, coords)
        loss = loss_class + loss_coords"""


        # Calcul des pertes pour chaque élément du batch
        loss_class = 0
        loss_coords = 0

        for i in range(len(labels)):
            # Calcul des pertes pour les classes
            loss_class += criterion_class(class_logits[i], labels[i])

            # Calcul des pertes pour les coordonnées
            loss_coords += criterion_coords(predicted_coords[i], coords[i])

        # Moyenne sur le batch
        loss_class /= len(labels)
        loss_coords /= len(coords)

        # Perte totale
        loss = loss_class + loss_coords

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}")


/content/train/IMG_9149_36_11zon_jpg.rf.3caa10538aab3290cb4e3ee56df626a0.jpg
/content/train/Image-14-_15_11zon_jpeg.rf.ad0a16c36e2ae75371e414a6b9249693.jpg
/content/train/IMG_9245_2_11zon_jpg.rf.e6a334fe161599eec702546d16026fa2.jpg
/content/train/IMG_9334_78_11zon_jpg.rf.2f3eadb45bfea7f789deed0878ba78d6.jpg


  labels = [torch.tensor(t).to(device) for t in targets['labels']]
  coords = [torch.tensor(t).to(device) for t in targets['coords']]


torch.Size([4, 4])
4
torch.Size([4, 4])
4


RuntimeError: size mismatch (got input: [4], target: [1])

In [None]:
#def draw_bounding_boxes(image, predicted_coords, labels, label_map):
    plt.imshow(image.permute(1, 2, 0).cpu().numpy())
    for coord, label in zip(predicted_coords, labels):
        x, y, w, h = coord
        x_min, y_min = x - w / 2, y - h / 2
        rect = patches.Rectangle(
            (x_min, y_min), w, h,
            linewidth=2, edgecolor='r', facecolor='none'
        )
        plt.gca().add_patch(rect)
        plt.text(x_min, y_min, list(label_map.keys())[label.item()], color='r', fontsize=10)
    plt.axis('off')
    plt.show()

# Inférence et visualisation
model.eval()
with torch.no_grad():
    for images, targets in dataloader:
        images = torch.stack(images).to(device)
        class_logits, predicted_coords = model(images)

        for i in range(len(images)):
            draw_bounding_boxes(images[i], predicted_coords[i].cpu(), torch.argmax(class_logits, dim=1).cpu(), label_map)
        break