## Réseau de neurones convolutif

#### Préparation des données

In [9]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
import random

In [23]:
AVERAGE_SIZE_IMAGE = (127, 145)  # Thanks to the stats, we know that size of bbox will be (127, 145) -> Average size of labels 

name_to_int = {
    "danger": 0,
    "interdiction": 1,
    "obligation": 2,
    "stop": 3,
    "ceder": 4,
    "frouge": 5,
    "forange": 6,
    "fvert": 7,
    "ff": 8,
    "empty": 9
}

def generate_empty_bbox(image_width, image_height):
    """ 
    Generate an empty box for images without label
    """
    # Generating random coords for the bbox
    x_min = random.randint(0, image_width - AVERAGE_SIZE_IMAGE[0])
    y_min = random.randint(0, image_height - AVERAGE_SIZE_IMAGE[1])
    
    # Compute complete coords of the bbox
    x_max = x_min + AVERAGE_SIZE_IMAGE[0]
    y_max = y_min + AVERAGE_SIZE_IMAGE[1]
    
    return (x_min, y_min, x_max, y_max)

def load_annotations(image_dir, label_dir):
    """ 
    Create a dict with all the annotations of the dataset
    annotations = [
        {
            "filename" : image as an array,
            "labels" name of the label,
            "boxes" : coord of the label like xmin, ymin, xmax, ymax,
        }
        , ...]
    """
    
    annotations = []

    for image_file in os.listdir(image_dir):
        # Computing name and files paths
        image_path = image_dir + '/' + image_file
        name = image_file.split('.')[0]
        label_path = label_dir + '/' + name + '.csv'

        # Load image
        image = np.array(Image.open(image_path))

        # Import labels as array 
        with open(label_path, 'r') as file:
            rows = file.readlines()

            annotation_data = {
                "filename":image_path,
                "label":[],
                "boxes":[]
            }

            if rows == ['\n']:  # Create a random empty label to balance model
                # Create random coords for empty label
                xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0])

                # Insert label & boxe
                annotation_data["label"].append(name_to_int["empty"])
                annotation_data["boxes"].append((xmin, ymin, xmax, ymax)) 
            
            else:
                for i, row in enumerate(rows):  # One image can contain several labels
                    row = row.strip().split(",")

                    # Compute coords of the label
                    xmin, ymin, xmax, ymax = map(int, row[0:4])

                    # Get the label name
                    class_name = name_to_int[row[4]]

                    # Insert label & boxe
                    annotation_data["label"].append(class_name)
                    annotation_data["boxes"].append((xmin, ymin, xmax, ymax)) 

        annotations.append(annotation_data)
    return annotations

class Dataset(Dataset):
    def __init__(self, img_dir, annotations, transform=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        image = self.data[idx]['img']
        labels = self.data[idx]['labels']        
        return image, labels

transform = transforms.Compose([
    transforms.Resize(AVERAGE_SIZE_IMAGE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


annotations = load_annotations("../data/train/images", "../data/train/labels")
dataset = Dataset(img_dir="../data/train/images", annotations=annotations, transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

AttributeError: 'Dataset' object has no attribute 'data'

#### Création du modèle

In [17]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.fc1 = nn.Linear(64*28*28, 512)

        self.fc_bbox = nn.Linear(512, 4)  # 4 sorties pour les coordonnées de la boîte (x0, y0, x1, y1)
        self.fc_class = nn.Linear(512, num_classes)  # sorties pour les classes
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 64*28*28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        bbox = self.fc_bbox(x)
        class_logits = self.fc_class(x)
        return bbox, class_logits

model = SimpleCNN(num_classes=len(name_to_int))

#### Entrainement du modèle

In [None]:
NUM_EPOCHS = 10

# Boucle d'entraînement
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    
    for images, boxes, labels in data:
        
        optimizer.zero_grad()
        
        bbox_pred, class_pred = model(images)
        loss_cls = criterion_cls(class_pred, labels)
        loss_bbox = criterion_bbox(bbox_pred, boxes)
        loss = loss_cls + loss_bbox
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader)}')

print('Finished Training')