In [1]:
import pandas as pd
from skimage import io, color
from skimage.feature import hog
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
import numpy as np
from PIL import Image
import os
import joblib

def load_images_and_labels(image_dir, label_dir):
    data = []
    labels = []
    columns = ['x1', 'y1', 'x2', 'y2', 'label']

    for filename in os.listdir(image_dir):
        if filename.endswith(".jpg"):
            image_path = os.path.join(image_dir, filename)
            csv_path = os.path.join(label_dir, filename[:-4] + ".csv")
            
            if not os.path.exists(csv_path):
                continue

            image = io.imread(image_path)
            if len(image.shape) > 2:
                image = color.rgb2gray(image)  # Convertir en niveaux de gris pour HOG

            df = pd.read_csv(csv_path, header=None, names=columns)
            for _, row in df.iterrows():
                label = row['label'].strip()
                if label == 'ff':
                    continue  # Ignorer les labels 'ff'
                
                if label == 'empty':
                    # Diviser l'image en 9 sous-images et utiliser chaque sous-image pour les cas 'empty'
                    height, width = image.shape
                    sub_images = [
                        image[:height//3, :width//3],
                        image[:height//3, width//3:2*width//3],
                        image[:height//3, 2*width//3:],
                        image[height//3:2*height//3, :width//3],
                        image[height//3:2*height//3, 2*width//3:],
                        image[2*height//3:, :width//3],
                    ]
                    for sub_image in sub_images:
                        sub_image_resized = Image.fromarray(sub_image).resize((100, 100))
                        hog_features = hog(np.array(sub_image_resized),orientations=16, pixels_per_cell=(8, 8),
                                           cells_per_block=(2, 2), visualize=False, feature_vector=True)
                        data.append(hog_features)
                        labels.append(label)  # Utiliser le label 'empty' pour chaque sous-image
                else:
                    x1, y1, x2, y2 = int(row['x1']), int(row['y1']), int(row['x2']), int(row['y2'])
                    if x1 >= x2 or y1 >= y2 or x1 < 0 or y1 < 0 or x2 > image.shape[1] or y2 > image.shape[0]:
                        continue
                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        continue
                    roi_resized = Image.fromarray(roi).resize((100, 100))  # S'assurer que le redimensionnement est correct
                    roi_array = np.array(roi_resized)
                    hog_features = hog(roi_array,orientations=16, pixels_per_cell=(8, 8),
                                           cells_per_block=(2, 2), visualize=False, feature_vector=True)
                    data.append(hog_features)
                    labels.append(label)  # Utiliser le label spécifique du panneau

    return np.array(data, dtype='float64'), np.array(labels)  # Les labels seront convertis séparément

# Chemins vers les dossiers d'images et de labels
image_dir = 'train/images2'
label_dir = 'train/labels2'

# Charger les données
features, target = load_images_and_labels(image_dir, label_dir)

# Encoder les labels
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

# Création et entraînement du SVM avec probability=True
clf = svm.SVC(kernel='linear', probability=True)
clf.fit(features, target_encoded)  # Utiliser les labels encodés
print("Le modèle SVM a été entraîné.")

Le modèle SVM a été entraîné.


In [20]:
import numpy as np
from skimage import io, color
from skimage.feature import hog
from skimage.transform import pyramid_gaussian
from joblib import Parallel, delayed
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import pandas as pd
import os

def sliding_window(image, step_size, window_size):
    for y in range(0, image.shape[0] - window_size[1], step_size):
        for x in range(0, image.shape[1] - window_size[0], step_size):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

def non_max_suppression(boxes, scores, labels, overlap_thresh=0.3):
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes)
    scores = np.array(scores)
    labels = np.array(labels)

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        inter = w * h
        overlap = inter / (areas[i] + areas[order[1:]] - inter)

        overlap_indices = np.where(overlap > overlap_thresh)[0] + 1

        # Keep only the boxes with the highest score among those that overlap, considering labels and different scales
        for j in overlap_indices:
            if labels[order[j]] == labels[i] and scores[order[j]] < scores[i]:
                order = np.delete(order, j)
            elif labels[order[j]] == labels[i] and scores[order[j]] >= scores[i]:
                order = np.delete(order, 0)
                break

        order = np.delete(order, 0)

    return keep


def process_window(window, x, y, scale, window_size, model, label_encoder, valid_labels, threshold):
    if window.shape[0] != window_size[1] or window.shape[1] != window_size[0]:
        return None

    window_resized = Image.fromarray(window).resize((100, 100))
    hog_features = hog(np.array(window_resized), orientations=16, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), visualize=False, feature_vector=True)

    hog_features = np.array(hog_features).reshape(1, -1)
    prediction = model.predict(hog_features)
    probability = model.predict_proba(hog_features)[0][prediction[0]]

    label = label_encoder.inverse_transform(prediction)[0]

    if label in valid_labels and probability >= threshold:
        return (int(x * scale), int(y * scale), int((x + window_size[0]) * scale), int((y + window_size[1]) * scale), label, probability)
    return None

def detect_signs_in_image(image_path, model, label_encoder, threshold=0.65, max_levels=6):
    image = io.imread(image_path)
    if len(image.shape) > 2:
        image = color.rgb2gray(image)

    pyramid = pyramid_gaussian(image, downscale=1.5)
    detected_boxes = []
    detected_labels = []
    detected_confidences = []

    window_size_1 = (64, 64)
    step_size_1 = 48
    valid_labels_1 = ['ceder', 'danger', 'interdiction', 'obligation', 'stop']

    window_size_2 = (64, 128)
    step_size_2 = 48
    valid_labels_2 = ['forange', 'frouge', 'fvert']

    level_count = 0

    for resized in pyramid:
        if level_count >= max_levels:
            break

        scale = image.shape[0] / float(resized.shape[0])

        results = Parallel(n_jobs=-1)(delayed(process_window)(
            window, x, y, scale, window_size_1, model, label_encoder, valid_labels_1, threshold)
            for (x, y, window) in sliding_window(resized, step_size_1, window_size_1)
        )
        results = [result for result in results if result is not None]
        for (x1, y1, x2, y2, label, confidence) in results:
            detected_boxes.append((x1, y1, x2, y2))
            detected_labels.append(label)
            detected_confidences.append(confidence)

        results = Parallel(n_jobs=-1)(delayed(process_window)(
            window, x, y, scale, window_size_2, model, label_encoder, valid_labels_2, threshold)
            for (x, y, window) in sliding_window(resized, step_size_2, window_size_2)
        )
        results = [result for result in results if result is not None]
        for (x1, y1, x2, y2, label, confidence) in results:
            detected_boxes.append((x1, y1, x2, y2))
            detected_labels.append(label)
            detected_confidences.append(confidence)

        level_count += 1

    indices = non_max_suppression(detected_boxes, detected_confidences, detected_labels, overlap_thresh=0.3)

    final_detected_boxes = [detected_boxes[i] for i in indices]
    final_detected_labels = [detected_labels[i] for i in indices]
    final_detected_confidences = [detected_confidences[i] for i in indices]

    # Filtrage pour conserver uniquement un type de feu par image
    fire_labels = ['forange', 'frouge', 'fvert']
    fire_detections = [(box, label, conf) for box, label, conf in zip(final_detected_boxes, final_detected_labels, final_detected_confidences) if label in fire_labels]
    if fire_detections:
        best_fire_label = max(fire_detections, key=lambda x: x[2])[1]  # Get the label with the highest confidence
        final_detected_boxes = [box for box, label, conf in zip(final_detected_boxes, final_detected_labels, final_detected_confidences) if label not in fire_labels or label == best_fire_label]
        final_detected_labels = [label for box, label, conf in zip(final_detected_boxes, final_detected_labels, final_detected_confidences) if label not in fire_labels or label == best_fire_label]
        final_detected_confidences = [conf for box, label, conf in zip(final_detected_boxes, final_detected_labels, final_detected_confidences) if label not in fire_labels or label == best_fire_label]

    detected_signs = [{"label": final_detected_labels[i], "box": final_detected_boxes[i], "confidence": final_detected_confidences[i]}
                      for i in range(len(final_detected_labels))]

    return detected_signs

def read_real_labels(image_path):
    csv_path = image_path.replace('images', 'labels').replace('.jpg', '.csv')
    if not os.path.exists(csv_path):
        return []

    columns = ['x1', 'y1', 'x2', 'y2', 'label']
    df = pd.read_csv(csv_path, header=None, names=columns)
    real_labels = [{"box": (row['x1'], row['y1'], row['x2'], row['y2']), "label": row['label']} for index, row in df.iterrows()]
    return real_labels

# Exemple d'utilisation
image_path = 'train/images/0002.jpg'
detected_signs = detect_signs_in_image(image_path, clf, label_encoder)
real_labels = read_real_labels(image_path)

print("Detected Signs:")
print(detected_signs)
print("\nReal Labels:")
print(real_labels)


Detected Signs:
[{'label': 'danger', 'box': (71, 431, 167, 527), 'confidence': 0.9741884141635011}, {'label': 'danger', 'box': (576, 96, 640, 160), 'confidence': 0.9598329116298248}, {'label': 'danger', 'box': (144, 528, 208, 592), 'confidence': 0.9340446679503872}, {'label': 'danger', 'box': (215, 647, 311, 743), 'confidence': 0.9259103678261708}, {'label': 'danger', 'box': (0, 484, 323, 808), 'confidence': 0.8835659294366363}, {'label': 'interdiction', 'box': (288, 0, 352, 64), 'confidence': 0.8827871081884444}, {'label': 'danger', 'box': (215, 647, 359, 791), 'confidence': 0.8571996507437492}, {'label': 'ceder', 'box': (624, 144, 688, 208), 'confidence': 0.8488426503219609}, {'label': 'interdiction', 'box': (240, 48, 304, 112), 'confidence': 0.8417652978260273}, {'label': 'fvert', 'box': (215, 287, 311, 479), 'confidence': 0.831528780559842}, {'label': 'danger', 'box': (0, 864, 64, 928), 'confidence': 0.8249915206148462}, {'label': 'ceder', 'box': (480, 0, 544, 64), 'confidence': 0.