## Imports

In [None]:
import cv2
import numpy as np
import os
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

## 1. Paramètres

In [None]:

# Root folder of the dataset -- replace with the real path before running.
DATASET_PATH = "dataset_path"

# One sub-folder per split, each located directly under DATASET_PATH.
TRAIN_DIR, AUG_TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATASET_PATH, split_name)
    for split_name in ("train", "train_augmented", "val", "test")
)

# Number of augmented variants generated for each original image.
AUG_PER_IMAGE = 10


## 2. Fonction d’augmentation

In [None]:
def apply_random_augmentation(image):
    """Return a randomly perturbed copy of a single-channel image.

    Steps, in order: rotation by a random angle in [-45, 45] degrees about
    the image centre, uniform scaling by a random factor in [0.8, 1.2]
    followed by a centred crop/pad back to the input size, a translation of
    up to 10 pixels on each axis, a horizontal flip with probability 0.5,
    and additive zero-mean Gaussian noise (standard deviation 10).
    Areas uncovered by the geometric steps are filled with 255.

    Args:
        image: 2-D array; assumed to be uint8 grayscale, as produced by
            ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)``.

    Returns:
        A uint8 array with the same ``(rows, cols)`` shape as ``image``.
    """
    rows, cols = image.shape

    # Random rotation about the centre.
    angle = random.uniform(-45, 45)
    rotation = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    image = cv2.warpAffine(image, rotation, (cols, rows), borderValue=255)

    # Random uniform scaling; this changes the array size.
    scale = random.uniform(0.8, 1.2)
    image = cv2.resize(image, (0, 0), fx=scale, fy=scale)

    # Centre crop and/or pad back to (rows, cols). Each axis is handled on
    # its own: the image need not be square, and rounding in the resize can
    # enlarge one axis without enlarging the other.
    scaled_rows, scaled_cols = image.shape[:2]
    if scaled_rows > rows:
        top = (scaled_rows - rows) // 2
        image = image[top:top + rows, :]
    if scaled_cols > cols:
        left = (scaled_cols - cols) // 2
        image = image[:, left:left + cols]
    pad_rows = rows - image.shape[0]
    pad_cols = cols - image.shape[1]
    if pad_rows > 0 or pad_cols > 0:
        pad_top = pad_rows // 2
        pad_left = pad_cols // 2
        image = cv2.copyMakeBorder(
            image,
            pad_top,
            pad_rows - pad_top,
            pad_left,
            pad_cols - pad_left,
            cv2.BORDER_CONSTANT,
            value=255,
        )

    # Random translation.
    dx = random.randint(-10, 10)
    dy = random.randint(-10, 10)
    shift = np.float32([[1, 0, dx], [0, 1, dy]])
    image = cv2.warpAffine(image, shift, (cols, rows), borderValue=255)

    # Horizontal flip half of the time.
    if random.random() < 0.5:
        image = cv2.flip(image, 1)

    # Additive Gaussian noise. The sum is computed in floating point and
    # clipped before converting back to uint8: casting the signed noise to
    # uint8 first would wrap negative samples around to large positive values.
    noise = np.random.normal(0, 10, (rows, cols))
    noisy = np.clip(image.astype(np.float64) + noise, 0, 255)
    image = np.rint(noisy).astype(np.uint8)

    return image

## 3. Génération du dossier train_augmented

In [None]:
def generate_augmented_data(input_dir, output_dir, augmentations_per_image):
    """Write an augmented copy of a class-per-folder image dataset.

    For every sub-directory (class) of ``input_dir`` a directory of the same
    name is created under ``output_dir``. Each readable image is saved there
    unchanged as ``orig_<name>`` together with ``augmentations_per_image``
    variants named ``<stem>_aug<i>.png``.

    Entries of ``input_dir`` that are not directories are ignored, and files
    that OpenCV cannot decode are reported and skipped.

    Args:
        input_dir: Directory containing one sub-directory per class.
        output_dir: Destination root; created as needed.
        augmentations_per_image: Number of variants written per image.
    """
    for label in os.listdir(input_dir):
        input_class_path = os.path.join(input_dir, label)
        # Stray files next to the class folders are not classes.
        if not os.path.isdir(input_class_path):
            continue

        output_class_path = os.path.join(output_dir, label)
        os.makedirs(output_class_path, exist_ok=True)

        for img_name in os.listdir(input_class_path):
            img_path = os.path.join(input_class_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # cv2.imread signals failure by returning None, not by raising.
            if img is None:
                print(f"Fichier ignoré (image illisible) : {img_path}")
                continue

            # Keep the original alongside its augmented variants.
            cv2.imwrite(os.path.join(output_class_path, f"orig_{img_name}"), img)

            stem = os.path.splitext(img_name)[0]
            for i in range(augmentations_per_image):
                aug_img = apply_random_augmentation(img)
                aug_name = f"{stem}_aug{i}.png"
                cv2.imwrite(os.path.join(output_class_path, aug_name), aug_img)

    print("Augmentation terminée.")

# One-time generation: the augmented training set is built only when its
# directory is not present yet.
if not os.path.exists(AUG_TRAIN_DIR):
    print("Création des données augmentées...")
    generate_augmented_data(
        input_dir=TRAIN_DIR,
        output_dir=AUG_TRAIN_DIR,
        augmentations_per_image=AUG_PER_IMAGE,
    )

## 4. Extraction de caractéristiques

In [None]:
def extract_features(image_path):
    """Compute six shape descriptors for the largest contour in an image.

    The image is read as grayscale and thresholded at 127 with
    ``THRESH_BINARY_INV``, so pixels at or below the threshold become the
    foreground. Descriptors are taken from the external contour with the
    largest area.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``[area, perimeter, circularity, aspect_ratio, solidity,
        num_vertices]``, or six zeros when no contour is found.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure; fail here with a message that names
    # the file instead of an opaque error inside cv2.threshold.
    if img is None:
        raise ValueError(f"Impossible de lire l'image : {image_path}")

    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return [0] * 6

    contour = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    # 4*pi*A / P^2 equals 1 for a perfect circle.
    circularity = 4 * np.pi * area / (perimeter ** 2) if perimeter > 0 else 0

    _, _, w, h = cv2.boundingRect(contour)
    aspect_ratio = float(w) / h if h > 0 else 0

    # Solidity: contour area relative to the area of its convex hull.
    hull_area = cv2.contourArea(cv2.convexHull(contour))
    solidity = float(area) / hull_area if hull_area > 0 else 0

    # Polygon approximation with a tolerance of 2% of the perimeter; the
    # number of vertices it keeps is the last descriptor.
    approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
    num_vertices = len(approx)

    return [area, perimeter, circularity, aspect_ratio, solidity, num_vertices]

## 5. Chargement du dataset

In [None]:
def load_dataset(folder_path):
    """Build feature and label arrays from a class-per-folder dataset.

    Every sub-directory of ``folder_path`` is treated as a class whose name
    is the label; each regular file inside it is passed to
    ``extract_features``. Directory listings are sorted so that the sample
    order, and therefore any seeded model trained on it, is reproducible
    across runs and file systems.

    Args:
        folder_path: Directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds one feature row per
        image and ``y`` the matching folder names.
    """
    X, y = [], []
    for label in sorted(os.listdir(folder_path)):
        class_dir = os.path.join(folder_path, label)
        if not os.path.isdir(class_dir):
            continue
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            # Nested directories are not images; skip them.
            if not os.path.isfile(img_path):
                continue
            X.append(extract_features(img_path))
            y.append(label)
    return np.array(X), np.array(y)

## 6. Préparation des données

In [None]:
# Load the three splits; training uses the augmented directory, while
# validation and test are read from their own directories.
print("Chargement des données augmentées...")
(X_train, y_train), (X_val, y_val), (X_test, y_test) = [
    load_dataset(split_dir) for split_dir in (AUG_TRAIN_DIR, VAL_DIR, TEST_DIR)
]

## 7. Encodage des labels

In [None]:
# Fit the label encoder on the training labels only, then reuse that same
# mapping for the validation and test labels.
encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_val_enc, y_test_enc = (
    encoder.transform(split_labels) for split_labels in (y_val, y_test)
)


## 8. Entraînement

In [None]:
# Train a 100-tree random forest; the fixed seed keeps the fit repeatable
# for a given training set.
print("Entraînement du modèle...")
clf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
clf.fit(X=X_train, y=y_train_enc)

## 9. Évaluation

In [None]:
# Score the validation and test splits. `labels` pins each report to every
# class known to the encoder, so a split in which some class never appears
# still lines up with `target_names` instead of raising a size-mismatch error.
print("\n--- Évaluation sur validation ---")
val_pred = clf.predict(X_val)
print(classification_report(
    y_val_enc,
    val_pred,
    labels=np.arange(len(encoder.classes_)),
    target_names=encoder.classes_,
))

print("\n--- Évaluation sur test ---")
test_pred = clf.predict(X_test)
print(classification_report(
    y_test_enc,
    test_pred,
    labels=np.arange(len(encoder.classes_)),
    target_names=encoder.classes_,
))

## 10. Matrice de confusion

In [None]:
# Confusion matrix on the test split. `labels` forces one row/column per
# encoded class so the matrix always has as many entries as `display_labels`,
# even when a class is absent from both the test labels and the predictions.
ConfusionMatrixDisplay.from_predictions(
    y_test_enc,
    test_pred,
    labels=np.arange(len(encoder.classes_)),
    display_labels=encoder.classes_,
    xticks_rotation=45,
    cmap='Blues'
)

## 11. Matrice de coût personnalisée

In [None]:
# Class order used by the encoder; rows AND columns of the final cost matrix
# follow it.
shape_order = list(encoder.classes_)

# Pairwise misclassification costs. Each row lists its costs in the same
# order as the dictionary keys (the zero of every row sits at its own key's
# position).
cost_dict = {
    'circle':           [0, 7, 8, 8, 8, 8, 6, 9],
    'kite':             [7, 0, 4, 5, 3, 4, 5, 6],
    'parallelogram':    [8, 4, 0, 2, 3, 2, 3, 7],
    'rectangle':        [8, 5, 2, 0, 3, 1, 3, 6],
    'rhombus':          [8, 3, 3, 3, 0, 1, 4, 6],
    'square':           [8, 4, 2, 1, 1, 0, 3, 6],
    'trapezoid':        [6, 5, 3, 3, 4, 3, 0, 5],
    'triangle':         [9, 6, 7, 6, 6, 6, 5, 0]
}

# Position of each class inside a cost_dict row (relies on dicts keeping
# insertion order).
column_index = {name: position for position, name in enumerate(cost_dict)}

# Build the cost matrix in encoder order on both axes. Reordering only the
# rows would give a misaligned or non-square matrix whenever the encoder's
# classes differ from the dictionary's key order (for example when a class
# is missing from the training data). A class without a cost row raises
# KeyError.
cost_matrix = np.array([
    [cost_dict[row_cls][column_index[col_cls]] for col_cls in shape_order]
    for row_cls in shape_order
])