# Mini-projet n°1 – Chats ou Chiens ?

In [None]:
#Utils
import os
import shutil
import time

# Maths - Stats
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
import random as rd

# Data visualization
from matplotlib import pyplot as plt
import seaborn as sns
from PIL import Image

# Deep Learning Librairies
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout,  GlobalAveragePooling2D
from tensorflow.keras.layers import Concatenate, Conv2DTranspose
from tensorflow.keras.models import Sequential, clone_model, Model,load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16


In [None]:
# Load the annotation index: whitespace-separated columns, no header row,
# and '#'-prefixed comment lines that are skipped by the parser.
annotation_columns = ["Image", "CLASS_ID", "SPECIES", "BREED_ID"]
read_options = dict(sep=r"\s+", comment="#", header=None, names=annotation_columns)
df = pd.read_csv("./oxford-iiit-pet/annotations/list.txt", **read_options)

df.info()

Le jeu de données est composé de 7349 individus.
Les variables sont les suivantes :
- Image : au format .jpg ;
- CLASS\_ID (race) : 37 races différentes ;
- SPECIES : chat ou chien (1 ou 2 selon l'espèce) ;
- BREED\_ID : identifiant de la race parmi l'espèce concernée.

Afin de rendre les données plus lisibles, nous rajoutons deux variables non numériques précisant les noms de l'espèce (SPECIES\_NAME) et de la race (BREED\_NAME) de chaque individu.

In [None]:
# Strip an already-present ".jpg" first so that re-running this cell is harmless
# (the previous version appended the extension again on every execution).
image_stem = df["Image"].str.replace(r"\.jpg$", "", regex=True)

# Breed name = file stem without its trailing "_<suffix>" part.
df["BREED_NAME"] = image_stem.str.rsplit("_", n=1).str[0]
# Species label: SPECIES code 1 -> "Cat", 2 -> "Dog".
df["SPECIES_NAME"] = df["SPECIES"].map({1: "Cat", 2: "Dog"})
# File name of the image on disk.
df["Image"] = image_stem + ".jpg"

print(df.sample(n=3, random_state=42))

In [None]:
# Share of each species in the dataset, drawn as a pie chart.
species_counts = df["SPECIES_NAME"].value_counts()

pie_options = {
    "labels": species_counts.index,
    "autopct": "%1.1f%%",
    "startangle": 90,
}
plt.figure(figsize=(6, 6))
plt.pie(species_counts.values, **pie_options)
plt.title("Répartition des individus par espèce")
plt.axis("equal")  # keep the pie circular
plt.show()

67,7 % des individus du jeu de données sont des chiens et 32,3 % sont des chats.


In [None]:
# Count the images of every breed, one column per species, and show the
# counts as a bar chart.
pivot = df.pivot_table(
    index="BREED_NAME", columns="SPECIES_NAME", values="Image", aggfunc="count"
)

pivot.plot.bar(figsize=(14, 6))
plt.ylabel("Nombre d'individus")
plt.title("Nombre d'individus par race et par espèce")
plt.xticks(rotation=90)  # breed names are long: print them vertically
plt.tight_layout()
plt.show()

Le nombre d'individus est à peu près le même pour chaque race (environ 200). Les données semblent a priori équilibrées, même si certaines races (par exemple le Bombay, chez les chats) comptent moins d'individus que les autres.
On peut conclure cette sous-partie en avançant que les données sont suffisamment bien réparties selon les espèces et les races. Il y a une légère surreprésentation des chiens, et les races sont légèrement déséquilibrées, mais cela ne devrait pas avoir d'impact notable pour la suite.

In [None]:
# Root folder of the dataset. The previous code passed the DataFrame `df` to
# os.path.join, which raises a TypeError; the root is the folder that already
# holds annotations/list.txt.
DATA_DIR = "./oxford-iiit-pet"
img_dir = os.path.join(DATA_DIR, "images/")
mask_dir = os.path.join(DATA_DIR, "annotations/trimaps/")

# --- Mask file names ---
# A mask has the same name as its image but with a .png extension. The pattern
# is anchored and escaped so that only a trailing ".jpg" is rewritten, whatever
# the pandas default for `regex` is.
df['mask_filename'] = df['Image'].str.replace(r'\.jpg$', '.png', regex=True)

# Optional sanity check: keep only the rows whose image file exists on disk.
# df = df[df['Image'].apply(lambda x: os.path.exists(os.path.join(img_dir, x)))]

# --- Dataset split (train / validation / test) ---
# 20 % of the data is held out for test, then 20 % of the remainder for
# validation; both splits are stratified on the species and seeded.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['SPECIES_NAME'])
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42, stratify=train_df['SPECIES_NAME'])

print(f"Train: {len(train_df)} | Validation: {len(val_df)} | Test: {len(test_df)}")

### Classification binaire

In [None]:

# --- Parameters ---
IMG_SIZE = (128, 128)  # reduced input resolution to lighten the CPU/GPU load
BATCH_SIZE = 32

# --- Generators ---
# Only the training stream is augmented; validation images are just rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

# --- Data streams for the SPECIES target (cat vs dog) ---
# class_mode="binary" yields a single 0/1 label per image, matching the
# one-unit sigmoid head below. The label given to each species can be read
# from train_generator.class_indices.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=img_dir,
    x_col="Image",
    y_col="SPECIES_NAME",      # target: species name
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

val_generator = val_test_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=img_dir,
    x_col="Image",
    y_col="SPECIES_NAME",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

# --- Transfer-learning model (VGG16) ---
base_model = VGG16(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
base_model.trainable = False  # freeze the pre-trained weights

# Classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
# Two-class problem: one sigmoid unit. The previous 37-way softmax head did not
# match the two-class SPECIES_NAME labels fed by the generators.
predictions = Dense(1, activation='sigmoid')(x)

model_clf = Model(inputs=base_model.input, outputs=predictions)

model_clf.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

# model_clf.summary()
# history_clf = model_clf.fit(train_generator, validation_data=val_generator, epochs=10)

In [None]:

def analyser_resultats(model, history, test_generator, model_name="Modèle"):
    """Plot the learning curves of a training run and print the test-set score.

    Args:
        model: object exposing ``evaluate(test_generator)`` that returns a
            ``(loss, accuracy)`` pair.
        history: object whose ``history`` mapping holds the per-epoch lists
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        test_generator: data passed as-is to ``model.evaluate``.
        model_name: label used in the figure titles and in the printed header.
    """
    log = history.history
    # Read every curve up front so that a missing key fails before any drawing.
    curves = {key: (log[key], log[f'val_{key}']) for key in ('accuracy', 'loss')}
    epochs_range = range(len(curves['accuracy'][0]))

    # (subplot slot, history key, curve label, legend corner, title suffix)
    panels = (
        (1, 'accuracy', 'Accuracy', 'lower right', 'Précision'),
        (2, 'loss', 'Loss', 'upper right', 'Perte'),
    )

    # 1. Learning curves: training vs validation, one panel per quantity
    plt.figure(figsize=(12, 4))
    for slot, key, label, corner, title_suffix in panels:
        train_values, val_values = curves[key]
        plt.subplot(1, 2, slot)
        plt.plot(epochs_range, train_values, label=f'Training {label}')
        plt.plot(epochs_range, val_values, label=f'Validation {label}')
        plt.legend(loc=corner)
        plt.title(f'{model_name} - {title_suffix}')
    plt.show()

    # 2. Final evaluation on the test set
    print(f"--- Évaluation finale sur le Test Set ({model_name}) ---")
    test_loss, test_acc = model.evaluate(test_generator)
    print(f"Test Accuracy: {test_acc*100:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

#### CNN simple

In [None]:
# Plain generators: pixel rescaling only, no augmentation.
simple_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the three streams (breed target, one-hot labels).
breed_flow_options = dict(
    directory=img_dir,
    x_col='Image',
    y_col='BREED_NAME',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_gen_simple = simple_datagen.flow_from_dataframe(train_df, **breed_flow_options)
val_gen_simple = simple_datagen.flow_from_dataframe(val_df, **breed_flow_options)
test_gen_simple = simple_datagen.flow_from_dataframe(test_df, **breed_flow_options)

# Number of breed classes. This name was used below without ever being defined,
# which raised a NameError as soon as the model was built.
NUM_CLASSES = 37


def build_simple_cnn(num_classes=NUM_CLASSES):
    """Build and compile a small convolutional classifier.

    The network stacks three Conv2D + MaxPooling2D stages (32, 64 and 128
    filters), then a 512-unit dense layer with 50 % dropout and a softmax
    output layer.

    Args:
        num_classes: size of the softmax output layer (defaults to NUM_CLASSES).

    Returns:
        A Sequential model taking inputs of shape ``IMG_SIZE + (3,)``, compiled
        with the Adam optimizer, categorical cross-entropy and accuracy.
    """
    model = Sequential([
        Input(shape=IMG_SIZE + (3,)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Train the simple CNN on the non-augmented stream, then report its results.
model_simple = build_simple_cnn()
history_simple = model_simple.fit(
    train_gen_simple,
    validation_data=val_gen_simple,
    epochs=10,
)

analyser_resultats(
    model=model_simple,
    history=history_simple,
    test_generator=test_gen_simple,
    model_name="CNN Simple",
)

#### Augmentation de données

In [None]:
# Random transformations applied to the training images only
# (rotation, shifts, shear, zoom, horizontal flip).
augmentation_settings = {
    "rotation_range": 40,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "fill_mode": 'nearest',
}
aug_datagen = ImageDataGenerator(rescale=1./255, **augmentation_settings)

# Validation and test keep using the plain (non-augmented) generators.
train_gen_aug = aug_datagen.flow_from_dataframe(
    train_df,
    directory=img_dir,
    x_col='Image',
    y_col='BREED_NAME',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Same architecture, trained from scratch on the augmented stream,
# with more epochs than the non-augmented run.
model_aug = build_simple_cnn()
history_aug = model_aug.fit(
    train_gen_aug,
    validation_data=val_gen_simple,
    epochs=15,
)

analyser_resultats(
    model=model_aug,
    history=history_aug,
    test_generator=test_gen_simple,
    model_name="CNN + Data Augmentation",
)

#### Réseau pré-entraîné (VGG16)

In [None]:
# Load VGG16 without its classification part (include_top=False).
base_model = VGG16(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
base_model.trainable = False  # freeze the pre-trained weights

# Size of the output layer, taken from the training stream so that it always
# matches the one-hot labels it yields (the NUM_CLASSES name used here before
# was never defined).
num_breeds = len(train_gen_aug.class_indices)

# Our own classification head on top of the frozen base.
inputs = Input(shape=IMG_SIZE + (3,))
x = base_model(inputs, training=False)  # base always runs in inference mode
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(num_breeds, activation='softmax')(x)

model_transfer = Model(inputs, outputs)
model_transfer.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

# Only the new head is trained at this stage.
history_transfer = model_transfer.fit(train_gen_aug, validation_data=val_gen_simple, epochs=10)

analyser_resultats(model_transfer, history_transfer, test_gen_simple, "Transfer Learning (VGG16)")

#### Fine-tuning

In [None]:
# Step 1 - make the whole base model trainable again...
base_model.trainable = True

# Step 2 - ...then re-freeze its first 15 layers, so that only the layers
# after them are updated.
frozen_prefix = base_model.layers[:15]
for vgg_layer in frozen_prefix:
    vgg_layer.trainable = False

# Step 3 - recompile with a much smaller learning rate than the 1e-3 used
# for the head, so the pre-trained weights are only nudged.
model_transfer.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 4 - continue training the same model.
history_finetune = model_transfer.fit(
    train_gen_aug,
    validation_data=val_gen_simple,
    epochs=10,
)

analyser_resultats(
    model=model_transfer,
    history=history_finetune,
    test_generator=test_gen_simple,
    model_name="Fine Tuning",
)

### Classification fine

In [None]:

# --- Parameters ---
IMG_SIZE = (128, 128)  # reduced input resolution to lighten the CPU/GPU load
BATCH_SIZE = 32

# --- Generators: light augmentation for training, rescaling only otherwise ---
train_datagen = ImageDataGenerator(
    rescale=1./255, rotation_range=20, horizontal_flip=True, fill_mode='nearest'
)
val_test_datagen = ImageDataGenerator(rescale=1./255)

# --- Data streams for the BREED target (fine-grained classification) ---
# Options shared by the training and validation streams; labels are one-hot.
breed_stream_options = dict(
    directory=img_dir,
    x_col="Image",
    y_col="BREED_NAME",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)
train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, **breed_stream_options)
val_generator = val_test_datagen.flow_from_dataframe(dataframe=val_df, **breed_stream_options)

# --- Transfer-learning model (VGG16) ---
base_model = VGG16(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
base_model.trainable = False  # freeze the pre-trained weights

# Classification head: pooled features -> dense 256 -> dropout -> 37-way softmax
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(256, activation='relu')(pooled)
hidden = Dropout(0.5)(hidden)
predictions = Dense(37, activation='softmax')(hidden)  # one unit per breed

model_clf = Model(inputs=base_model.input, outputs=predictions)
model_clf.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# model_clf.summary()
# history_clf = model_clf.fit(train_generator, validation_data=val_generator, epochs=10)

### Segmentation

In [None]:

class PetSegmentationGenerator(tf.keras.utils.Sequence):
    """Batch generator yielding (images, binary masks) for segmentation.

    Each row of ``df`` must provide an 'Image' column (file name inside
    ``img_dir``) and a 'mask_filename' column (file name inside ``mask_dir``).

    Args:
        df: DataFrame listing the samples, read in its current row order.
        img_dir: folder containing the images.
        mask_dir: folder containing the trimap masks.
        batch_size: number of samples per batch.
        img_size: (height, width) every image and mask is resized to.
        **kwargs: forwarded to the Keras base class.
    """

    def __init__(self, df, img_dir, mask_dir, batch_size=32, img_size=(128, 128), **kwargs):
        # The base-class constructor must run: recent Keras versions expect it
        # from every Sequence / PyDataset subclass.
        super().__init__(**kwargs)
        self.df = df
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        """Number of batches per epoch (the last one may be smaller)."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as a pair (images, masks).

        Images are scaled to [0, 1]; masks hold 1.0 on animal pixels and 0.0
        elsewhere, with a trailing channel axis of size 1.
        """
        # Explicit positional slicing: the index labels of a split DataFrame
        # are not contiguous.
        start = index * self.batch_size
        batch_df = self.df.iloc[start : start + self.batch_size]
        images = []
        masks = []

        for _, row in batch_df.iterrows():
            # 1. Load the image and scale its pixels to [0, 1]
            img_path = os.path.join(self.img_dir, row['Image'])
            img = load_img(img_path, target_size=self.img_size)
            images.append(img_to_array(img) / 255.0)

            # 2. Load the mask as a single-channel image
            mask_path = os.path.join(self.mask_dir, row['mask_filename'])
            mask = load_img(mask_path, target_size=self.img_size, color_mode="grayscale")
            mask = img_to_array(mask)

            # 3. Binarize the trimap. Original values: 1 (animal),
            # 2 (background), 3 (border); only the animal class is kept as
            # foreground, everything else becomes 0.
            masks.append((mask == 1).astype("float32"))

        return np.array(images), np.array(masks)

# Segmentation streams for the training and validation splits.
train_gen_seg = PetSegmentationGenerator(
    train_df, img_dir, mask_dir, batch_size=BATCH_SIZE, img_size=IMG_SIZE
)
val_gen_seg = PetSegmentationGenerator(
    val_df, img_dir, mask_dir, batch_size=BATCH_SIZE, img_size=IMG_SIZE
)

In [None]:


def _double_conv(tensor, filters):
    """Apply two successive 3x3 ReLU convolutions ('same' padding) with `filters` channels."""
    tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
    return Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)


def _up_block(tensor, skip, filters):
    """Upsample `tensor` by 2, concatenate the encoder feature map `skip`, then double-conv."""
    upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
    merged = Concatenate()([upsampled, skip])  # skip connection
    return _double_conv(merged, filters)


def build_unet(input_shape):
    """Build a small U-Net for binary segmentation.

    The encoder has three double-conv stages (16, 32, 64 filters), each followed
    by 2x2 max pooling, then a 128-filter bottleneck. The decoder mirrors it with
    three transposed-convolution upsampling stages, each concatenated with the
    encoder feature map of the same resolution.

    Args:
        input_shape: shape of one input image, e.g. (height, width, channels).
            Height and width must be divisible by 8 so that the upsampled maps
            match the skip connections.

    Returns:
        An uncompiled Model mapping an image to a one-channel sigmoid map.
    """
    inputs = Input(input_shape)

    # --- Contracting path (encoder) ---
    c1 = _double_conv(inputs, 16)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = _double_conv(p1, 32)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = _double_conv(p2, 64)
    p3 = MaxPooling2D((2, 2))(c3)

    # --- Bottleneck ---
    c4 = _double_conv(p3, 128)

    # --- Expanding path (decoder) ---
    c5 = _up_block(c4, c3, 64)
    c6 = _up_block(c5, c2, 32)
    c7 = _up_block(c6, c1, 16)

    # --- Output ---
    # Sigmoid: each pixel gets a score in [0, 1] (animal or not).
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c7)

    return Model(inputs=[inputs], outputs=[outputs])

# Build the U-Net for RGB images of size IMG_SIZE and compile it for
# per-pixel binary classification.
unet_input_shape = IMG_SIZE + (3,)
unet_model = build_unet(unet_input_shape)
unet_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# unet_model.summary()
# history_unet = unet_model.fit(train_gen_seg, validation_data=val_gen_seg, epochs=15)

### Pour aller plus loin

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Conv2DTranspose, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def build_multitask_model(input_shape, num_classes):
    """Build a two-headed network sharing one convolutional encoder.

    Args:
        input_shape: shape of one input image, e.g. (128, 128, 3). The
            resolutions quoted in the comments below assume a 128x128 input.
        num_classes: number of units of the classification output.

    Returns:
        An uncompiled Model with input 'input_image' and two outputs, in this
        order: 'class_output' (softmax over ``num_classes``) and 'seg_output'
        (one-channel sigmoid map).
    """
    conv_options = dict(activation='relu', padding='same')
    upsample_options = dict(strides=(2, 2), padding='same')

    image = Input(input_shape, name='input_image')

    # --- 1. Shared encoder, written from scratch ---
    # Stage 1: the pooled map is kept as a skip connection for the decoder.
    stage = Conv2D(32, (3, 3), **conv_options)(image)
    skip_half = MaxPooling2D((2, 2))(stage)        # 64x64

    # Stage 2: second skip connection.
    stage = Conv2D(64, (3, 3), **conv_options)(skip_half)
    skip_quarter = MaxPooling2D((2, 2))(stage)     # 32x32

    # Stage 3: bottleneck shared by both heads.
    stage = Conv2D(128, (3, 3), **conv_options)(skip_quarter)
    encoded = MaxPooling2D((2, 2))(stage)          # 16x16

    # --- 2. Classification head ---
    pooled = GlobalAveragePooling2D()(encoded)
    hidden = Dense(128, activation='relu')(pooled)
    hidden = Dropout(0.5)(hidden)
    class_output = Dense(num_classes, activation='softmax', name='class_output')(hidden)

    # --- 3. Segmentation head (decoder) ---
    # Each step doubles the resolution; the first two also merge the encoder
    # map of matching size.
    decoded = Conv2DTranspose(128, (2, 2), **upsample_options)(encoded)  # 32x32
    decoded = Concatenate()([decoded, skip_quarter])
    decoded = Conv2D(64, (3, 3), **conv_options)(decoded)

    decoded = Conv2DTranspose(64, (2, 2), **upsample_options)(decoded)   # 64x64
    decoded = Concatenate()([decoded, skip_half])
    decoded = Conv2D(32, (3, 3), **conv_options)(decoded)

    decoded = Conv2DTranspose(32, (2, 2), **upsample_options)(decoded)   # 128x128

    seg_output = Conv2D(1, (1, 1), activation='sigmoid', name='seg_output')(decoded)

    # --- Final model ---
    return Model(inputs=image, outputs=[class_output, seg_output])

# Instantiate the two-headed model for 128x128 RGB images and 37 classes,
# then print its layer summary.
joint_input_shape = (128, 128, 3)
model_joint = build_multitask_model(joint_input_shape, num_classes=37)
model_joint.summary()

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import load_img, img_to_array, to_categorical

class MultiTaskGenerator(tf.keras.utils.Sequence):
    """Batch generator yielding images with both a breed label and a binary mask.

    Each row of ``df`` must provide the columns 'Image', 'BREED_NAME' and
    'mask_filename'.

    Args:
        df: DataFrame listing the samples, read in its current row order.
        img_dir: folder containing the images.
        mask_dir: folder containing the trimap masks.
        batch_size: number of samples per batch.
        img_size: (height, width) every image and mask is resized to.
        num_classes: length of the one-hot breed vectors.
        breeds: optional ordered list of breed names defining the class
            indices. Pass the SAME list to every split (train / validation /
            test) to guarantee identical label encoding. When omitted, the
            sorted breeds found in ``df`` are used, which is only consistent
            across splits if each split contains every breed.
        **kwargs: forwarded to the Keras base class.

    Raises:
        ValueError: if there are more breeds than ``num_classes``.
    """

    def __init__(self, df, img_dir, mask_dir, batch_size, img_size, num_classes,
                 breeds=None, **kwargs):
        # The base-class constructor must run: recent Keras versions expect it
        # from every Sequence / PyDataset subclass.
        super().__init__(**kwargs)
        self.df = df
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.num_classes = num_classes

        # Breed name -> integer class index
        self.breeds = sorted(df['BREED_NAME'].unique()) if breeds is None else list(breeds)
        if len(self.breeds) > num_classes:
            raise ValueError(
                f"{len(self.breeds)} breeds do not fit in num_classes={num_classes}"
            )
        self.breed_to_idx = {b: i for i, b in enumerate(self.breeds)}

    def __len__(self):
        """Number of batches per epoch (the last one may be smaller)."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as (X, {'class_output': ..., 'seg_output': ...})."""
        # Explicit positional slicing: the index labels of a split DataFrame
        # are not contiguous.
        start = index * self.batch_size
        batch_df = self.df.iloc[start : start + self.batch_size]

        images = []
        class_labels = []
        masks = []

        for _, row in batch_df.iterrows():
            # Image, scaled to [0, 1]
            img = load_img(os.path.join(self.img_dir, row['Image']), target_size=self.img_size)
            images.append(img_to_array(img) / 255.0)

            # Classification label: integer index, one-hot encoded per batch below
            class_labels.append(self.breed_to_idx[row['BREED_NAME']])

            # Segmentation mask: 1.0 where the trimap value is 1, 0.0 elsewhere
            mask_path = os.path.join(self.mask_dir, row['mask_filename'])
            mask = load_img(mask_path, target_size=self.img_size, color_mode="grayscale")
            masks.append((img_to_array(mask) == 1).astype("float32"))

        X = np.array(images)
        y_class = to_categorical(class_labels, num_classes=self.num_classes)
        y_seg = np.array(masks)

        # The dictionary keys must match the names of the model's output layers.
        return X, {'class_output': y_class, 'seg_output': y_seg}

# Multi-task streams for the training and validation splits
# (batches of 32, 128x128 images, 37 classes).
train_gen_multi = MultiTaskGenerator(
    train_df, img_dir, mask_dir, batch_size=32, img_size=(128, 128), num_classes=37
)
val_gen_multi = MultiTaskGenerator(
    val_df, img_dir, mask_dir, batch_size=32, img_size=(128, 128), num_classes=37
)

In [None]:
# One loss per output, keyed by output-layer name.
losses = dict(
    class_output='categorical_crossentropy',
    seg_output='binary_crossentropy',  # a custom Dice loss is an alternative
)

# Relative weight of each task in the total loss (both equal here).
loss_weights = dict(class_output=1.0, seg_output=1.0)

# Accuracy is tracked for both outputs (IoU is an alternative for the masks).
output_metrics = dict(class_output='accuracy', seg_output='accuracy')

model_joint.compile(
    optimizer='adam',
    loss=losses,
    loss_weights=loss_weights,
    metrics=output_metrics,
)

# Joint training of both heads.
history = model_joint.fit(train_gen_multi, validation_data=val_gen_multi, epochs=15)