Imports

In [6]:
import h5py
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np

Chargement dataset no aug

In [None]:
# Chargement des données
dataset_simple_path = 'D:/Data/data/dog_dataset_no_aug.h5'
#dataset_augmented_path = 'D:/Data/data/dog_dataset_aug_normal.h5'
#dataset_aug_lots_path = 'D:/Data/data/dog_dataset_aug_lots.h5'

# Classe personnalisée pour charger les données
class DogBreedDataset(Dataset):
    def __init__(self, dataset_simple_path, transform=None):
        self.file_path = dataset_simple_path
        self.transform = transform
        
        # Charger uniquement les dimensions
        with h5py.File(self.file_path, "r") as f:
            if "images" in f:
                self.data_len = f["images"].shape[0] # type: ignore
            else:
                raise KeyError("Dataset 'images' not found in the HDF5 file.")
        
    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        with h5py.File(self.file_path, "r") as f:
            image = f["images"][idx] / 255.0  # type: ignore # Normalisation
            label = f["labels"][idx] # type: ignore
        
        # Appliquer une transformation éventuelle
        if self.transform:
            image = self.transform(image)

        # Format PyTorch
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        label = torch.tensor(label, dtype=torch.long)
        
        return image, label

# Initialisation des datasets
dataset = DogBreedDataset(dataset_simple_path)

# Division en ensembles
from sklearn.model_selection import train_test_split

indices = list(range(len(dataset)))
train_indices, temp_indices = train_test_split(indices, test_size=0.3, random_state=42)
val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=42)

# Création des DataLoaders
from torch.utils.data import Subset

train_loader = DataLoader(Subset(dataset, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(Subset(dataset, val_indices), batch_size=32, shuffle=False)
test_loader = DataLoader(Subset(dataset, test_indices), batch_size=32, shuffle=False)


Chargement dataset aug

In [8]:
# Chargement des données
dataset_augmented_path = 'D:/ApprentissageMachineTPFinal/data/dog_dataset_aug_normal.h5'

# Classe personnalisée pour charger les données
class DogBreedDataset(Dataset):
    def __init__(self, dataset_augmented_path, transform=None):
        self.file_path = dataset_augmented_path
        self.transform = transform
        
        # Charger uniquement les dimensions
        with h5py.File(self.file_path, "r") as f:
            if "images" in f:
                self.data_len = f["images"].shape[0] # type: ignore
            else:
                raise KeyError("Dataset 'images' not found in the HDF5 file.")
        
    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        with h5py.File(self.file_path, "r") as f:
            image = f["images"][idx] / 255.0  # type: ignore # Normalisation
            label = f["labels"][idx] # type: ignore
        
        # Appliquer une transformation éventuelle
        if self.transform:
            image = self.transform(image)

        # Format PyTorch
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        label = torch.tensor(label, dtype=torch.long)
        
        return image, label

# Initialisation des datasets
dataset = DogBreedDataset(dataset_augmented_path)

# Division en ensembles
from sklearn.model_selection import train_test_split

indices = list(range(len(dataset)))
train_indices, temp_indices = train_test_split(indices, test_size=0.3, random_state=42)
val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=42)

# Création des DataLoaders
from torch.utils.data import Subset

train_loader = DataLoader(Subset(dataset, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(Subset(dataset, val_indices), batch_size=32, shuffle=False)
test_loader = DataLoader(Subset(dataset, test_indices), batch_size=32, shuffle=False)

Chargement dataset lots

In [4]:
# Chargement des données
dataset_aug_lots_path = 'D:/ApprentissageMachineTPFinal/data/dog_dataset_aug_lots.h5'

# Classe personnalisée pour charger les données
class DogBreedDataset(Dataset):
    def __init__(self, dataset_aug_lots_path, transform=None):
        self.file_path = dataset_aug_lots_path
        self.transform = transform
        
        # Charger uniquement les dimensions
        with h5py.File(self.file_path, "r") as f:
            if "images" in f:
                self.data_len = f["images"].shape[0] # type: ignore
            else:
                raise KeyError("Dataset 'images' not found in the HDF5 file.")
        
    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        with h5py.File(self.file_path, "r") as f:
            image = f["images"][idx] / 255.0  # type: ignore # Normalisation
            label = f["labels"][idx] # type: ignore
        
        # Appliquer une transformation éventuelle
        if self.transform:
            image = self.transform(image)

        # Format PyTorch
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        label = torch.tensor(label, dtype=torch.long)
        
        return image, label

# Initialisation des datasets
dataset = DogBreedDataset(dataset_aug_lots_path)

# Division en ensembles
from sklearn.model_selection import train_test_split

indices = list(range(len(dataset)))
train_indices, temp_indices = train_test_split(indices, test_size=0.3, random_state=42)
val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=42)

# Création des DataLoaders
from torch.utils.data import Subset

train_loader = DataLoader(Subset(dataset, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(Subset(dataset, val_indices), batch_size=32, shuffle=False)
test_loader = DataLoader(Subset(dataset, test_indices), batch_size=32, shuffle=False)

Creation du modele

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Définition du modèle CNN
class DogBreedCNN(nn.Module):
    def __init__(self, num_classes=120):  # 120 classes de races de chiens
        super(DogBreedCNN, self).__init__()

        # couches de convolution
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Pooling et convolution
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Convolutional layers with ReLU and pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))

        # Flatten the output
        x = x.view(-1, 128 * 28 * 28)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Initialisation du modèle
model = DogBreedCNN(num_classes=120)

Entrainement no aug

In [None]:
import torch.optim as optim

# Configurations
learning_rate = 0.0001
num_epochs = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Boucle d'entraînement
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

Epoch [1/25], Loss: 4.7823
Epoch [2/25], Loss: 4.7702
Epoch [3/25], Loss: 4.7689
Epoch [4/25], Loss: 4.7681
Epoch [5/25], Loss: 4.7674
Epoch [6/25], Loss: 4.7676
Epoch [7/25], Loss: 4.7669
Epoch [8/25], Loss: 4.7666
Epoch [9/25], Loss: 4.7660
Epoch [10/25], Loss: 4.7660
Epoch [11/25], Loss: 4.7666
Epoch [12/25], Loss: 4.7665
Epoch [13/25], Loss: 4.7666
Epoch [14/25], Loss: 4.7661
Epoch [15/25], Loss: 4.7662
Epoch [16/25], Loss: 4.7665
Epoch [17/25], Loss: 4.7664
Epoch [18/25], Loss: 4.7662
Epoch [19/25], Loss: 4.7657
Epoch [20/25], Loss: 4.7658
Epoch [21/25], Loss: 4.7657
Epoch [22/25], Loss: 4.7656
Epoch [23/25], Loss: 4.7658
Epoch [24/25], Loss: 4.7655
Epoch [25/25], Loss: 4.7656


Entrainement lots

In [None]:
import torch.optim as optim

# Configurations
learning_rate = 0.001
num_epochs = 25

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Boucle d'entraînement
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

Evaluation du modele

In [8]:
from sklearn.metrics import classification_report

# Evaluation
y_true = []
y_pred = []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Rapport de classification
print(classification_report(y_true, y_pred, zero_division=1))

              precision    recall  f1-score   support

           0       1.00      0.00      0.00        38
           1       1.00      0.00      0.00         9
           2       1.00      0.00      0.00        28
           3       1.00      0.00      0.00        33
           4       1.00      0.00      0.00        24
           5       1.00      0.00      0.00        24
           6       1.00      0.00      0.00        12
           7       1.00      0.00      0.00        27
           8       1.00      0.00      0.00        30
           9       1.00      0.00      0.00        12
          10       1.00      0.00      0.00        34
          11       1.00      0.00      0.00        28
          12       1.00      0.00      0.00        11
          13       1.00      0.00      0.00        22
          14       1.00      0.00      0.00        20
          15       1.00      0.00      0.00        21
          16       1.00      0.00      0.00        19
          17       1.00    