## En utilisant PyTorch, le resizing est obligatoire. On va donc essayer avec TensorFlow pour comparer les deux.

### Préparation des données

In [1]:
#----------------------------------------------------------------------------#

import numpy as np
import tensorflow as tf
import os
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torch.utils.data import Dataset, random_split, Subset
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, AveragePooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Input
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from sklearn.model_selection import train_test_split

#----------------------------------------------------------------------------#

In [4]:
#----------------------------------------------------------------------------#

# Root folder of the pre-processed images. Work on a copy of the images:
# the upstream transformation is permanent.
main_dir = r'C:\Users\Nicolas\Documents\DataScience\MAR24_BDS_Radios_Pulmonaire\data\processed\test NB\LENET_processing'

# Sub-folders and the integer class label assigned to each of them
folders = {
    'COVID_norm': 0,
    'Non-COVID_norm': 1,
    'Normal_norm': 2
}

# File extensions accepted as images (compared case-insensitively below)
image_extensions = ('.png', '.jpg', '.jpeg')

# Collect every image path together with its label
image_paths = []
image_labels = []

for folder, label in folders.items():
    folder_path = os.path.join(main_dir, folder)
    # os.listdir returns entries in arbitrary order; sorting makes the
    # random_state-seeded splits below reproducible across runs and machines.
    for img in sorted(os.listdir(folder_path)):
        # Lower-case the name so that '.PNG' / '.JPG' files are not dropped.
        if img.lower().endswith(image_extensions):
            image_paths.append(os.path.join(folder_path, img))
            image_labels.append(label)

test_size = 0.20  # fraction of the entire dataset held out (validation + test)
val_size = 0.25   # fraction of the hold-out that becomes the validation set

# First split: training set vs. hold-out
X_train, X_temp, y_train, y_temp = train_test_split(
    image_paths, image_labels, test_size=test_size, random_state=42)

# Second split: the hold-out is divided into test and validation sets
X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=val_size, random_state=42)

# Check the size of each set
print(f"Training set: {len(X_train)} samples")
print(f"Validation set: {len(X_val)} samples")
print(f"Test set: {len(X_test)} samples")


#----------------------------------------------------------------------------#

Training set: 27136 samples
Validation set: 1696 samples
Test set: 5088 samples


In [None]:
#----------------------------------------------------------------------------#

## Préparation des données sans le resizing

#----------------------------------------------------------------------------#

class XRayDataset(Dataset):
    """Dataset that loads grayscale images from disk on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``image_paths``.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels, self.transform = image_paths, labels, transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        # 'L' mode: single-channel grayscale
        picture = Image.open(self.image_paths[index]).convert('L')
        target = self.labels[index]
        if not self.transform:
            return picture, target
        return self.transform(picture), target

# Only convert the PIL image to a tensor: no resizing step on purpose.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Build the datasets from the splits produced by train_test_split
# (X_* hold the image paths, y_* the matching labels).
train_dataset = XRayDataset(X_train, y_train, transform=transform)
val_dataset = XRayDataset(X_val, y_val, transform=transform)
test_dataset = XRayDataset(X_test, y_test, transform=transform)

# Build the dataloaders; only the training data is shuffled.
# NOTE(review): batching without resizing presumably requires every image to
# have the same size - confirm against the content of the image folders.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
#----------------------------------------------------------------------------#

## Construction du modèle LeNet

#----------------------------------------------------------------------------#

# Architecture du modèle
class LeNet5(nn.Module):
    """LeNet-style CNN for single-channel images of arbitrary size.

    Layout: conv(5x5, 6 maps) -> avg-pool(2) -> conv(3x3, 16 maps) ->
    avg-pool(2) -> adaptive avg-pool to 5x5 -> 120 -> 84 -> ``num_classes``.

    Without padding, a 28x28 input shrinks to 24 -> 12 -> 10 -> 5 pixels per
    side, which is exactly the 16 * 5 * 5 features the first dense layer
    expects. Any other input size used to fail in ``fc1`` with a shape
    mismatch; the adaptive pooling step brings every feature map to 5x5 and
    is the identity when the map is already 5x5.

    Args:
        num_classes: Number of output logits (default 3).
    """

    # Spatial size of the feature maps fed to the first dense layer
    _FEATURE_GRID = (5, 5)

    def __init__(self, num_classes=3):
        super().__init__()
        # Layer 1: convolution, 1 input channel -> 6 feature maps
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding='valid')
        # Layer 2: sub-sampling
        self.pool1 = nn.AvgPool2d(kernel_size=2)
        # Layer 3: convolution, 6 -> 16 feature maps
        self.conv2 = nn.Conv2d(6, 16, kernel_size=3, stride=1, padding='valid')
        # Layer 4: sub-sampling
        self.pool2 = nn.AvgPool2d(kernel_size=2)
        # Fully connected layers
        grid_h, grid_w = self._FEATURE_GRID
        self.fc1 = nn.Linear(16 * grid_h * grid_w, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of single-channel images.

        Args:
            x: Tensor of shape (batch, 1, height, width).

        Returns:
            Tensor of shape (batch, num_classes); no softmax is applied.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Bring the feature maps to the fixed grid expected by fc1,
        # whatever the input resolution was.
        x = F.adaptive_avg_pool2d(x, self._FEATURE_GRID)
        # Flatten everything except the batch dimension
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


In [None]:

# Pick the GPU when one is available, otherwise fall back to the CPU,
# then instantiate the network and move it onto that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LeNet5()
model = model.to(device)

# Cross-entropy objective, optimised with Adam at a learning rate of 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.0001)

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and print the validation accuracy after every epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: Network to train; updated in place and left in eval mode.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The mean batch loss and the validation accuracy are printed
        once per epoch.
    """
    # Follow the model's own device; a parameter-less model falls back to CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()  # Reset gradients from the previous step
            loss = criterion(model(images), labels)  # Forward pass + loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()
            num_batches += 1

        # Mean of the per-batch losses; NaN instead of a ZeroDivisionError
        # when the training loader yields nothing.
        avg_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

        # Validation accuracy
        model.eval()  # Set model to evaluation mode
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total:
            val_accuracy = 100 * correct / total
            print(f'Validation Accuracy: {val_accuracy:.2f}%')
        else:
            # Nothing to evaluate: report it rather than dividing by zero.
            print('Validation Accuracy: n/a (empty validation loader)')

# Launch training for 16 epochs with the objects prepared in the cells above
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=16,
)