# Prérequis
0. Installation des bibliothèques nécessaires pour le développement

In [None]:
%pip install -r requirements.txt

## Récupération d'un ensemble de données d'IRM cérébrale
Source : https://www.kaggle.com/datasets/navoneel/brain-mri-images-for-brain-tumor-detection

# Importation des bibliothèques

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, ConcatDataset
import glob
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import random
import cv2
import sys

## Lire les images IRM 

In [None]:
def load_images(directory, img_size=(128, 128)):
    """Load every JPEG found directly inside ``directory`` as an RGB array.

    Args:
        directory: Folder to scan. Files are matched against ``*.jpg`` with
            any letter case in the extension.
        img_size: Size tuple handed unchanged to ``cv2.resize``.

    Returns:
        A list with one resized RGB image per readable file. A file that
        OpenCV cannot decode, or whose processing raises, is reported on
        stdout and left out of the result.
    """
    pattern = f'{directory}/*.[jJ][pP][gG]'
    loaded = []

    for file in glob.iglob(pattern):
        try:
            raw = cv2.imread(file)
            if raw is not None:
                resized = cv2.resize(raw, img_size)
                # OpenCV decodes to BGR; convert so the channels are RGB.
                loaded.append(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
            else:
                print(f"Warning: Could not read image {file}")
        except Exception as e:
            print(f"Error processing {file}: {e}")

    return loaded

# Load the tumor ("yes") and healthy ("no") scans as lists of RGB arrays.
tumor = load_images('data/brain_tumor_dataset/yes')
healthy = load_images('data/brain_tumor_dataset/no')

# Print the class sizes as a quick sanity check that both folders were found.
print(f"Loaded {len(tumor)} tumor images and {len(healthy)} healthy images")

In [None]:
# Stack each list of equally sized images into one array per class.
tumor = np.array(tumor)
healthy = np.array(healthy)

# Single array holding both classes: healthy images first, tumor images after.
tumor_and_healthy = np.concatenate((healthy, tumor))

In [None]:
healthy.shape
# (num_images, height, width, channels): every image is 128x128 with 3 RGB channels

## Visualiser les images IRM 

In [None]:
def plot_random(healthy, tumor, num=5):
    """Show ``num`` randomly chosen images of each class, one figure per class.

    Args:
        healthy: Array of healthy images, indexed along axis 0.
        tumor: Array of tumor images, indexed along axis 0.
        num: Number of images drawn per class.

    Raises:
        ValueError: Raised by ``np.random.choice`` when ``num`` exceeds the
            number of images of a class, because sampling is done without
            replacement.
    """
    # Draw `num` distinct indices per class (no duplicates within a class).
    healthy_pick = np.random.choice(healthy.shape[0], num, replace=False)
    healthy_imgs = healthy[healthy_pick]
    tumor_pick = np.random.choice(tumor.shape[0], num, replace=False)
    tumor_imgs = tumor[tumor_pick]

    # One figure per class, images laid out on a single row.
    for title, samples in (('Healthy', healthy_imgs), ('Tumor', tumor_imgs)):
        plt.figure(figsize=(16,9))
        for column, picture in enumerate(samples, start=1):
            plt.subplot(1, num, column)
            plt.title(title)
            plt.imshow(picture)

In [None]:
# Show 10 random samples of each class.
plot_random(healthy, tumor, 10)

## La classe Dataset de PyTorch

In [None]:
class Dataset:
    """Abstract base class representing a dataset.

    Every concrete dataset should subclass this class and override
    ``__len__``, which returns the size of the dataset, and ``__getitem__``,
    which supports integer indexing from 0 to ``len(self)`` exclusive.

    Adding two datasets with ``+`` returns a ``ConcatDataset`` of both.
    """

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, index):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset((self, other))

## Création de la classe IRM 

In [None]:
class IRM(Dataset):
    """In-memory brain-MRI dataset built from a ``yes`` and a ``no`` folder.

    Tumor images (label 1.0) are stored first, healthy images (label 0.0)
    after them. Images are kept as ``float32`` in channel-first layout; pixel
    values stay in the 0-255 range until :meth:`normalize` is called.

    Args:
        root: Folder containing the ``yes`` (tumor) and ``no`` (healthy)
            sub-folders.
        img_size: ``(width, height)`` passed to ``cv2.resize``.

    Attributes are ``images`` (N, 3, height, width) and ``labels`` (N,).
    """

    def __init__(self, root="./data/brain_tumor_dataset", img_size=(128, 128)):
        tumor = self._load_folder(f"{root}/yes", img_size)
        healthy = self._load_folder(f"{root}/no", img_size)

        # Labels: 1 for tumor, 0 for healthy, in the same order as the images.
        tumor_label = np.ones(tumor.shape[0], dtype=np.float32)
        healthy_label = np.zeros(healthy.shape[0], dtype=np.float32)

        self.images = np.concatenate((tumor, healthy), axis=0)
        self.labels = np.concatenate((tumor_label, healthy_label))

    @staticmethod
    def _load_folder(folder, img_size):
        """Return the JPEGs of ``folder`` as one (N, 3, height, width) float32 array."""
        samples = []
        # Same case-insensitive pattern as load_images, so both loaders see
        # the same set of files (e.g. ".JPG" as well as ".jpg").
        for path in glob.iglob(f"{folder}/*.[jJ][pP][gG]"):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None instead of raising on a bad file.
                print(f"Warning: Could not read image {path}")
                continue
            img = cv2.resize(img, img_size)
            # OpenCV decodes to BGR; convert to RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # NOTE(review): reshape only re-labels the axes of the same memory
            # buffer; it is not a (H, W, C) -> (C, H, W) transpose, so the
            # three "channels" do not hold the R, G and B planes. It is kept
            # as-is because the display cells of this notebook recover the
            # image with the inverse reshape. Switching to
            # img.transpose(2, 0, 1) must be done together with those cells.
            img = img.reshape((img.shape[2], img.shape[0], img.shape[1]))
            samples.append(img)

        if not samples:
            # Keep the rank consistent so an empty class still concatenates.
            width, height = img_size
            return np.empty((0, 3, height, width), dtype=np.float32)
        return np.array(samples, dtype=np.float32)

    def __len__(self):
        return self.images.shape[0]

    def __getitem__(self, index):
        """Return ``{'image': ..., 'label': ...}`` for the given index."""
        return {'image': self.images[index], 'label': self.labels[index]}

    def normalize(self):
        """Divide all pixel values by 255.

        Not idempotent: each call divides again, so call it once per dataset.
        """
        self.images = self.images/255.0

In [None]:
# Build the dataset from disk, then divide every pixel value by 255.
irm = IRM()
irm.normalize()

# Extraction des données (DataLoader)

In [None]:
# Visit every sample of the dataset exactly once, in random order.
index = list(range(len(irm)))
random.shuffle(index)

for i in index:
    sample = irm[i]
    img = sample['image']
    label = sample['label']
    # Undo the channel-first reshape done in IRM.__init__ so that matplotlib
    # receives a (height, width, channels) array.
    img = img.reshape(img.shape[1], img.shape[2], img.shape[0])
    plt.title(label)
    plt.imshow(img)
    plt.show()

In [None]:
# IRM defines __getitem__ but no __iter__, so iter() walks indices 0, 1, 2, ...
# Only the first 10 samples are displayed.
it = iter(irm)
for i in range(10):
    sample = next(it)
    img = sample['image']
    label = sample['label']
    # Undo the channel-first reshape done in IRM.__init__ before plotting.
    img = img.reshape(img.shape[1], img.shape[2], img.shape[0])
    plt.title(label)
    plt.imshow(img)
    plt.show()

## Utilisation du DataLoader

In [None]:
# batch_size groups several images into one batch: with batch_size=10 an
# iteration yields an image tensor of shape torch.Size([10, 3, 128, 128]),
# i.e. 10 images at once.
# shuffle=True mixes the images (tumor and healthy) instead of serving them
# in storage order.
dataloader = DataLoader(irm, batch_size=10, shuffle=True)

In [None]:
# Iterate over the batches and print the shape of each image tensor.
for sample in dataloader:
    img = sample['image']
    print(img.shape)
    # Disabled experiment: display one image of the batch, then stop.
    #img = img.reshape(img.shape[1], img.shape[2], img.shape[0])
    #plt.imshow(img)
    #plt.show()
    #print(img.shape)
    #sys.exit(0)

## Création du CNN

$$
n_{\text{out}} = \lfloor \frac{n_{\text{in}} + 2p - f}{s} + 1 \rfloor
$$
- $f$ = kernel_size
- $s$ = stride
- $p$ = padding
- $n_{in}$ = dimension of the input data (which is the output of the previous layer)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# PyTorch models must inherit from nn.Module.
class CNN(nn.Module):
    """LeNet-style binary classifier for batches of 3x128x128 images.

    Spatial size through the convolutional stack, per the output-size
    formula n_out = floor((n_in + 2p - f) / s) + 1:
    128 -> conv(5) 124 -> pool(2, stride 5) 25 -> conv(5) 21 -> pool 4,
    which gives 16 * 4 * 4 = 256 features for the first linear layer.

    ``forward`` returns a (batch, 1) tensor of sigmoid outputs.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        conv_stack = [
            # First convolutional layer (low-level features)
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.Tanh(),  # squashes activations into [-1, 1]
            nn.AvgPool2d(kernel_size=2, stride=5, padding=0),
            # Second convolutional layer (mid-level features)
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=5, padding=0),
        ]
        self.cnn_model = nn.Sequential(*conv_stack)

        # Fully connected head ending in a single output unit.
        dense_stack = [
            nn.Linear(in_features=256, out_features=120),
            nn.Tanh(),
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=1),
        ]
        self.fc_model = nn.Sequential(*dense_stack)

    def forward(self, x):
        features = self.cnn_model(x)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        scores = self.fc_model(flat)
        return F.sigmoid(scores)

## Analyse des paramètres du modèle

In [None]:
# Instantiate the network with its default (random) initial weights.
model = CNN()

In [None]:
# Display the module tree of the whole network.
model

In [None]:
# Display only the convolutional stack.
model.cnn_model

In [None]:
# First layer of the convolutional stack: Conv2d(3 -> 6, kernel 5).
model.cnn_model[0]

In [None]:
# Weight shape is (out_channels, in_channels, kernel_h, kernel_w) = (6, 3, 5, 5).
model.cnn_model[0].weight.shape

In [None]:
# The 5x5 kernel connecting input channel 0 to output channel 0.
model.cnn_model[0].weight[0][0]

## Couche linéaire

In [None]:
# Weight shape of the first linear layer: (out_features, in_features) = (120, 256).
model.fc_model[0].weight.shape

## Explication de x.view(x.size(0), -1)

In [None]:
# Toy example: 16 values arranged as a (2, 2, 2, 2) tensor.
x = torch.tensor([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
x = x.reshape((2,2,2,2))
x.size() # returns (2,2,2,2)
x.size(0) # returns 2
x.view(-1) # tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])
# Keep the first dimension and flatten all the others into one.
y = x.view(x.size(0), -1) # tensor([[ 1,  2,  3,  4,  5,  6,  7,  8],
                      # [ 9, 10, 11, 12, 13, 14, 15, 16]])
y.shape

## torch.Tensor vs. torch.cuda.Tensor
### Les tenseurs sur le CPU ne sont pas du même type que les tenseurs sur le GPU

In [None]:
# Pick the first available backend, in this order: MPS, CUDA, CPU.
device = torch.device(
    'mps' if torch.backends.mps.is_available() else # Apple
    'cuda' if torch.cuda.is_available() else # Nvidia
    'cpu'
)

# Same random tensor construction, once left on the CPU and once moved to
# the selected device.
cpu_tensor = torch.rand(10)
gpu_tensor = torch.rand(10).to(device)

# Print values, dtype, Python type and tensor type string for comparison.
print(cpu_tensor, cpu_tensor.dtype, type(cpu_tensor), cpu_tensor.type())
print(gpu_tensor, gpu_tensor.dtype, type(gpu_tensor), gpu_tensor.type())

In [None]:
# Convert a tensor to a NumPy array: move it to the CPU, detach it from the
# autograd graph, then call numpy().
gpu_tensor.cpu().detach().numpy()

## Test CNN (sans entrainement)

In [None]:
# Fresh dataset (pixel values divided by 255) and a fresh, untrained model
# placed on the selected device.
irm_dataset = IRM()
irm_dataset.normalize()
# Pick the first available backend, in this order: MPS, CUDA, CPU.
device = torch.device(
    'mps' if torch.backends.mps.is_available() else # Apple
    'cuda' if torch.cuda.is_available() else # Nvidia
    'cpu'
)
model = CNN().to(device)

In [None]:
# No shuffling: predictions come out in dataset order (tumor first, then healthy).
dataloader = DataLoader(irm_dataset, batch_size=32, shuffle=False)

In [None]:
# model.eval() switches the module to evaluation mode. This network contains
# no dropout layer, so the call does not change its outputs here.
model.eval()
output = []
y_true = []


# no_grad: inference only, no autograd graph is recorded.
with torch.no_grad():
    for D in dataloader:
        image = D['image'].to(device)
        label = D['label'].to(device)
    
        y_hat = model(image)
    
        # Collect predictions and labels as NumPy arrays, one entry per batch.
        output.append(y_hat.cpu().detach().numpy())
        y_true.append(label.cpu().detach().numpy())


In [None]:
# Merge the per-batch arrays; squeeze drops the size-1 output dimension of
# the model so predictions and labels are both 1-D.
output = np.concatenate(output, axis=0).squeeze()
y_true = np.concatenate(y_true, axis=0).squeeze()

In [None]:
def threshold(scores, threshold=0.50, minimum=0, maximum=1.0):
    """Binarize ``scores`` around ``threshold``.

    Args:
        scores: Iterable of scores (e.g. a NumPy array of probabilities).
        threshold: Cut-off; values ``>= threshold`` are mapped to ``maximum``.
        minimum: Value written where ``score < threshold``.
        maximum: Value written where ``score >= threshold``.

    Returns:
        A new array with the same shape and dtype as ``np.array(list(scores))``.
        The input is not modified. NaN entries match neither comparison and
        are left unchanged.
    """
    x = np.array(list(scores))
    # Both masks are computed before any write: if `maximum` is itself below
    # `threshold`, the freshly written values must not be re-classified as
    # "below" and overwritten with `minimum`.
    above = x >= threshold
    below = x < threshold
    x[above] = maximum
    x[below] = minimum
    return x

In [None]:
# Accuracy of the untrained model, with predictions binarized at 0.5.
accuracy_score(y_true, threshold(output))

In [None]:
import seaborn as sns

# confusion_matrix orders rows (true labels) and columns (predictions) by
# sorted label value: index 0 is label 0 (healthy), index 1 is label 1
# (tumor). The heatmap draws row 0 at the top, so both axes use the same
# label order.
class_names = ['Healthy', 'Tumor']

plt.figure(figsize=(16,9))
cm = confusion_matrix(y_true, threshold(output))
ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax, annot_kws={"size":20})

ax.set_xlabel('Predicted labels', fontsize=20)
ax.set_ylabel('True labels', fontsize=20)
ax.set_title('Confusion Matrix', fontsize=20)
ax.xaxis.set_ticklabels(class_names, fontsize=20)
ax.yaxis.set_ticklabels(class_names, fontsize=20)

## Entraîner le modèle

In [None]:
# Training hyper-parameters: learning rate and number of epochs.
eta = 0.0001
EPOCH = 400
optimizer = torch.optim.Adam(model.parameters(), lr=eta)
# Shuffle so every epoch sees the samples in a different order.
dataloader = DataLoader(irm_dataset, batch_size=32, shuffle=True)
# Switch the module back to training mode (it was set to eval mode earlier).
model.train()

In [None]:
# Binary cross-entropy on the sigmoid outputs of the model. Created once,
# outside the loops; it already returns the mean loss of the batch.
criterion = nn.BCELoss()

# Epochs are numbered 1..EPOCH inclusive, so exactly EPOCH epochs are run and
# the number printed below is the number of epochs completed.
for epoch in range(1, EPOCH + 1):
    losses = []
    for D in dataloader:

        # Gradients accumulate by default: reset them before each batch.
        optimizer.zero_grad()

        data = D['image'].to(device)
        label = D['label'].to(device)
        y_hat = model(data)

        # squeeze(1) drops only the output dimension, (batch, 1) -> (batch,),
        # so a final batch holding a single sample keeps its batch dimension
        # and still matches the shape of `label`.
        loss = criterion(y_hat.squeeze(1), label)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    if epoch % 10 == 0:
        print('Train Epoch: {} Loss: {:.6f}'.format(epoch, np.mean(losses)))
    

## Évaluation du modèle après entraînement

In [None]:
# Evaluate the trained model on the whole dataset, in dataset order.
model.eval()
dataloader = DataLoader(irm_dataset, batch_size=32, shuffle=False)
output = []
y_true = []
# no_grad: inference only, no autograd graph is recorded.
with torch.no_grad():
    for D in dataloader:
        image = D['image'].to(device)
        label = D['label'].to(device)

        y_hat = model(image)

        output.append(y_hat.cpu().detach().numpy())
        y_true.append(label.cpu().detach().numpy())

# Unlike the pre-training evaluation, no squeeze is applied here, so `output`
# keeps the (N, 1) shape produced by the model's single output unit.
output = np.concatenate(output, axis=0)
y_true = np.concatenate(y_true, axis=0)

In [None]:
# Accuracy after training, with predictions binarized at 0.5.
accuracy_score(y_true, threshold(output))

In [None]:
# confusion_matrix orders rows (true labels) and columns (predictions) by
# sorted label value: index 0 is label 0 (healthy), index 1 is label 1
# (tumor). The heatmap draws row 0 at the top, so both axes use the same
# label order.
class_names = ['Healthy', 'Tumor']

plt.figure(figsize=(16,9))
cm = confusion_matrix(y_true, threshold(output))
ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax, annot_kws={"size":20})

ax.set_xlabel('Predicted labels', fontsize=20)
ax.set_ylabel('True labels', fontsize=20)
ax.set_title('Confusion Matrix', fontsize=20)
ax.xaxis.set_ticklabels(class_names, fontsize=20)
ax.yaxis.set_ticklabels(class_names, fontsize=20)

In [None]:
# Plot the predicted score of every sample, in dataset order.
plt.figure(figsize=(16,9))
plt.plot(output)
# The dataset stores tumor samples (label 1) first and healthy samples after
# them, so the number of positive labels is the index of the class boundary.
# It is taken from the evaluated dataset itself rather than from a separately
# loaded image array, whose file count may differ.
num_tumor = int(irm_dataset.labels.sum())
plt.axvline(x=num_tumor, color='r', linestyle='--')
plt.grid()

## Visualisation à l'aide d'une « Feature Map »

In [None]:
# Display the module tree again to locate the convolutional layers.
model

In [None]:
# Counter and container for the convolutional layers found in the model.
no_of_layer = 0
conv_layers = []

In [None]:
# Direct sub-modules of the model: the two nn.Sequential containers
# (cnn_model and fc_model).
model_children = list(model.children())
model_children

In [None]:
# Collect every Conv2d found directly inside the Sequential containers.
# The list is rebuilt from scratch, so re-running this cell does not append
# the same layers a second time.
conv_layers = []
for child in model_children:
    if not isinstance(child, nn.Sequential):
        continue
    conv_layers.extend(
        layer for layer in child.children() if isinstance(layer, nn.Conv2d)
    )
no_of_layer = len(conv_layers)
                

In [None]:
# Display the collected convolutional layers.
conv_layers

In [None]:
# Take sample 100 and display it; the reshape undoes the channel-first
# reshape done in IRM.__init__.
img = irm_dataset[100]['image']
plt.imshow(img.reshape(128,128,3))

In [None]:
# Wrap the NumPy array in a tensor and move it to the model's device.
img = torch.from_numpy(img).to(device)

In [None]:
# Shape of a single image, without a batch dimension.
img.shape

In [None]:
# Add a leading batch dimension of size 1 before feeding the conv layers.
img = img.unsqueeze(0)
img.shape

## Feature Map

In [None]:
# Chain the convolutional layers only: each one receives the raw output of
# the previous convolution, so the Tanh and AvgPool2d stages of the model's
# forward pass are skipped.
results = [conv_layers[0](img)]
for i in range(1, len(conv_layers)):
    results.append(conv_layers[i](results[-1]))
# Rebinds `output`, which held the model predictions in earlier cells.
output = results

In [None]:
# One entry per convolutional layer.
len(output)

In [None]:
# Shape of the first layer's feature maps.
output[0].shape

In [None]:
# One figure per convolutional layer, one subplot per feature map, laid out
# on a 2x8 grid.
for layer_number, activations in enumerate(output, start=1):
    plt.figure(figsize=(50,10))
    # Drop the size-1 batch dimension so that iterating yields one map per channel.
    layer_viz = activations.squeeze()
    print("Layer ", layer_number)
    for position, feature_map in enumerate(layer_viz, start=1):
        plt.subplot(2, 8, position)
        plt.imshow(feature_map.detach().cpu().numpy())
        plt.axis("off")
    plt.show()
    plt.close()

## GRAD-CAM

In [None]:
# TODO