

---


**Notebook pour le training de modèle pour le PTRANS**

☕*Created by :*

*   Mattéo Boursault
*   Ibrahim Braham
*   Adam Creusevault

Dernière modification : *25/03/2022*

---



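**Pour laisser tourner sur une longue durée** (script JavaScript à coller dans la console du navigateur ; il clique sur le bouton de connexion de Colab toutes les 3 minutes) :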

```
// Starts a timer that clicks the Colab "connect" button every 3 minutes
// (180000 ms). Returns a function that cancels the timer when called.
var startClickConnect = function startClickConnect(){
    // Clicks the connect button found inside the toolbar's shadow DOM.
    var clickConnect = function clickConnect(){
        console.log("Connnect Clicked - Start");
        document.querySelector("#top-toolbar > colab-connect-button").shadowRoot.querySelector("#connect").click();
        console.log("Connnect Clicked - End");
    };

    var intervalId = setInterval(clickConnect, 180000);

    // Cancels the periodic click started above.
    var stopClickConnectHandler = function stopClickConnect() {
        console.log("Connnect Clicked Stopped - Start");
        clearInterval(intervalId);
        console.log("Connnect Clicked Stopped - End");
    };

    return stopClickConnectHandler;
};

// Start clicking now; call stopClickConnect() in the console to stop.
var stopClickConnect = startClickConnect();
```

In [None]:
'''
- Fill in the path to 'Verite_terrain'
- Adjust the parameters in the Configuration section as needed
- Run all the cells of the notebook
- Have fun
'''
# Directory that contains the 'Verite_terrain/' ground-truth folder;
# RadioDataset appends 'Verite_terrain/' to this value, so keep the trailing slash.
VERITE_PATH = 'drive/MyDrive/PTRANS/ptrans-main/Dev/data/'

#### Import + Connexion au Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the relative 'drive/MyDrive/...' paths
# used in this notebook presumably rely on the working directory being /content.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install albumentations==0.4.6
!CUDA_LAUNCH_BLOCKING=1 # plus utile de croiser les doigts mais bon 

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.optim as optim
import time
import copy
import torch.nn as nn

#### Dataset

In [None]:
class RadioDataset(Dataset):
    """Radiographs paired with a single multi-class segmentation mask.

    Files are looked up under ``VERITE_PATH + 'Verite_terrain/'`` in the
    ``Chiens/`` and ``Chats/`` folders. For every radiograph name, three
    PNG masks with the same base name are expected in ``Coeur/``,
    ``Vertebres/`` and ``Process_epineux/``.

    Each item is a dict with:
      * ``"image"``: the radiograph as a 1-channel float tensor produced by
        torchvision's ``Grayscale`` + ``ToTensor``;
      * ``"mask"``: a CPU tensor of class ids (0 background, 1 heart,
        2 vertebrae, 3 spinous processes) with the dtype of the mask array.

    Args:
        inputs_chiens: file names of the dog radiographs.
        inputs_chats: file names of the cat radiographs.
        transform: optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``'image'`` and ``'mask'`` entries (albumentations style).
    """

    def __init__(self, inputs_chiens, inputs_chats, transform=None):
        super().__init__()

        self.transform = transform
        # Built once here rather than on every __getitem__ call.
        self.grayscale_transform = transforms.Compose([
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
        ])

        # Folder layout of the ground truth
        ROOT = VERITE_PATH + 'Verite_terrain/'
        DOG_PATH = 'Chiens/'
        CAT_PATH = 'Chats/'
        RADIOS = 'Radios/'
        HEART_MASKS = 'Coeur/'
        VTB_MASKS = 'Vertebres/'
        PROCESS_MASKS = 'Process_epineux/'

        # Build and store the file paths (masks are stored as PNG files)
        dog_radios = self.generate_chemin(ROOT + DOG_PATH + RADIOS, inputs_chiens)
        cat_radios = self.generate_chemin(ROOT + CAT_PATH + RADIOS, inputs_chats)

        dog_hearts = self.generate_chemin(ROOT + DOG_PATH + HEART_MASKS, inputs_chiens, True)
        cat_hearts = self.generate_chemin(ROOT + CAT_PATH + HEART_MASKS, inputs_chats, True)

        dog_vtb = self.generate_chemin(ROOT + DOG_PATH + VTB_MASKS, inputs_chiens, True)
        cat_vtb = self.generate_chemin(ROOT + CAT_PATH + VTB_MASKS, inputs_chats, True)

        dog_prc = self.generate_chemin(ROOT + DOG_PATH + PROCESS_MASKS, inputs_chiens, True)
        cat_prc = self.generate_chemin(ROOT + CAT_PATH + PROCESS_MASKS, inputs_chats, True)

        # Dogs first, then cats; the four lists are index-aligned.
        self.radios_arr = dog_radios + cat_radios
        self.heart_arr = dog_hearts + cat_hearts
        self.vtb_arr = dog_vtb + cat_vtb
        self.process_arr = dog_prc + cat_prc

        self.data_len = len(self.radios_arr)

        print("GROUND_TRUTH FOUND : ", self.data_len)

    def __getitem__(self, idx: int) -> dict:
        """Load the radiograph and its three masks, then merge the labels.

        Required by the DataLoader. Where masks overlap, the later class
        wins (spinous process over vertebra over heart).
        """
        # Context manager so the file handle is released right after decoding.
        with Image.open(self.radios_arr[idx]) as radio_file:
            radio = np.array(radio_file)

        heart_mask = self.grayscale(self.heart_arr[idx])
        vtb_mask = self.grayscale(self.vtb_arr[idx])
        process_mask = self.grayscale(self.process_arr[idx])

        # Any non-zero pixel of a mask belongs to that mask's class.
        masks = np.zeros_like(heart_mask)
        masks[heart_mask != 0] = 1
        masks[vtb_mask != 0] = 2
        masks[process_mask != 0] = 3

        if self.transform is not None:
            augmentations = self.transform(image=radio, mask=masks)
            radio = augmentations['image']
            masks = augmentations['mask']

        radio = self.grayscale_transform(Image.fromarray(radio))

        # from_numpy (unlike ToTensor) does not rescale values, so the class
        # ids 0, 1, 2 and 3 are preserved. The mask stays on the CPU: the
        # training code moves each batch to its device, and a dataset that
        # touches CUDA cannot be used with DataLoader worker processes.
        return {"image": radio, "mask": torch.from_numpy(masks)}

    def __len__(self) -> int:
        """Return the number of samples (required by the DataLoader)."""
        return self.data_len

    def grayscale(self, path: str) -> np.ndarray:
        """Load a mask, flatten it onto black using its alpha band, return 8-bit gray.

        Images without an alpha band are converted to RGBA first (fully
        opaque) instead of failing when the fourth band is read.
        """
        with Image.open(path) as image:
            rgba = image.convert("RGBA")
        background = Image.new("RGB", rgba.size, (0, 0, 0))
        background.paste(rgba, mask=rgba.split()[3])
        return np.array(background.convert('L'))

    def generate_chemin(self, path: str, tableau, jpgTopng: bool = False) -> list:
        """Prefix every file name of ``tableau`` with ``path``.

        When ``jpgTopng`` is true the extension of each name is replaced by
        ``.png``, whatever its length (``.jpg``, ``.jpeg``, ...).
        """
        if not jpgTopng:
            return [path + name for name in tableau]
        return [path + os.path.splitext(name)[0] + '.png' for name in tableau]

#### Modèle

In [None]:
# modele UNet :

class DoubleConv(nn.Module):
    """Two 3x3 convolutions (padding 1), each followed by BatchNorm and ReLU."""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        # A falsy mid_channels (None or 0) falls back to the output width.
        hidden = mid_channels or out_channels

        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both conv/BN/ReLU stages."""
        features = self.double_conv(x)
        return features


class Down(nn.Module):
    """Encoder stage: 2x2 max pooling followed by a DoubleConv."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and convolve the result."""
        pooled_features = self.maxpool_conv(x)
        return pooled_features


class Up(nn.Module):
    """Decoder stage: transposed-conv upsampling, skip concatenation, DoubleConv.

    ``self.up`` maps ``in_channels`` to ``out_channels`` channels, and
    ``self.conv`` expects ``in_channels`` channels after concatenation, so
    the skip tensor must carry ``in_channels - out_channels`` channels.

    Two merge strategies are provided:
      * ``forward`` (used when the module is called) center-crops the skip
        tensor to the upsampled size;
      * ``forward_padding`` zero-pads the upsampled tensor to the skip size.
    The two concatenate their inputs in opposite channel order, so weights
    trained with one are not interchangeable with the other.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward_padding(self, x1, x2):
        """Upsample ``x1``, pad it to the spatial size of ``x2``, merge, convolve.

        Args:
            x1: tensor coming from the deeper stage (NCHW).
            x2: skip tensor from the encoder (NCHW).
        """
        x1 = self.up(x1)
        # Tensors are indexed as NCHW: dim 2 is height, dim 3 is width.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        # Pad order is (left, right, top, bottom); any odd remainder goes to
        # the right/bottom side.
        x1 = nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                    diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

    def forward(self, target_tensor, contracting_tensor):
        """Upsample ``target_tensor``, crop the skip tensor to match, merge, convolve.

        Args:
            target_tensor: tensor coming from the deeper stage (NCHW).
            contracting_tensor: skip tensor from the encoder (NCHW).
        """
        target_tensor = self.up(target_tensor)
        target_height = target_tensor.size()[2]
        target_width = target_tensor.size()[3]
        crop = transforms.CenterCrop((target_height, target_width))

        contracting_tensor = crop(contracting_tensor)
        new_tensor = torch.cat((target_tensor, contracting_tensor), 1)

        return self.conv(new_tensor)


class OutConv(nn.Module):
    """Final 1x1 convolution mapping ``in_channels`` to ``out_channels`` maps."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        projected = self.conv(x)
        return projected

class UNet(nn.Module):
    """U-Net with four Down stages, four Up stages and a 1x1 output layer.

    Args:
        n_channels: channel count of the input tensor.
        n_classes: number of output maps (one logit map per class).
    """

    def __init__(self, n_channels, n_classes):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes

        # Feature widths from the first level down to the bottleneck.
        widths = (64, 128, 256, 512, 1024)

        self.inc = DoubleConv(n_channels, widths[0])
        # down1..down4: 64->128, 128->256, 256->512, 512->1024
        for level, (narrow, wide) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"down{level}", Down(narrow, wide))
        # up1..up4: 1024->512, 512->256, 256->128, 128->64
        for level, (wide, narrow) in enumerate(zip(widths[::-1], widths[-2::-1]), start=1):
            setattr(self, f"up{level}", Up(wide, narrow))
        self.outc = OutConv(widths[0], n_classes)

    def forward(self, x):
        """Return the logits produced for the batch ``x``."""
        # Encoder: keep every intermediate output for the skip connections.
        skips = [self.inc(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            skips.append(down(skips[-1]))

        # Decoder: start from the deepest output and consume skips in reverse.
        x = skips.pop()
        for up in (self.up1, self.up2, self.up3, self.up4):
            x = up(x, skips.pop())

        return self.outc(x)

#### Configuration

In [None]:
MODELE = UNet                 # model class to use (see the "Modèle" section of the notebook for the available models)
NB_CLASSES = 4                # 4 classes: heart, vertebrae, spinous processes, background
NB_CHANNELS = 1               # passed as UNet's n_channels: channel count of the model input (1 for a grayscale radiograph)
LEARNING_RATE = 0.001         # learning rate given to the optimizer
TRAIN_SIZE=0.9                # fraction of the segmentations used for training (the remaining 1-TRAIN_SIZE is the validation set)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # use CUDA when available, do not change
BATCH_SIZE = 1                # batch size
EPOCHS = 80                   # number of passes over the whole training set
IMAGE_HEIGHT = 560            # input size of the radiographs; per the original authors: max 572 without Colab Pro, 1520 with it. Must be a multiple of 16 (the UNet halves the size four times)
IMAGE_WIDTH = IMAGE_HEIGHT

#### Data Augmentations (with Albumentation)

In [None]:
# Albumentations pipelines that resize every radiograph and its mask to
# IMAGE_HEIGHT x IMAGE_WIDTH.
# Per the original authors, shrinking the inputs keeps training within GPU memory.
# The training transforms are random, so each epoch sees a different variant of
# every sample (data augmentation).
#
# 'mask' is left as albumentations' built-in mask target on purpose. Declaring it
# as an extra 'image' target makes geometric transforms interpolate the class ids
# like pixel intensities, which creates wrong labels along region borders (e.g.
# a 1 or 2 between background 0 and class 3).

train_transform = A.Compose([
    A.Rotate(limit=20, p=0.4),
    A.HorizontalFlip(p=0.4),
    A.VerticalFlip(p=0.4),
    A.Transpose(p=0.4),
    A.GridDistortion(p=0.2),
    A.Resize(IMAGE_HEIGHT, IMAGE_WIDTH),
])

validation_transform = A.Compose([
    A.Resize(IMAGE_HEIGHT, IMAGE_WIDTH),
])

#### Création des Datasets

In [None]:
# Split the data (radiographs and masks) into a training set (TRAIN_SIZE) and a
# validation set (the rest), separately for dogs and cats.
from sklearn.model_selection import train_test_split

# Derive the folders from VERITE_PATH so that changing it in the first cell is
# enough. os.listdir returns names in arbitrary order, so sort them; together
# with a fixed random_state this makes the split identical on every run.
inputs_chiens = sorted(os.listdir(os.path.join(VERITE_PATH, 'Verite_terrain', 'Chiens', 'Radios')))
inputs_chats = sorted(os.listdir(os.path.join(VERITE_PATH, 'Verite_terrain', 'Chats', 'Radios')))

chiens_train, chiens_test = train_test_split(inputs_chiens, train_size=TRAIN_SIZE, random_state=42)
chats_train, chats_test = train_test_split(inputs_chats, train_size=TRAIN_SIZE, random_state=42)

# Datasets and dataloaders for training and validation

train_data = RadioDataset(chiens_train, chats_train, train_transform)
valid_data = RadioDataset(chiens_test, chats_test, validation_transform)
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False)

GROUND_TRUTH FOUND :  230
GROUND_TRUTH FOUND :  27


#### Lancement du training

In [None]:
# fit runs one training epoch
def fit(model,dataloader,data,optimizer,criterion):
    """Train ``model`` for one pass over ``dataloader`` and return the mean loss.

    Args:
        model: network to train; switched to train mode here.
        dataloader: yields dicts with "image" and "mask" tensors.
        data: unused; kept so existing calls keep working (the batch count
            is taken from ``len(dataloader)``).
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, mask)``.

    Returns:
        The average of the per-batch loss values.

    Raises:
        ValueError: if the dataloader yields no batch.
    """
    print('-------------Training---------------')
    model.train()
    train_running_loss = 0.0
    counter = 0

    # len(dataloader) also counts a last, partial batch.
    for batch in tqdm(dataloader, total=len(dataloader)):
        counter += 1
        image = batch["image"].to(DEVICE)
        # Cast to int64 directly on the target device (no CPU round trip).
        mask = batch["mask"].to(DEVICE, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(image)
        # No-op unless the model outputs a single class channel.
        outputs = outputs.squeeze(1)
        loss = criterion(outputs, mask)
        train_running_loss += loss.item()
        loss.backward()
        optimizer.step()

    if counter == 0:
        raise ValueError("fit() received an empty dataloader")
    return train_running_loss / counter

# validate evaluates the model at the end of each epoch
def validate(model,dataloader,data,criterion):
    """Evaluate ``model`` on ``dataloader`` without gradients; return the mean loss.

    Args:
        model: network to evaluate; switched to eval mode here.
        dataloader: yields dicts with "image" and "mask" tensors.
        data: unused; kept so existing calls keep working (the batch count
            is taken from ``len(dataloader)``).
        criterion: loss called as ``criterion(outputs, mask)``.

    Returns:
        The average of the per-batch loss values.

    Raises:
        ValueError: if the dataloader yields no batch.
    """
    print("\n--------Validating---------\n")
    model.eval()
    valid_running_loss = 0.0
    counter = 0

    with torch.no_grad():
        # len(dataloader) also counts a last, partial batch.
        for batch in tqdm(dataloader, total=len(dataloader)):
            counter += 1
            image = batch["image"].to(DEVICE)
            # Cast to int64 directly on the target device (no CPU round trip).
            mask = batch["mask"].to(DEVICE, dtype=torch.long)
            outputs = model(image)
            # No-op unless the model outputs a single class channel.
            outputs = outputs.squeeze(1)
            loss = criterion(outputs, mask)
            valid_running_loss += loss.item()

    if counter == 0:
        raise ValueError("validate() received an empty dataloader")
    return valid_running_loss / counter

In [None]:
# Run the training, keep the best model (lowest validation loss) and record the
# loss history for the plot.
path_save = "drive/MyDrive/model.pth"

# Start from +inf so the first epoch always produces a checkpoint, whatever the
# scale of the loss.
lowest_val_loss = float("inf")
train_loss = []
val_loss = []
# Use the model class selected in the Configuration section.
model = MODELE(NB_CHANNELS, NB_CLASSES).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Original authors' note: BCEWithLogitsLoss reportedly performed better
# (NOTE(review): not verified here).
criterion = nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1} of {EPOCHS}")
    train_epoch_loss = fit(model, train_dataloader, train_data, optimizer, criterion)
    val_epoch_loss = validate(model, valid_dataloader, valid_data, criterion)
    train_loss.append(train_epoch_loss)
    val_loss.append(val_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f'Val Loss: {val_epoch_loss:.4f}')
    if val_epoch_loss < lowest_val_loss:  # only save when the validation loss improves
        lowest_val_loss = val_epoch_loss
        torch.save({
            # 1-based epoch that produced these weights (not the planned total).
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store the achieved loss value, not the loss module: the checkpoint
            # then contains only tensors and plain Python values.
            'loss': val_epoch_loss,
        }, path_save)

# Plot the training and validation loss recorded at each epoch, save the figure
# to Drive, then display it.
plt.figure(figsize=(10, 7))
for series, colour, caption in (
    (train_loss, "orange", 'train loss'),
    (val_loss, "red", 'validation loss'),
):
    plt.plot(series, color=colour, label=caption)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.savefig("drive/MyDrive/loss.png")
plt.show()

#### Test du modèle

In [None]:
MODEL_PATH = path_save

# Load the validation sample once (each access re-reads the files from disk).
sample = valid_data[11]
radio2 = sample["image"]
mask2 = sample["mask"]

model = MODELE(NB_CHANNELS, NB_CLASSES)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only runtime.
checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(checkpoint["model_state_dict"])
model.to(DEVICE)
model.eval()

# Inference only: no gradient bookkeeping needed.
with torch.no_grad():
    # Add the batch axis in front: (C, H, W) -> (1, C, H, W).
    output = model(radio2.unsqueeze(0).to(DEVICE))

# Per-pixel predicted class = index of the largest logit over the class axis.
output2 = output[0].cpu().numpy()
index = output2.argmax(axis=0)

# Spread the class ids apart; imshow normalises the colour range anyway.
index = index*150
fig = plt.figure()
plt.imshow(index)

In [None]:
# Display the ground-truth mask of the same sample for comparison with the prediction.
plt.imshow(mask2.to('cpu'))

In [None]:
# Display the first entry along dim 0 of the input radiograph tensor.
plt.imshow(radio2[0])