In [None]:
!pip install matplotlib

In [None]:
!pip install tifffile
!pip install tqdm # progresbar
!pip3 install torch torchvision torchaudio -f https://download.pytorch.org/whl/torch_stable.html

In [None]:

import os
import s3fs
import shutil
import torch

from pathlib import Path
import random
from collections import OrderedDict

import numpy as np
from tifffile import TiffFile
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from tqdm import tqdm
from PIL import Image
from torch.utils.data import DataLoader,  random_split
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [None]:
# Select the compute device once. `device` is always a torch.device (never a
# bare string), so later code can rely on attributes such as `device.type`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("Using {} device".format(device))
if device.type == "cuda":
    # Query the device that was actually selected rather than the implicit
    # "current" device.
    print("nom du GPU :", torch.cuda.get_device_name(device))
    print("GPU initialisé : ", torch.cuda.is_initialized())

In [None]:
# Fetch the dataset archive from the MinIO (S3-compatible) endpoint and unpack
# it into the current working directory.
archive_name = 'additional_files_earthcube_emu4zqr.zip'
remote_archive = 'projet-funathon/2022/Sujet9_deep_learning_donnees_satellites/additional_files_earthcube_emu4zqr.zip'

fs = s3fs.S3FileSystem(
    client_kwargs={'endpoint_url': 'https://minio.lab.sspcloud.fr'}
)
fs.get(remote_archive, archive_name)
shutil.unpack_archive(archive_name)

In [None]:
# Root folder of the dataset (relative to the working directory).
DATA_FOLDER_STR = 'dataset'
DATA_FOLDER = Path(DATA_FOLDER_STR).expanduser()
DATASET_FOLDER = DATA_FOLDER

# Collect every training image and mask. Both lists are sorted so that the
# i-th image and the i-th mask come out in the same order.
train_images_paths, train_masks_paths = (
    sorted(DATASET_FOLDER.glob(pattern))
    for pattern in ('train/images/*.tif', 'train/masks/*.tif')
)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each image file with its mask file.

    Files are opened with ``TiffFile`` every time a sample is requested;
    nothing is cached in memory.
    """

    def __init__(self, image_paths,mask_paths):
        # Parallel sequences: entry i of each one describes the same sample.
        self.image_paths = image_paths
        self.mask_paths = mask_paths

    def __len__(self):
        """Return the number of samples, i.e. the number of mask paths."""
        return len(self.mask_paths)

    @staticmethod
    def _read_tiff(path):
        """Read one TIFF file and return its content as an array."""
        with TiffFile(path) as tif:
            return tif.asarray()

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Args:
            idx: position of the sample; a tensor index is converted with
                ``tolist()`` first.

        Returns:
            dict with keys ``"image"`` (float tensor), ``"masque"`` (long
            tensor) and ``"id"`` (the mask path as a string).
        """
        index = idx.tolist() if torch.is_tensor(idx) else idx
        mask_path = self.mask_paths[index]

        mask_array = self._read_tiff(mask_path)
        image_array = np.array(self._read_tiff(self.image_paths[index]))

        mask_tensor = torch.tensor(mask_array, dtype=torch.long)
        # Go through a numpy float array before building the float tensor.
        image_tensor = torch.tensor(
            np.array(image_array, dtype=float), dtype=torch.float
        )

        return {
            "image": image_tensor,
            "masque": mask_tensor,
            "id": str(mask_path),
        }

In [None]:
class LandCoverData():
    """Constants describing the land-cover dataset used in this notebook.

    Only class-level attributes are defined; the class is used as a namespace.
    """

    # Sample geometry and size of the label space.
    IMG_SIZE = 256
    N_CHANNELS = 4
    N_CLASSES = 10

    # Number of samples in each split.
    TRAINSET_SIZE = 18491
    TESTSET_SIZE = 5043

    # Class labels; the position in the list is the class index.
    CLASSES = [
        'no_data', 'clouds', 'artificial', 'cultivated', 'broadleaf',
        'coniferous', 'herbaceous', 'natural', 'snow', 'water',
    ]

    # Per-channel statistics (one entry per image channel).
    MEAN_CHANNEL = [339.42029674, 570.98497474, 539.11161384, 2634.49868179]
    STD_CHANNEL = [339.79895785, 404.86935149, 549.41877854, 1071.38939764]

    # Per-class counts, indexed by class.
    COUNT_CLASS = np.array([
        0, 20643, 60971025, 404760981, 277012377,
        96473046, 333407133, 9775295, 1071, 29404605,
    ])
    # Per-class weights, scaled by the total of COUNT_CLASS.
    WEIGHT_CLASS = COUNT_CLASS.sum() * np.array([
        0.0000e+00, 0.0000e+00, 1.6401e-08, 2.4706e-09, 3.6099e-09,
        1.0366e-08, 2.9993e-09, 1.0230e-07, 9.3371e-04, 3.4008e-08,
    ])

    # Class index -> RGB color (0-255) stored as a numpy array.
    CLASSES_COLORPALETTE = {
        class_index: np.asarray(rgb)
        for class_index, rgb in enumerate([
            [0, 0, 0],
            [255, 25, 236],
            [215, 25, 28],
            [211, 154, 92],
            [33, 115, 55],
            [21, 75, 35],
            [118, 209, 93],
            [130, 130, 130],
            [255, 255, 255],
            [43, 61, 255],
        ])
    }


In [None]:
# Build a dataset over the training files and pull its first sample to check
# what a single (image, mask) pair looks like.
dataset = CustomDataset(train_images_paths,train_masks_paths)
iterateur = iter(dataset)
element_dataset = next(iterateur)

image, masque = element_dataset["image"], element_dataset["masque"]

# Print the image shape first, then the mask shape.
for tensor in (image, masque):
    print(tensor.shape)


In [None]:
def show_image(image, display_min=50, display_max=400, ax=None):
    """Show an image, byte-scaling 16-bit data to 8-bit first.

    Args:
        image (numpy.ndarray): the image. When its dtype is ``uint16`` the
            window ``[display_min, display_max]`` is mapped linearly onto
            ``[0, 255]`` and values outside the window are clipped. Any other
            dtype is displayed unchanged.
        display_min (number): value mapped to 0.
        display_max (number): value mapped to 255.
        ax (optional): object exposing ``imshow`` (e.g. a matplotlib Axes) to
            draw on. When omitted the image is drawn with ``plt.imshow`` and
            shown immediately with ``plt.show()``.

    Raises:
        ValueError: if ``display_max`` is not greater than ``display_min``
            for a 16-bit image.
    """
    if image.dtype == np.uint16:
        span = display_max - display_min
        if span <= 0:
            raise ValueError("display_max must be greater than display_min")
        # Convert to float before subtracting: unsigned arithmetic would wrap
        # around for pixels darker than display_min.
        scaled = (image.astype(np.float64) - display_min) * (255 / span)
        # +0.5 then truncation rounds to the nearest integer.
        image = (scaled.clip(0, 255) + 0.5).astype(np.uint8)

    if ax is None:
        plt.imshow(image)
        plt.show()
    else:
        # The caller owns the figure; it decides when to show it.
        ax.imshow(image)

def show_mask(mask, classes_colorpalette, classes=None, add_legend=True, ax=None):
    """Show a semantic segmentation mask as an RGB image.

    Args:
        mask (numpy.ndarray): array of class indices.
        classes_colorpalette (dict[int, sequence]): maps a class index to an
            RGB color with components in [0, 255]. Pixels whose class is not
            in the palette are drawn black.
        classes (list[str], optional): class labels indexed by class index,
            used for the legend. Defaults to ``LandCoverData.CLASSES``.
        add_legend (bool): whether to draw a legend with one colored patch
            per palette entry.
        ax (optional): object exposing ``imshow``/``legend`` (e.g. a
            matplotlib Axes) to draw on. When omitted the mask is drawn with
            pyplot and shown immediately with ``plt.show()``.
    """
    mask = np.asarray(mask)
    # Start from zeros so classes missing from the palette are rendered black
    # instead of whatever happens to be in uninitialised memory.
    rgb_mask = np.zeros((*mask.shape, 3), dtype=np.uint8)
    for c, color in classes_colorpalette.items():
        rgb_mask[mask == c, :] = color

    target = plt if ax is None else ax
    target.imshow(rgb_mask)

    if add_legend:
        if classes is None:
            # Backward-compatible default: labels of the notebook's dataset.
            classes = LandCoverData.CLASSES
        handles = []
        # Build the legend from the palette that was actually used to draw.
        for c, color in classes_colorpalette.items():
            label = classes[c] if 0 <= c < len(classes) else str(c)
            # matplotlib expects color components in [0, 1].
            handles.append(mpatches.Patch(color=np.asarray(color) / 255, label=label))
        target.legend(handles=handles)

    if ax is None:
        plt.show()

In [None]:
# Display the first sample: the image (byte-scaled from the 0-2200 window),
# followed by its segmentation mask.
print("Image")
image_uint16 = np.array(image).astype(np.uint16)
show_image(image_uint16, display_min=0, display_max=2200)

print("masque")
masque_array = np.array(masque)
show_mask(masque_array, LandCoverData.CLASSES_COLORPALETTE)

In [None]:
# Settings of the training run, gathered in one place.
config = {
    # progress reporting
    "monitoring": True,
    "freq monitoring": 50,
    # training schedule and data split
    "n_epoch": 80,
    "train_size": 15000,
    "batch_size": 28,
    # optimizer
    "optimizer": "SGD",
    "lr": 0.003,
    "momentum": 0.9,
    # model
    "model type": "segmentation mask",
    "init_features": 16,
    # validation
    "validation_n_batch": 2000,
    # free-text description of the run
    "descriptif": "Entrainement avec un unet pour segmentation + cross entropy",
}

In [None]:

batch_size = config['batch_size']
# Every loader below uses the same settings.
loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=0)

all_dataset = CustomDataset(train_images_paths, train_masks_paths)
all_loader = DataLoader(all_dataset, **loader_options)

# random_split takes the size of each part; whatever is not used for training
# becomes the validation set. The fixed seed keeps the split reproducible.
train_size = config['train_size']
val_size = len(all_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, valid_dataset = random_split(
    all_dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, **loader_options)
valid_loader = DataLoader(valid_dataset, **loader_options)

In [None]:
class Unet(nn.Module):
    """Small U-Net: two encoder stages, a bottleneck and two decoder stages.

    Each stage is a block of two 3x3 convolutions, each followed by batch
    normalisation and ReLU. Encoder outputs are concatenated with the
    upsampled decoder features (skip connections).

    Args:
        init_features (int): number of feature maps of the first encoder
            stage; deeper stages use 4x and 8x this number.
        in_channels (int): number of channels of the input image.
        out_channels (int): number of output classes.
    """

    def __init__(self, init_features,in_channels=4, out_channels=10):
        super(Unet, self).__init__()
        features = init_features
        # Contracting path: each max-pool halves the spatial resolution.
        self.encoder1 = Unet._block(in_channels, features, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = Unet._block(features, features * 4, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bottleneck = Unet._block(features * 4, features * 8, name="bottleneck")

        # Expanding path: each transposed convolution doubles the resolution.
        # Decoder blocks take twice the channels because of the skip
        # concatenation.
        self.upconv2 = nn.ConvTranspose2d(
            features * 8, features * 4, kernel_size=2, stride=2
        )
        self.decoder2 = Unet._block((features * 4) * 2, features * 4, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(
            features * 4, features, kernel_size=2, stride=2
        )
        self.decoder1 = Unet._block(features * 2, features, name="dec1")

        # 1x1 convolution producing one score map per class.
        self.conv = nn.Conv2d(
            in_channels=features, out_channels=out_channels, kernel_size=1
        )

    def forward(self, x):
        """Compute per-pixel class scores.

        Args:
            x (torch.Tensor): batch of images laid out as (N, C, H, W); H and
                W must be divisible by 4 so the skip connections line up.

        Returns:
            torch.Tensor: raw, unnormalised class scores (logits) of shape
            (N, out_channels, H, W). No sigmoid/softmax is applied because
            ``nn.CrossEntropyLoss`` expects logits; take ``argmax`` over
            dim 1 for class predictions.
        """
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool1(enc1))

        bottleneck = self.bottleneck(self.pool2(enc2))

        dec2 = self.upconv2(bottleneck)
        dec2 = torch.cat((dec2, enc2), dim=1)
        dec2 = self.decoder2(dec2)
        dec1 = self.upconv1(dec2)
        dec1 = torch.cat((dec1, enc1), dim=1)
        dec1 = self.decoder1(dec1)

        # Return logits: squashing them with a sigmoid before the
        # cross-entropy loss would confine the scores to [0, 1].
        return self.conv(dec1)

    @staticmethod
    def _block(in_channels, features, name):
        """Build a (conv3x3 -> batch norm -> ReLU) x 2 block.

        ``name`` prefixes the layer names inside the returned ``Sequential``.
        """
        return nn.Sequential(
            OrderedDict(
                [
                    (name + "conv1", nn.Conv2d(in_channels=in_channels, out_channels=features, kernel_size=3, padding=1, bias=False,),),
                    (name + "Batchnorm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (name + "conv2",nn.Conv2d(in_channels=features, out_channels=features, kernel_size=3, padding=1, bias=False,),),
                    (name + "Batchnorm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )

    

In [None]:
class Unet_dl(nn.Module):
    """Wrapper around the U-Net published on torch.hub.

    The model is fetched from 'mateuszbuda/brain-segmentation-pytorch' with
    4 input channels, 10 output channels and no pretrained weights.
    """

    def __init__(self,init_features):
        super().__init__()
        hub_options = dict(
            in_channels=4,
            out_channels=10,
            init_features=init_features,
            pretrained=False,
            verbose=False,
        )
        self.unet = torch.hub.load(
            'mateuszbuda/brain-segmentation-pytorch', 'unet', **hub_options
        )

    def forward(self,x):
        """Run the wrapped model on ``x`` and return its output unchanged."""
        return self.unet(x)

In [None]:
def get_n_params(model):
    """Return the total number of scalar parameters of ``model``.

    Args:
        model: object exposing ``parameters()`` yielding tensors (e.g. an
            ``nn.Module``).

    Returns:
        int: sum of the element counts of all parameters (0 if there are none).
    """
    # numel() is the product of a tensor's dimensions; using it also avoids a
    # local variable named `nn` hiding the torch.nn alias.
    return sum(p.numel() for p in model.parameters())

In [None]:
# Instantiate the model and display how many parameters it has.
net = Unet(init_features=config['init_features'])
get_n_params(net)

In [None]:
# Fresh model instance, so training starts from newly initialised weights.
net = Unet(init_features=config['init_features'])

In [None]:
# Training: SGD with momentum on the per-pixel cross-entropy loss.
# The model is moved to the target device once, before the optimizer is built.
net = net.to(device)
optimizer = optim.SGD(net.parameters(), lr=config['lr'], momentum=config['momentum'])
entropy = nn.CrossEntropyLoss()

for epoch in range(config['n_epoch']):
    # Make sure batch-norm layers are in training mode even if an earlier
    # cell switched the model to eval().
    net.train()

    # Loss accumulated since the last progress-bar update.
    running_loss = 0.0

    t = tqdm(train_loader, desc="epoch %i" % (epoch+1), position=0, leave=True)

    for i, data in enumerate(t):
        # The model call below receives the images with the last axis moved
        # to position 1 (permute(0, 3, 1, 2)).
        images = data['image'].permute(0, 3, 1, 2).to(device)
        masques = data['masque'].long().to(device)

        optimizer.zero_grad()
        y_hat = net(images)
        loss = entropy(y_hat, masques)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Drop references to the batch tensors to free some memory early.
        del images, masques, y_hat

        # Every 'freq monitoring' batches, show the mean loss over that window
        # in the progress bar, then start a new window.
        if config['monitoring'] and (i+1) % config['freq monitoring'] == 0:
            t.set_description("epoch %i, 'mean loss: %.6f'" % (epoch+1,running_loss/config['freq monitoring']))
            t.refresh()
            running_loss = 0