In [1]:
%load_ext autoreload
%autoreload 2
#!pip install tifffile
#!pip install scikit-image
from pathlib import Path
import random

import numpy as np
from tifffile import TiffFile
import matplotlib.pyplot as plt
from tqdm import tqdm

from framework.dataset_modified import LandCoverData as LCD
from framework.dataset_modified import parse_image, load_image_train, load_image_test, numpy_parse_image


import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from skimage.io import imread

from weighted_loss import loss as weighted_categorical_cross_entropy
from PIL import Image

import segmentation_models as sm


Segmentation Models: using `keras` framework.


In [2]:
# Root of the challenge data on disk.
# NOTE(review): absolute, machine-specific path; adjust it when running on another machine.
DATA_FOLDER_STR = '/home/homer/Documents/Master_Statistique/Apprentissage_automatique/challenge/benchmark/challenge-ens/data'
DATA_FOLDER = Path(DATA_FOLDER_STR).expanduser()
# Unzipped dataset directory; the glob patterns below look under its train/ and test/ sub-directories.
DATASET_FOLDER = DATA_FOLDER / 'dataset'

# Sorted lists of every training image and every training mask (.tif files).
train_images_paths = sorted(DATASET_FOLDER.glob('train/images/*.tif'))
train_masks_paths = sorted(DATASET_FOLDER.glob('train/masks/*.tif'))
# Sorted list of every test image (.tif files).
test_images_paths = sorted(DATASET_FOLDER.glob('test/images/*.tif'))

In [3]:
def load_image_mask(image_path) :
    """Read an image and its associated mask from disk.

    The mask is looked up in a ``masks`` directory that sits next to the image's
    parent directory, under the same file name as the image.

    Args:
        image_path (pathlib.Path): path to the image file.
    Returns:
        tuple: ``(image, mask)`` arrays as returned by ``TiffFile.asarray()``; the mask
        carries one extra trailing axis of length 1.
    """
    masks_dir = image_path.parent.parent / 'masks'
    mask_path = masks_dir / image_path.name
    with TiffFile(image_path) as image_tif, TiffFile(mask_path) as mask_tif:
        image = image_tif.asarray()
        # append a trailing channel axis to the mask, e.g. (256, 256) -> (256, 256, 1)
        mask = mask_tif.asarray()[..., None]
    return image, mask

def compute_distribution(mask, n_classes=10) :
    """Compute the proportion of pixels belonging to each class in a label mask.

    Args:
        mask (array-like of non-negative ints): class label of every pixel, any shape.
        n_classes (int, optional): minimum length of the returned vector; classes that
            do not occur in ``mask`` get a proportion of 0. Defaults to 10.
    Returns:
        numpy.ndarray: 1-D float array whose entry ``k`` is the fraction of pixels
        labelled ``k``. It has at least ``n_classes`` entries (more if ``mask``
        contains a label >= ``n_classes``).
    """
    # np.asarray lets plain nested lists be passed as well as ndarrays
    count = np.bincount(np.asarray(mask).ravel(), minlength=n_classes)
    # vectorised sum instead of the Python builtin, which iterates element by element
    return count / count.sum()

def load_distributions(paths, n_max=1000) :
    """Compute the class distribution of the mask associated with each image path.

    Args:
        paths (iterable of pathlib.Path): image paths; each one is passed to ``load_image_mask``.
        n_max (int, optional): accepted but not used by this implementation; every
            path in ``paths`` is processed.
    Returns:
        numpy.ndarray: one row per path, each row produced by ``compute_distribution``
        on the mask of that path.
    """
    return np.array([
        # only the mask (second element) is needed; the image itself is discarded
        compute_distribution(load_image_mask(image_path)[1])
        for image_path in tqdm(paths)
    ])

In [4]:
# File names of the test images as plain strings, rebuilt from the data folder string
# and the name of each globbed test image.
image_filenames_test = np.array(
    [''.join((DATA_FOLDER_STR, '/dataset/test/images/', path.name)) for path in test_images_paths]
)

In [5]:
# Class-proportion vector of every training mask (one row per training image).
labels = load_distributions(paths=train_images_paths)

100%|██████████| 18491/18491 [01:06<00:00, 278.96it/s]


In [6]:
# Hold out 1% of the training images (and their class distributions) for validation.
# A fixed random_state makes the split identical after a kernel restart, so the
# validation score stays comparable between runs.
train_filenames, val_filenames, train_labels, val_labels = train_test_split(
    train_images_paths, labels, test_size=0.01, random_state=42
)

In [7]:
# Name of the encoder backbone; used for the preprocessing function here and for the segmentation model.
BACKBONE = 'mobilenetv2'
# Preprocessing function obtained from segmentation_models for this backbone; the batch generators apply it to every image.
preprocess_input = sm.get_preprocessing(BACKBONE)

In [8]:
class My_Custom_Generator(tf.keras.utils.Sequence) :
    """Keras ``Sequence`` yielding batches of (images, one-hot masks) read from .tif files.

    Args:
        image_filenames (sequence of pathlib.Path): image paths. The mask of each image
            is read from a ``masks`` directory next to the image's parent directory,
            under the same file name.
        batch_size (int): number of samples per batch (the last batch may be smaller).
        num_classes (int, optional): depth of the one-hot encoding of the masks.
            Defaults to 10.
    """

    def __init__(self, image_filenames, batch_size, num_classes=10) :
        self.image_filenames = image_filenames
        self.batch_size = batch_size
        self.num_classes = num_classes

    def __len__(self) :
        """Return the number of batches, counting a final partial batch."""
        # `np.int` was removed in NumPy 1.24; the builtin int gives the same value.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Build batch number ``idx``.

        Returns:
            tuple: ``(images, masks)`` where ``images`` stacks the preprocessed images
            of the batch and ``masks`` stacks their one-hot encoded masks.
        """
        batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
        # NOTE(review): the output of `preprocess_input` is additionally divided by 255;
        # confirm that combining both scalings is intended.
        images = np.array([preprocess_input(self.load_image(file_name)) for file_name in batch_x])/255.0
        masks = np.array([tf.one_hot(self.load_mask(file_name), self.num_classes) for file_name in batch_x])
        return images, masks

    def load_image(self, image_path, channels=4) :
        """Read an image file and keep its first ``channels`` channels (last axis)."""
        with TiffFile(image_path) as tifi :
            image = tifi.asarray()[:,:,:channels]
        return image

    def load_mask(self, image_path):
        """Read the mask that corresponds to ``image_path`` (same file name, ``masks`` directory)."""
        mask_path = image_path.parent.parent/'masks'/image_path.name
        with TiffFile(mask_path) as tifm :
            mask = tifm.asarray()
        return mask
    
class My_Custom_Generator_test(tf.keras.utils.Sequence) :
    """Keras ``Sequence`` yielding batches of preprocessed images only (no masks).

    Args:
        image_filenames (sequence of str or pathlib.Path): paths of the images to read.
        batch_size (int): number of images per batch (the last batch may be smaller).
    """

    def __init__(self, image_filenames, batch_size) :
        self.image_filenames = image_filenames
        self.batch_size = batch_size

    def __len__(self) :
        """Return the number of batches, counting a final partial batch."""
        # `np.int` was removed in NumPy 1.24; the builtin int gives the same value.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return the stacked, preprocessed images of batch number ``idx``."""
        batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
        # NOTE(review): the output of `preprocess_input` is additionally divided by 255;
        # confirm that combining both scalings is intended.
        return np.array([preprocess_input(self.load_image(file_name)) for file_name in batch_x])/255.0

    def load_image(self, image_path, channels=4) :
        """Read an image file and keep its first ``channels`` channels (last axis)."""
        with TiffFile(image_path) as tifi :
            image = tifi.asarray()[:,:,:channels]
        return image

In [9]:
# Number of samples per training / validation batch.
batch_size = 4
# Batch generators over the training and validation image paths.
train_generator = My_Custom_Generator(image_filenames=train_filenames, batch_size=batch_size)
val_generator = My_Custom_Generator(image_filenames=val_filenames, batch_size=batch_size)

In [10]:
# Image-only generator over the validation paths, one image per batch.
valtest_gen = My_Custom_Generator_test(image_filenames=val_filenames, batch_size=1)

In [11]:
# Image-only generator over the test file names, one image per batch.
test_generator = My_Custom_Generator_test(image_filenames=image_filenames_test, batch_size=1)

In [12]:
# Fetch the first training batch and display the shape of its image array.
x, y = train_generator[0]
x.shape

(4, 256, 256, 4)

In [20]:
# Switch segmentation_models to the tf.keras framework before the model is built.
sm.set_framework('tf.keras')
# The value returned here is not displayed or stored (it is not the last expression of the cell).
sm.framework()

# Linknet segmentation model on the chosen backbone: ImageNet encoder weights, frozen encoder,
# 10 output classes with a softmax activation.
base_model = sm.Linknet(BACKBONE, encoder_weights='imagenet', classes=10, encoder_freeze=True, activation='softmax')

# The input has 4 channels; a 1x1 convolution maps it to the 3 channels passed to base_model.
inp = tf.keras.layers.Input(shape=(None, None, 4))
l1 = tf.keras.layers.Conv2D(3, (1, 1))(inp)
out = base_model(l1)

# Wrap the 1x1 convolution and the Linknet into one model that reuses the base model's name.
model = tf.keras.models.Model(inp, out, name=base_model.name)
model.summary()

# Training objective: sum of the categorical focal loss and the Jaccard loss.
loss = sm.losses.categorical_focal_loss + sm.losses.jaccard_loss
model.compile(
    'Adam',
    loss=loss,
    metrics=[sm.metrics.iou_score, 'accuracy'],
)

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, None, None, 4)]   0         
_________________________________________________________________
conv2d_43 (Conv2D)           (None, None, None, 3)     15        
_________________________________________________________________
model_4 (Functional)         (None, None, None, 10)    4145882   
Total params: 4,145,897
Trainable params: 1,917,689
Non-trainable params: 2,228,208
_________________________________________________________________


In [None]:
# `Model.fit` accepts a `Sequence` directly (`fit_generator` is deprecated).
# Leaving `steps_per_epoch` / `validation_steps` unset makes Keras use the length of
# each generator, so the final partial batch is no longer dropped as it was with the
# floor-divided step counts.
hist = model.fit(
    train_generator,
    epochs=30,
    verbose=1,
    validation_data=val_generator,
    #class_weight = class_weight
)

Epoch 1/30
Epoch 2/30

* **Epoch 1** : En environ 40 min on a : 
   * *train* $\rightarrow$ accuracy=0.6527, loss=0.8324
   * *test* $\rightarrow$ accuracy=0.65, loss=0.8330

In [13]:
def predict_as_vectors(model, dataset, steps=None):
    """Run the model over every batch and turn each predicted mask into a class-proportion vector.

    Args:
        model (tf.keras.Model): model whose ``predict_on_batch`` output has 4 axes, the
            first being the batch axis and the last the class axis
        dataset (iterable): yields the input batches to run inference on
        steps (int, optional): total number of batches, only used for the progress bar
    Returns:
        (numpy.ndarray): one row per sample, holding the fraction of pixels predicted for each class
    """
    def bincount_along_axis(arr, minlength=None, axis=-1):
        """Count, along ``axis``, how many entries of ``arr`` equal each value in ``range(minlength)``"""
        n_bins = tf.reduce_max(arr) + 1 if minlength is None else minlength
        # compare every entry against each candidate value on a new trailing axis
        hits = tf.equal(arr[..., None], tf.range(n_bins, dtype=arr.dtype))
        # the new trailing axis shifts negative axis indices by one position
        reduce_axis = axis if axis >= 0 else axis - 1
        return tf.math.count_nonzero(hits, axis=reduce_axis)

    batch_vectors = []
    for batch in tqdm(dataset, total=steps):
        # per-pixel class scores for every sample of the batch
        scores = model.predict_on_batch(batch)
        (n_samples, _, _, n_classes) = tuple(scores.shape)

        # highest-scoring class per pixel, flattened to one row of pixels per sample
        flat_labels = tf.reshape(tf.argmax(scores, -1), (n_samples, -1))
        counts = bincount_along_axis(flat_labels, minlength=n_classes, axis=-1)
        # divide by the per-sample pixel count so that each row sums to 1
        totals = tf.math.reduce_sum(counts, -1, keepdims=True)
        batch_vectors.append(counts / totals)

    return tf.concat(batch_vectors, 0).numpy()


In [14]:
# Predicted class-proportion vectors for the held-out validation images.
predictions = predict_as_vectors(model=model, dataset=valtest_gen)

100%|██████████| 1850/1850 [03:18<00:00,  9.30it/s]


In [15]:
from custom_metric_5SF9GKS import custom_metric_function

In [16]:
import pandas as pd
# Score the predicted class distributions of the validation images against the
# distributions computed from their masks; the result is displayed as the cell output.
custom_metric_function(
    pd.DataFrame(data=predictions),
    pd.DataFrame(data=val_labels),
)

0.05893625727418147

In [18]:
# Predicted class-proportion vectors for the test images.
predictions_test = predict_as_vectors(model=model, dataset=test_generator)

100%|██████████| 5043/5043 [08:46<00:00,  9.58it/s]


In [19]:
import pandas as pd
# One named column per predicted class proportion.
df = pd.DataFrame(
    predictions_test,
    columns=['no_data', 'clouds', 'artificial', 'cultivated', 'broadleaf',
             'coniferous', 'herbaceous', 'natural', 'snow', 'water'],
)
output_csv = Path('results/linknet/result2_dropout_mvnetfreezed.csv')
# `to_csv` does not create missing directories, so make sure the target folder exists first.
output_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_csv)