In [1]:
%load_ext autoreload
%autoreload 2
#!pip install tifffile
#!pip install scikit-image
from pathlib import Path
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tifffile import TiffFile
from tqdm import tqdm

from framework.dataset_modified import LandCoverData as LCD
from framework.dataset_modified import parse_image, load_image_train, load_image_test, numpy_parse_image

In [2]:
# Root directory of the challenge data on the local machine.
DATA_FOLDER_STR = '/home/homer/Documents/Master_Statistique/Apprentissage_automatique/challenge/benchmark/challenge-ens/data'
DATA_FOLDER = Path(DATA_FOLDER_STR).expanduser()
# Unzipped dataset directory: holds the train/ and test/ sub-directories.
DATASET_FOLDER = DATA_FOLDER / 'dataset'

# Sorted listings of the training images, the training masks and the test images.
train_images_paths, train_masks_paths, test_images_paths = (
    sorted(DATASET_FOLDER.glob(pattern))
    for pattern in ('train/images/*.tif', 'train/masks/*.tif', 'test/images/*.tif')
)

In [3]:
def load_image_mask(image_path) :
    """Read an image and its associated mask from disk.

    The mask file is looked up in the ``masks`` directory that sits next to the
    image's own directory, under the same file name as the image.

    Args:
        image_path: ``pathlib.Path`` of the image TIFF file.

    Returns:
        Tuple ``(image, mask)`` of arrays as read by ``TiffFile.asarray``; the
        mask gets one extra trailing axis.
    """
    mask_file = image_path.parent.parent / 'masks' / image_path.name
    with TiffFile(image_path) as image_tif, TiffFile(mask_file) as mask_tif:
        pixels = image_tif.asarray()
        classes = mask_tif.asarray()
    # Append a trailing channel axis to the mask before returning it.
    return pixels, classes[..., None]

def compute_distribution(mask, n_classes=10) :
    """Return the fraction of pixels that belong to each class of ``mask``.

    Args:
        mask: array-like of non-negative integer class indices, of any shape.
        n_classes: minimum length of the returned vector (default 10). The
            result is longer if ``mask`` contains an index >= ``n_classes``.

    Returns:
        1-D float array whose entry ``k`` is the share of pixels equal to ``k``.
        It sums to 1 for a non-empty mask and is all zeros for an empty mask
        (instead of NaN produced by a 0/0 division).
    """
    counts = np.bincount(np.asarray(mask).ravel(), minlength=n_classes)
    total = counts.sum()
    if total == 0:
        # Empty mask: there is nothing to normalise, so avoid dividing by zero.
        return np.zeros(counts.shape, dtype=float)
    return counts / total

def load_distributions(paths, n_max=1000) :
    """Compute the class distribution of the mask paired with each image path.

    Args:
        paths: iterable of image paths; each one is handed to
            ``load_image_mask`` and only the returned mask is used.
        n_max: accepted for compatibility but not used; every path in
            ``paths`` is processed.

    Returns:
        Array stacking one ``compute_distribution`` result per path, in the
        order of ``paths``.
    """
    return np.array([
        compute_distribution(load_image_mask(image_path)[1])
        for image_path in tqdm(paths)
    ])

In [8]:
# Class distribution of every training mask, one row per training image.
labels = load_distributions(train_images_paths)
# Training image locations as plain strings, in the same order as `labels`.
image_filenames = np.array([
    f'{DATA_FOLDER_STR}/dataset/train/images/{path.name}'
    for path in train_images_paths
])

100%|██████████| 18491/18491 [00:47<00:00, 386.00it/s]


In [9]:
# Keep only the 8 class columns the model predicts; columns 0 and 8 are dropped.
KEPT_CLASS_INDICES = [1, 2, 3, 4, 5, 6, 7, 9]
# Guard so the cell can be re-run: once `labels` has been reduced, indexing
# column 9 again would raise an IndexError.
if labels.shape[1] > max(KEPT_CLASS_INDICES):
    labels = labels[:, KEPT_CLASS_INDICES]

In [11]:
# Test image locations as plain strings, in the sorted order of `test_images_paths`.
image_filenames_test = np.array([
    f'{DATA_FOLDER_STR}/dataset/test/images/{path.name}'
    for path in test_images_paths
])

In [12]:
# Number of test images found.
image_filenames_test.shape[0]

5043

In [13]:
# Hold out 25% of the training images for validation. The seed is fixed so that
# re-running the notebook yields the same train/validation partition.
SPLIT_SEED = 42
train_filenames, val_filenames, train_labels, val_labels = train_test_split(
    image_filenames, labels, test_size=0.25, random_state=SPLIT_SEED)

In [51]:
class My_Custom_Generator(tf.keras.utils.Sequence) :
    """Keras ``Sequence`` that yields ``(images, labels)`` batches read from TIFF files.

    Images are loaded from disk only when a batch is requested, so the whole
    dataset never has to be held in memory.
    """

    def __init__(self, image_filenames, labels, batch_size) :
        """Store the inputs.

        Args:
            image_filenames: sliceable sequence of image file paths.
            labels: sliceable sequence of targets, aligned with ``image_filenames``.
            batch_size: number of samples per batch.
        """
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self) :
        """Return the number of batches; the last one may hold fewer samples."""
        # Builtin int instead of the `np.int` alias, which NumPy 1.24 removed.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return batch number ``idx`` as ``(images / 255.0, labels)`` arrays."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]
        images = np.array([self.load_image(file_name) for file_name in batch_x])
        return images / 255.0, np.array(batch_y)

    def load_image(self, image_path, channels=3) :
        """Read one TIFF file and keep its first ``channels`` entries along the third axis.

        NOTE(review): assumes the stored array has at least 3 dimensions with
        the channels last -- confirm against the dataset.
        """
        with TiffFile(image_path) as tifi :
            image = tifi.asarray()[:, :, :channels]
        return image
    
class My_Custom_Generator_test(tf.keras.utils.Sequence) :
    """Keras ``Sequence`` that yields image-only batches read from TIFF files.

    Label-free counterpart of the training generator, intended for prediction.
    """

    def __init__(self, image_filenames, batch_size) :
        """Store the inputs.

        Args:
            image_filenames: sliceable sequence of image file paths.
            batch_size: number of samples per batch.
        """
        self.image_filenames = image_filenames
        self.batch_size = batch_size

    def __len__(self) :
        """Return the number of batches; the last one may hold fewer samples."""
        # Builtin int instead of the `np.int` alias, which NumPy 1.24 removed.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return batch number ``idx`` as an array of images divided by 255.0."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.image_filenames[start:stop]
        images = np.array([self.load_image(file_name) for file_name in batch_x])
        return images / 255.0

    def load_image(self, image_path, channels=3) :
        """Read one TIFF file and keep its first ``channels`` entries along the third axis.

        NOTE(review): assumes the stored array has at least 3 dimensions with
        the channels last -- confirm against the dataset.
        """
        with TiffFile(image_path) as tifi :
            image = tifi.asarray()[:, :, :channels]
        return image

In [15]:
# Number of images per batch, shared by every generator in the notebook.
batch_size = 8
# Batch generators over the training and validation splits.
train_generator = My_Custom_Generator(
    image_filenames=train_filenames, labels=train_labels, batch_size=batch_size)
val_generator = My_Custom_Generator(
    image_filenames=val_filenames, labels=val_labels, batch_size=batch_size)

In [16]:
# Image-only batch generator over the test files, used for prediction.
test_generator = My_Custom_Generator_test(
    image_filenames=image_filenames_test, batch_size=batch_size)

In [20]:
# ImageNet-pretrained backbone without its classification head.
# NOTE(review): the variable is called `mnV2`, but the network built here is
# MobileNetV3Large.
# NOTE(review): the generators divide pixel values by 255.0 -- confirm that this
# matches the input range expected by this backbone's own preprocessing.
mnV2 = tf.keras.applications.MobileNetV3Large(
    input_shape=(256, 256, 3),
    include_top=False,
    weights='imagenet',
)
# Freeze every backbone layer so that only the new head is trained.
for backbone_layer in mnV2.layers:
    backbone_layer.trainable = False

# Head: one 3x3 convolution, flatten, light dropout, then an 8-way softmax.
model = models.Sequential([
    mnV2,
    layers.Conv2D(1, (3, 3)),
    layers.Flatten(),
    layers.Dropout(0.1),
    layers.Dense(8, activation='softmax'),
])



In [21]:
# Print the layer-by-layer architecture and the parameter counts.
# NOTE(review): the saved output below lists a "mobilenetv2_1.00_224" backbone,
# while the model cell builds MobileNetV3Large -- the output is presumably stale;
# re-run this cell to refresh it.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Functi (None, 8, 8, 1280)        2257984   
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 6, 6, 1)           11521     
_________________________________________________________________
flatten_1 (Flatten)          (None, 36)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 36)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 296       
Total params: 2,269,801
Trainable params: 11,817
Non-trainable params: 2,257,984
_________________________________________________________________


In [53]:
# Train against the KL divergence between predicted and true class
# distributions; MSE and KL divergence are also reported as metrics.
model.compile(optimizer='adam',
              loss='kl_divergence',
              metrics=['mean_squared_error', 'kullback_leibler_divergence'])

# `fit` accepts Sequence objects directly; `fit_generator` is deprecated.
# No explicit step counts are passed: each epoch then runs len(generator)
# batches, so the final partial batch is no longer dropped as it was with
# the floor-divided step counts.
history = model.fit(train_generator,
                    epochs=30,
                    verbose=1,
                    validation_data=val_generator)

Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

# Idées 
* Entraîner avec MSE vs KL-div
* Utiliser un modèle pré-entraîné de type MobileNetV2
* Changer le batch_size

In [24]:
# Run the trained model over every batch of test images.
predictions = model.predict(x=test_generator)

In [34]:
N = len(image_filenames_test)
# Expand the 8 predicted columns into the 10-column layout: columns 0 and 8
# have no model output and stay at zero.
predictions_final = np.zeros((N, 10))
predictions_final[:, 1:8] = predictions[:, 0:7]
predictions_final[:, 9] = predictions[:, 7]

In [36]:
# Name the 10 prediction columns; the order matches the columns of
# `predictions_final`. (pandas is imported in the notebook's top import cell.)
CLASS_NAMES = ['no_data', 'clouds', 'artificial', 'cultivated', 'broadleaf',
               'coniferous', 'herbaceous', 'natural', 'snow', 'water']
df = pd.DataFrame(predictions_final, columns=CLASS_NAMES)

In [37]:
# Write the predictions to CSV, creating the output directory first so the
# export does not fail when the directory does not exist yet.
output_path = Path('results/mvnet/result2_dropout.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)