# Playground Notebook

Simple playground notebook for quickly testing ideas. It will be refined later.

## 0. Imports

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import tifffile as tiff
from sklearn.model_selection import train_test_split
import datetime

# TF / Keras
import tensorflow as tf
from tensorflow.keras import layers as kl
from tensorflow.keras.models import Model

from sklearn.model_selection import train_test_split

In [None]:
# Load the TensorBoard IPython extension into this notebook session.
%load_ext tensorboard

## 1. Data

In [None]:
# Train: label table and image index, read from the local ../data directory
train_df = pd.read_csv('../data/train_labels_GY1QjFw.csv')
train_images_df = pd.read_csv('../data/train_images_Es8kvkp.csv')

# Test: prediction table and image index
# (presumably the "predicted_random" file is a sample submission — TODO confirm)
test_df = pd.read_csv('../data/test_predicted_random_rp2A5Fo.csv')
test_images_df = pd.read_csv('../data/test_images_kkwOpBC.csv')

In [None]:
# Scatter plot of the train and test image tables stacked together, each row
# tagged with its origin in a `type` column and ordered by `sample_id`.
px.scatter(
    pd.concat([
        train_images_df.assign(type='train'),
        test_images_df.assign(type='test'),
    ]).sort_values(by='sample_id').reset_index(drop=True),
    color='type',
)

In [None]:
# NOTE : remove 0 = clouds ?
# Per-class totals over the training label table, with the `no_data` and
# `clouds` columns forced to zero so they do not contribute to the overall sum.
# Computed once instead of rebuilding the same frame for numerator and denominator.
class_counts = (train_df.drop(columns=['sample_id'])
                .assign(no_data=0, clouds=0)
                .sum(axis=0))

# Inverse-frequency weight per class: total / class total, keyed by the
# positional index of the column (0, 1, 2, ...).
class_weight = (class_counts.sum() / class_counts) \
    .reset_index()[0] \
    .to_dict()

# The zeroed columns divide to inf; overwrite positions 0 and 1 with a zero weight.
# (assumes `no_data` and `clouds` are the first two label columns — TODO confirm)
# Stored as floats so every value in the dict has the same type: an integer 0
# here can make np.vectorize infer an integer dtype and truncate the weights.
class_weight[0] = 0.0
class_weight[1] = 0.0
class_weight

In [None]:
def data_handler(path, norm : int = 65536):
    """Read a TIFF file and return its content as an array.

    Args:
        path: location of the TIFF file, passed to ``tiff.imread``.
        norm: divisor applied to the raw values. Any falsy value
            (``0``, ``False``, ``None``) disables the scaling.

    Returns:
        The raw array divided by ``norm`` when ``norm`` is truthy,
        otherwise the raw array cast to ``np.int16``.
    """
    raw = tiff.imread(path)
    if not norm:
        return raw.astype(np.int16)
    return raw / norm

In [None]:
# Sanity check: load the image of training sample 7089 with the default
# scaling and show its shape and dtype.
X = data_handler('../data/dataset/train/images/7089.tif')
X.shape, X.dtype

In [None]:
# Sanity check: load the mask of the same sample without scaling (it is cast
# to int16 by data_handler) and show its shape and dtype.
Y = data_handler('../data/dataset/train/masks/7089.tif', norm = False)
Y.shape, Y.dtype

In [None]:
def handler(i : int):
    """Load the (image, mask) pair of training sample ``i``.

    Args:
        i: sample id, used as the file stem under the train ``images`` and
            ``masks`` directories.

    Returns:
        A tuple ``(image, mask)``: the image goes through ``data_handler``
        with its default scaling, the mask is loaded with ``norm = False``.
    """
    image = data_handler(f'../data/dataset/train/images/{i}.tif')
    mask = data_handler(f'../data/dataset/train/masks/{i}.tif', norm = False)
    return (image, mask)

In [None]:
# Load image and mask of sample 7089 in one call (rebinds X and Y).
X, Y = handler(7089)

In [None]:
# Display the slice at index 0 along the last axis of X
# (presumably the first spectral band — TODO confirm channel order).
px.imshow(X[...,0])

## 2. Model

Simple U-Net (for quick testing purposes), shamelessly inspired by
https://github.com/earthcube-lab/challenge-ens/blob/master/framework/model.py

In [None]:
"""
U-Net model definition.
"""

def UNet(input_shape,
         num_classes=10,
         output_activation='softmax',
         num_layers=4):
    """
    Build a U-Net style segmentation network (Ronneberger et al 2015).

    Architecture adapted from github.com/karolzak/keras-unet/master/keras_unet/models/satellite_unet.py

    Args:
        input_shape: shape handed to the Keras ``Input`` layer.
        num_classes: number of channels produced by the final 1x1 convolution.
        output_activation: activation applied by the final 1x1 convolution.
        num_layers: number of down-sampling stages that follow the stem, and
            therefore of the matching up-sampling stages.

    Returns:
        A Keras ``Model`` named ``'unet'`` mapping the input tensor to the
        output of the final 1x1 convolution.
    """
    # Channel widths do not grow with depth in this variant.
    base_width = 16    # every convolution except the one right after a skip merge
    merge_width = 24   # first convolution after each skip concatenation

    # Shared layer options.
    same_conv = dict(kernel_size=(3,3),
                     strides=(1,1),
                     padding='same',
                     kernel_initializer='he_normal')
    double_up = dict(kernel_size=(3,3),
                     strides=(2,2),
                     padding='same',
                     output_padding=(1,1))
    halve = dict(pool_size=(2,2),
                 strides=(2,2),
                 padding='valid')

    def norm_then_conv(tensor, width):
        # BatchNorm followed by a stride-1 3x3 ReLU convolution.
        normed = kl.BatchNormalization()(tensor)
        return kl.Conv2D(width, activation='relu', **same_conv)(normed)

    def norm_then_upconv(tensor, width):
        # BatchNorm followed by a stride-2 3x3 ReLU transposed convolution.
        normed = kl.BatchNormalization()(tensor)
        return kl.Conv2DTranspose(width, activation='relu', **double_up)(normed)

    inputs = kl.Input(input_shape)

    # Stem: the full-resolution feature map is kept for the last skip connection.
    stem = kl.Conv2D(base_width, activation='relu', **same_conv)(inputs)
    full_res_skip = norm_then_conv(stem, base_width)
    x = norm_then_conv(full_res_skip, base_width)
    x = kl.MaxPooling2D(**halve)(x)

    # Contracting path: remember one feature map per stage for the skips.
    skips = []
    for _ in range(num_layers):
        x = norm_then_conv(x, base_width)
        x = norm_then_conv(x, base_width)
        skips.append(x)
        x = norm_then_conv(x, base_width)
        x = kl.MaxPooling2D(**halve)(x)

    # Bottleneck, then the first up-sampling step.
    x = norm_then_conv(x, base_width)
    x = norm_then_conv(x, base_width)
    x = norm_then_upconv(x, base_width)

    # Expanding path: consume the skips deepest-first.
    while skips:
        x = kl.concatenate([x, skips.pop()])
        x = norm_then_conv(x, merge_width)
        x = norm_then_conv(x, base_width)
        x = norm_then_upconv(x, base_width)

    # Final merge with the full-resolution stem features.
    x = kl.concatenate([x, full_res_skip])
    x = norm_then_conv(x, merge_width)
    x = norm_then_conv(x, base_width)

    # Per-pixel class scores.
    outputs = kl.Conv2D(num_classes,
                        kernel_size=(1,1),
                        strides=(1,1),
                        activation=output_activation,
                        padding='valid')(x)

    return Model(inputs=[inputs], outputs=[outputs], name='unet')

In [None]:
# Smoke test: build a shallow U-Net and push one random batch through it.
unet_kwargs = dict(
    input_shape=(256, 256, 4),
    num_classes=10,
    output_activation='softmax',
    num_layers=1
)
print(f"Creating U-Net with arguments: {unet_kwargs}")
model = UNet(**unet_kwargs)
print("Summary:")
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" to the output.
model.summary()

# Batch of one random sample, shaped from the same input_shape the model was
# built with so the two cannot drift apart.
input_batch = tf.random.normal((1, *unet_kwargs['input_shape']), name='random_normal_input')
output = model(input_batch)
print(output.shape)

## 3. Data Pipeline

In [None]:
# All sample ids of the training label table, as a NumPy array.
train_inputs = np.array(train_df.sample_id)

In [None]:
# Load a randomly chosen training sample (rebinds X and Y) and display the
# slice at index 0 along the last axis of its image.
X, Y = handler(np.random.choice(train_inputs))
px.imshow(X[...,0])

In [None]:
# Split the sample ids (not the images) 80/20 with a fixed seed.
# Despite the names, X_train / X_test hold ids; X_test is the hold-out part of
# the training data and is later used as validation data in model.fit.
X_train, X_test = train_test_split(train_inputs, test_size=0.2, random_state=123)
X_train

In [None]:
# Number of sample ids in the training part of the split.
X_train.shape

In [None]:
def gen(input_list, 
        class_weight : dict = class_weight) : 
    """Yield ``(x, y, w)`` triples for every sample id in ``input_list``.

    Simple data generator built on ``handler``; intended as the place where
    data augmentation can be added later.

    Args:
        input_list: iterable of sample ids, each forwarded to ``handler``.
        class_weight: mapping from class id to weight. Defaults to the
            module-level ``class_weight`` dict bound when this function is
            defined.

    Yields:
        Tuples ``(x, y, w)`` where ``x`` is the image returned by ``handler``,
        ``y`` is the mask one-hot encoded over 10 classes and ``w`` is a
        float32 array holding, for every mask element, the weight of its class.
    """
    # Pin the output dtype. Without `otypes`, np.vectorize infers the dtype
    # from the result for the FIRST mask element only, so an integer weight
    # there (e.g. a plain `0`) silently truncated every other weight to an
    # integer. Building the vectorised lookup once also avoids re-creating it
    # for every sample.
    weight_of = np.vectorize(class_weight.get, otypes=[np.float32])

    # Iterate over the inputs
    for i in input_list:

        # Get (x, y)
        x, y = handler(i)

        # Per-element weights looked up from the class_weight dict
        w = weight_of(y)

        # To categorical
        y = tf.keras.utils.to_categorical(y, num_classes=10)

        yield (x, y, w)

In [None]:
# Training pipeline: stream (x, y, w) triples from `gen` over the training ids,
# shuffle them through a 1024-element buffer and group them in batches of 64.
train_dataset = tf.data.Dataset.from_generator(gen, args = [X_train], output_types=(tf.float32, tf.int16, tf.float32))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

# Validation pipeline: same generator over the held-out ids. It is NOT
# shuffled: sample order does not change validation metrics, and filling a
# shuffle buffer only cost memory and start-up time.
test_dataset = tf.data.Dataset.from_generator(gen, args = [X_test], output_types=(tf.float32, tf.int16, tf.float32))
test_dataset = test_dataset.batch(64)

In [None]:
# Pull a single batch out of the training pipeline for inspection.
# next(iter(...)) states the intent directly and raises StopIteration on an
# empty dataset instead of silently leaving `ex` undefined.
ex = next(iter(train_dataset.take(1)))

### 3.1. Apply weights

According to https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-for-imbalanced-classes-in-keras, "if tf dataset is used you cannot use the class_weights parameter. Instead return the weight from a parse_function in your pipeline".

TODO Later

See also https://stackoverflow.com/questions/65881582/how-to-use-class-weights-in-keras-for-image-segmentation

## 4. Compile model

In [None]:
# There are multiple optimizers, loss functions and metrics that can be used to compile multi-class segmentation models
# Ideally, try different options to get the best accuracy
#
# NOTE(review): `sample_weight_mode="temporal"` is documented for per-timestep
# weighting of sequence outputs; confirm that the installed TF/Keras version
# still accepts it and that it has the intended effect on per-pixel weights.
# NOTE(review): entries in `metrics` are presumably not scaled by the sample
# weights coming from the dataset (`weighted_metrics` exists for that) — verify
# whether zero-weight classes should count towards the reported accuracy.
model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['categorical_accuracy'],
              sample_weight_mode="temporal",
              )

## 5. FIT !

In [None]:
# One TensorBoard log directory per run, named after the current local time.
log_dir = f"logs/fit/{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# Train for 5 epochs on the training pipeline, using the held-out pipeline as
# validation data and logging to TensorBoard through the callback.
model.fit(train_dataset, 
          epochs=5, 
          validation_data = test_dataset, 
          verbose = 1,
          callbacks=[tensorboard_callback],
        )