# Modelo

Iteración rápida con un modelo de U-Net

In [1]:
from sklearn.preprocessing import MinMaxScaler

from matplotlib import colors
from skimage import exposure
from tqdm.notebook import tqdm

# =================
# Tensorflow
# =================

from tqdm import tqdm_notebook, tnrange
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

import tensorflow as tf

from keras.models import Model, load_model
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout, MaxPool2D, UpSampling2D, Concatenate
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D
from keras.layers.merge import concatenate, add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from scipy import ndimage

#=========================

import rasterio as rio
import rasterio.plot as rio_plot
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import json
import typing as typ
# Semilla pseudoaleatoria
np.random.seed(24)

# Tamaño de las figuras
plt.rcParams["figure.figsize"] = (20,10)

# Ruta base de la carpeta de datos
DATA_BASE = "/home/ggonzr_cloud/deeplearn/data"

# Imagen
HEIGHT = 256
WIDTH = 256
CHANNELS = 4

## Definición del modelo

### Version del laboratorio

In [2]:
def conv2d_block(input_tensor, n_filters, kernel_size = 3, batchnorm = True):
    """Function to add 2 convolutional layers with the parameters passed to it"""
    # first layer
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    # second layer
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    return x

In [3]:
def get_unet(input_img, n_filters = 16, dropout = 0.1, batchnorm = True):
    """Function to define the UNET Model"""
    # Contracting Path (Encoder)
    c1 = conv2d_block(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    p1 = MaxPooling2D((2, 2))(c1)
    p1 = Dropout(dropout)(p1)
    
    c2 = conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    p2 = MaxPooling2D((2, 2))(c2)
    p2 = Dropout(dropout)(p2)
    
    c3 = conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    p3 = MaxPooling2D((2, 2))(c3)
    p3 = Dropout(dropout)(p3)
    
    c4 = conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    p4 = MaxPooling2D((2, 2))(c4)
    p4 = Dropout(dropout)(p4)
    
    c5 = conv2d_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)
    
    # Expansive Path (Decoder)
    u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(c5)
    u6 = concatenate([u6, c4])
    u6 = Dropout(dropout)(u6)
    c6 = conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    
    u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(c6)
    u7 = concatenate([u7, c3])
    u7 = Dropout(dropout)(u7)
    c7 = conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    
    u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(c7)
    u8 = concatenate([u8, c2])
    u8 = Dropout(dropout)(u8)
    c8 = conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    
    u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(c8)
    u9 = concatenate([u9, c1])
    u9 = Dropout(dropout)(u9)
    c9 = conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)
    model = Model(inputs=[input_img], outputs=[outputs])
    return model

### Versión V2

Link: https://github.com/nikhilroxtomar/Multiclass-Segmentation-in-Unet/blob/master/model.py

In [4]:
def conv_block(inputs, filters, pool=True):
    x = Conv2D(filters, 3, padding="same")(inputs)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    if pool == True:
        p = MaxPool2D((2, 2))(x)
        return x, p
    else:
        return x

def build_unet(shape, num_classes):
    inputs = Input(shape)

    """ Encoder """
    x1, p1 = conv_block(inputs, 16, pool=True)
    x2, p2 = conv_block(p1, 32, pool=True)
    x3, p3 = conv_block(p2, 48, pool=True)
    x4, p4 = conv_block(p3, 64, pool=True)

    """ Bridge """
    b1 = conv_block(p4, 128, pool=False)

    """ Decoder """
    u1 = UpSampling2D((2, 2), interpolation="bilinear")(b1)
    c1 = Concatenate()([u1, x4])
    x5 = conv_block(c1, 64, pool=False)

    u2 = UpSampling2D((2, 2), interpolation="bilinear")(x5)
    c2 = Concatenate()([u2, x3])
    x6 = conv_block(c2, 48, pool=False)

    u3 = UpSampling2D((2, 2), interpolation="bilinear")(x6)
    c3 = Concatenate()([u3, x2])
    x7 = conv_block(c3, 32, pool=False)

    u4 = UpSampling2D((2, 2), interpolation="bilinear")(x7)
    c4 = Concatenate()([u4, x1])
    x8 = conv_block(c4, 16, pool=False)

    """ Output layer """
    output = Conv2D(num_classes, 1, padding="same", activation="softmax")(x8)

    return Model(inputs, output)

## Funciones de carga de datos

In [5]:
def create_img_array(num_img, heigth, width, channels: int = 1) -> typ.Tuple[np.array, np.array]:
    array_rsp = np.zeros((num_img, heigth, width, channels), dtype=np.float32)    
    return array_rsp

In [6]:
def load_channel_raster(path_raster_tiff: str, channel: int = 1) -> np.array:
    rsp = None
    with rio.open(path_raster_tiff, "r") as rf:
        rsp = rf.read(channel)
    return rsp 

In [7]:
def load_source_img(img_folder_path: str) -> np.array:
    # Obtener la referencia a las bandas RGB + infrarrojo cercano
    red_channel = glob.glob(f"{img_folder_path}/B04.*")[0]
    green_channel = glob.glob(f"{img_folder_path}/B03.*")[0]
    blue_channel = glob.glob(f"{img_folder_path}/B02.*")[0]
    infrared_channel = glob.glob(f"{img_folder_path}/B08.*")[0]
    
    # Cargar las cuatro bandas
    channels_list = [
        red_channel, green_channel, blue_channel, infrared_channel
    ]
    raster_bands = [
        load_channel_raster(r)
        for r in channels_list
    ]
    
    # Normalizar los canales
    norm_data = lambda x: ((x - np.mean(x))/ np.std(x))
                           
    # Aplicar
    norm_raster_bands = [
        norm_data(raster_band)
        for raster_band in raster_bands
    ]
            
    # Construir el arreglo y retornar
    return np.array(norm_raster_bands)

In [8]:
def load_mask_img(mask_folder_path: str) -> np.array:
    # Obtener la referencia de la máscara
    mask_path = glob.glob(f"{mask_folder_path}/labels.*")[0]
    
    # Retornar la máscara
    return load_channel_raster(mask_path)

In [9]:
def ndvi(raster_array: np.array) -> np.array:
    # Se toma como referencia el orden de las bandas en load_source_img()
    # Las dimensiones aca son [bandas, altura, ancho]
    # Formula: NDVI (Sentinel 2) = (B8 – B4) / (B8 + B4)
    red_channel = raster_array[0, :, :]
    infrared_channel = raster_array[-1, :, :]
    
    # Evitar divisiones por cero e inestabilidades
    epsilon = 1e-8
    return ((infrared_channel - red_channel) / ((infrared_channel + red_channel) + epsilon))

In [10]:
def get_label_mask_path_folder(chip_id: str) -> str:
    # Retorna la ruta con base en el chip
    label_folder = f"{DATA_BASE}/ref_landcovernet_v1_labels/ref_landcovernet_v1_labels_{chip_id}"
    return label_folder

In [11]:
def label_mask_one_hot(raster_mask: np.array, classes: typ.List[int]) -> np.array:
    # Retorna una máscara One Hot por cada clase
    # Obtenerla en 2D solo con el primer canal
    raster_mask_2d = raster_mask 
    masks = [
        (raster_mask_2d == i).astype(np.uint16)
        for i in classes
    ]
    
    return np.array(masks)

## Cargar los datos de las imagenes 

In [12]:
# Cargar JSON con las imagenes
images_df = pd.read_json(f"{DATA_BASE}/images_to_use.json")

In [13]:
# Determinar muestra
sample_train = images_df.sample(frac=0.05, random_state=24)
sample_train_np = sample_train.to_numpy()

In [14]:
samples_number = sample_train_np.shape[0]
print(f"Muestras: {samples_number}")

Muestras: 1509


In [15]:
# Construir el arreglo
X = create_img_array (
    num_img = samples_number,
    heigth = HEIGHT,
    width = WIDTH,
    channels = CHANNELS
)

Y = create_img_array (
    num_img = samples_number,
    heigth = HEIGHT,
    width = WIDTH,  
    channels = 7
)

In [16]:
# Ejecutar la carga
for row_id in tqdm(range(len(sample_train_np))):
    row = sample_train_np[row_id]
    chip_id = row[1]
    source_path = row[2]
    
    # Cargar la imagen fuente
    source_raster = load_source_img(source_path)
    # Transformar con NDVI
    # Descartar para la version V2 de U-Net
    #source_raster = ndvi(source_raster)
    
    # Cargar la máscara
    mask_folder_path = get_label_mask_path_folder(chip_id=chip_id)
    mask_raster = load_mask_img(mask_folder_path)
    mask_raster_classes = label_mask_one_hot(mask_raster, list(range(1,8)))
    
    # Almacenar
    X[row_id] = rio_plot.reshape_as_image(source_raster)
    Y[row_id] = rio_plot.reshape_as_image(mask_raster_classes)
    

  0%|          | 0/1509 [00:00<?, ?it/s]



## Conjuntos de entrenamiento, validación, test

In [17]:
# Distribuir el conjunto de entrenamiento en dos, train y test
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

# Distribuir el conjunto de entrenamiento en dos, train y valid (validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

## Instanciar el modelo

### Versión del Labo

In [15]:
input_img = Input((HEIGHT, WIDTH, 1), name='img_shape')
model = get_unet(input_img, n_filters=16, dropout=0.05, batchnorm=True)
model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=["accuracy"])

2021-12-05 01:36:26.998853: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [17]:
callbacks = [
    EarlyStopping(patience=10, verbose=1),
    ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1),
    ModelCheckpoint('model-ndvi.h5', verbose=1, save_best_only=True, save_weights_only=True)
]

In [20]:
results = model.fit(X_train, y_train, batch_size=32, epochs=5, callbacks=callbacks,\
                    validation_data=(X_valid, y_valid))

Epoch 1/5



User settings:

   KMP_AFFINITY=granularity=fine,verbose,compact,1,0
   KMP_BLOCKTIME=0
   KMP_DUPLICATE_LIB_OK=True
   KMP_INIT_AT_FORK=FALSE
   KMP_SETTINGS=1
   OMP_NUM_THREADS=8

Effective settings:

   KMP_ABORT_DELAY=0
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'
   KMP_ALIGN_ALLOC=64
   KMP_ALL_THREADPRIVATE=128
   KMP_ATOMIC_MODE=2
   KMP_BLOCKTIME=0
   KMP_CPUINFO_FILE: value is not defined
   KMP_DETERMINISTIC_REDUCTION=false
   KMP_DEVICE_THREAD_LIMIT=2147483647
   KMP_DISP_NUM_BUFFERS=7
   KMP_DUPLICATE_LIB_OK=true
   KMP_ENABLE_TASK_THROTTLING=true
   KMP_FORCE_REDUCTION: value is not defined
   KMP_FOREIGN_THREADS_THREADPRIVATE=true
   KMP_FORKJOIN_BARRIER='2,2'
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
   KMP_GTID_MODE=3
   KMP_HANDLE_SIGNALS=false
   KMP_HOT_TEAMS_MAX_LEVEL=1
   KMP_HOT_TEAMS_MODE=0
   KMP_INIT_AT_FORK=true
   KMP_LIBRARY=throughput
   KMP_LOCK_KIND=queuing
   KMP_MALLOC_POOL_INCR=1M
   KMP_NUM_LOCKS_IN_BLOCK=1
   KMP_PLAIN_BARRIER='2,2'
   KMP_PLAIN_BARRIE

Epoch 00001: val_loss improved from inf to -1.30918, saving model to model-ndvi.h5
Epoch 2/5
Epoch 00002: val_loss improved from -1.30918 to -2.08403, saving model to model-ndvi.h5
Epoch 3/5
Epoch 00003: val_loss improved from -2.08403 to -3.95525, saving model to model-ndvi.h5
Epoch 4/5
Epoch 00004: val_loss improved from -3.95525 to -6.38723, saving model to model-ndvi.h5
Epoch 5/5
Epoch 00005: val_loss improved from -6.38723 to -9.03810, saving model to model-ndvi.h5


### Versión V2

In [18]:
model_v2_shape = (HEIGHT, WIDTH, 4)
model_v2_classes = 7
model = build_unet(model_v2_shape, model_v2_classes)

model.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer=tf.keras.optimizers.Adam(1e-4))

2021-12-05 02:47:00.243264: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [19]:
callbacks = [
        ModelCheckpoint("model.h5", verbose=1, save_best_model=True),
        ReduceLROnPlateau(monitor="val_loss", patience=3, factor=0.1, verbose=1, min_lr=1e-6),
        EarlyStopping(monitor="val_loss", patience=5, verbose=1)
]

In [20]:
results = model.fit(X_train, y_train, batch_size=32, epochs=5, callbacks=callbacks,\
                    validation_data=(X_valid, y_valid))

Epoch 1/5



User settings:

   KMP_AFFINITY=granularity=fine,verbose,compact,1,0
   KMP_BLOCKTIME=0
   KMP_DUPLICATE_LIB_OK=True
   KMP_INIT_AT_FORK=FALSE
   KMP_SETTINGS=1
   OMP_NUM_THREADS=8

Effective settings:

   KMP_ABORT_DELAY=0
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'
   KMP_ALIGN_ALLOC=64
   KMP_ALL_THREADPRIVATE=128
   KMP_ATOMIC_MODE=2
   KMP_BLOCKTIME=0
   KMP_CPUINFO_FILE: value is not defined
   KMP_DETERMINISTIC_REDUCTION=false
   KMP_DEVICE_THREAD_LIMIT=2147483647
   KMP_DISP_NUM_BUFFERS=7
   KMP_DUPLICATE_LIB_OK=true
   KMP_ENABLE_TASK_THROTTLING=true
   KMP_FORCE_REDUCTION: value is not defined
   KMP_FOREIGN_THREADS_THREADPRIVATE=true
   KMP_FORKJOIN_BARRIER='2,2'
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
   KMP_GTID_MODE=3
   KMP_HANDLE_SIGNALS=false
   KMP_HOT_TEAMS_MAX_LEVEL=1
   KMP_HOT_TEAMS_MODE=0
   KMP_INIT_AT_FORK=true
   KMP_LIBRARY=throughput
   KMP_LOCK_KIND=queuing
   KMP_MALLOC_POOL_INCR=1M
   KMP_NUM_LOCKS_IN_BLOCK=1
   KMP_PLAIN_BARRIER='2,2'
   KMP_PLAIN_BARRIE

Epoch 00001: saving model to model.h5
Epoch 2/5
Epoch 00002: saving model to model.h5
Epoch 3/5
Epoch 00003: saving model to model.h5
Epoch 4/5
Epoch 00004: saving model to model.h5
Epoch 5/5
Epoch 00005: saving model to model.h5


# Next section