# RPN

Afegim a la U-Net una branca nova, la branca de *region proposal network (RPN)*. Introduïda per primer cop per la *Faster R-CNN*, duu a terme dues tasques alhora: per una part refina tot un conjunt d'<a href="https://www.termcat.cat/ca/cercaterm/bounding%20box?type=basic">envolupants</a> i per l'altra indica quina és la probabilitat que cada un d'ells contengui un objecte.

<img style="width:75%" src="https://www.researchgate.net/publication/333048961/figure/fig1/AS:758094162296847@1557755141554/The-framework-of-Faster-R-CNN-RPN-region-proposal-network-RoI-region-of-interest-FC.ppm" />

### Importam llibreries

In [None]:
import os
import json
import colorsys
import random
from datetime import datetime

import cv2 
import skimage
import skimage.io
import skimage.color
import skimage.transform
import numpy as np
import pandas as pd
import imgaug as ia
import imgaug.augmenters as iaa
from numpy.random import seed
from matplotlib import pyplot as plt
from tqdm.auto import tqdm
from matplotlib import patches,  lines

from tensorflow.keras import backend as K
import tensorflow.keras.layers as keras_layer
import tensorflow as tf
from tensorflow.keras import utils as KU

# Llibraries pròpies
from u_cells.u_cells.data import unet as u_data
from u_cells.u_cells.data import rpn as rpn_data
from u_cells.u_cells.data import datasets as rpn_datasets
from u_cells.u_cells.model import unet as u_model
from u_cells.u_cells.model import rpn as rpn_model
from u_cells.u_cells.model import resnet as resnet_model
from u_cells.u_cells.common import config as rpn_config
from u_cells.u_cells.common import data as common_data
from u_cells.u_cells import layers as own_layers
from u_cells.u_cells.common import metrics as rpn_metrics
from u_cells.u_cells.common import losses as rpn_losses

seed(1)

In [None]:
# ============================================
# Optimisation Flags - Do not remove
# ============================================

# All runtime tuning switches are exported to the process environment in a
# single call; the entries are applied in the order they are listed.
# (Deliberately left unset: 'TF_CPP_MIN_LOG_LEVEL' = '3'.)
os.environ.update({
    'CUDA_CACHE_DISABLE': '0',
    'HOROVOD_GPU_ALLREDUCE': 'NCCL',
    'TF_GPU_THREAD_MODE': 'gpu_private',
    'TF_GPU_THREAD_COUNT': '1',
    'TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT': '1',
    'TF_ADJUST_HUE_FUSED': '1',
    'TF_ADJUST_SATURATION_FUSED': '1',
    'TF_ENABLE_WINOGRAD_NONFUSED': '1',
    'TF_SYNC_ON_FINISH': '0',
    'TF_AUTOTUNE_THRESHOLD': '2',
    'TF_DISABLE_NVTX_RANGES': '1',
})

# =================================================

In [None]:
def random_colors(N, bright=True):
    """
    Build a shuffled palette of N RGB colours.

    Hues are spaced evenly around the HSV wheel at full saturation (value 1.0
    when `bright`, 0.7 otherwise), converted to RGB tuples with components in
    [0, 1], and then shuffled in place with the global `random` generator.
    """
    value = 1.0 if bright else 0.7
    palette = [colorsys.hsv_to_rgb(step / N, 1, value) for step in range(N)]
    random.shuffle(palette)
    return palette

def draw_bboxes(img, bboxes, thickness=3):
    """
    Return a uint8 copy of `img` with one rectangle drawn per bounding box.

    Each box is read as [y1, x1, y2, x2] (indices 1/0 form the first corner,
    3/2 the opposite one) and gets its own colour from `random_colors`,
    scaled from [0, 1] to [0, 255]. The input image is left untouched.
    """
    canvas = np.copy(img.astype(np.uint8))
    palette = random_colors(len(bboxes))

    for box, rgb in zip(bboxes, palette):
        first_corner = (box[1], box[0])
        opposite_corner = (box[3], box[2])
        canvas = cv2.rectangle(canvas, first_corner, opposite_corner, np.array(rgb) * 255, thickness)

    return canvas

def make_masks(mask, slice_mask):
    """
    Select one channel, or the sum of a channel range, from a stacked mask.

    Args:
        mask: Array indexed as ``mask[:, :, channel]``.
        slice_mask: Either a two-element list/tuple ``[start, stop]``, in which
            case channels ``start:stop`` are summed along the last axis, or a
            single integer channel index, in which case that channel is
            returned as-is.

    Returns:
        The selected (or summed) 2-D mask.

    Raises:
        ValueError: If `slice_mask` is a sequence whose length is not 2, or is
            neither a list/tuple nor an integer.
    """
    if isinstance(slice_mask, (list, tuple)):
        if len(slice_mask) != 2:
            raise ValueError("slice_mask must contain exactly two items: [start, stop]")

        start, stop = slice_mask
        return np.sum(mask[:, :, start:stop], axis=-1)

    # The original repeated the `list` check here, which made the
    # single-channel branch unreachable and rejected every integer index.
    if isinstance(slice_mask, (int, np.integer)):
        return mask[:, :, slice_mask]

    raise ValueError("slice_mask must be a [start, stop] pair or an integer channel index")

### Configuració

Primerament cream una classe de configuració per a l'execució i l'entrenament de la xarxa. En aquesta classe deixam els valors per defecte, exceptuant el nombre de classes, la mida de les àncores, les passes per època i el llindar mínim de confiança.

In [None]:
# Notebook-wide experiment switches.
# MULTI_CLASS selects NUM_CLASSES in CellConfig (1 + 3 when True, 1 + 1 otherwise).
MULTI_CLASS = False
# PYRAMID picks the RPN.features_2_rpn branch in the scratch feature-building cell.
PYRAMID = False
# TRANSFER gates the scratch cell that freezes/unfreezes layers.
TRANSFER = False

In [None]:
class CellConfig(rpn_config.Config):
    """Configuration used for the cell experiments in this notebook.

    Derives from the base Config class and overrides some values; every
    attribute not listed here keeps the value defined by the base class.
    """
    # Give the configuration a recognizable name
    NAME = "cells"
    BATCH_SIZE = 6

    # A single backbone stride and a single anchor scale are configured.
    # NOTE(review): an earlier comment here mentioned a 12GB GPU fitting two
    # images, which does not match BATCH_SIZE = 6 above — confirm the intended
    # hardware assumptions.
    BACKBONE_STRIDES = [4]
    RPN_ANCHOR_SCALES = [32]

    # Number of classes (including background)
    if MULTI_CLASS:
        NUM_CLASSES = 1 + 3  # Background + 3 classes
    else:
        NUM_CLASSES = 1 + 1  # Background + 1 class
    

    # Number of training steps per epoch
    STEPS_PER_EPOCH = 50
#     LEARNING_RATE = 3e-01

    # Minimum detection confidence is set to 0.
    # NOTE(review): presumably this means no detection is skipped on
    # confidence alone — confirm against the base Config semantics.
    DETECTION_MIN_CONFIDENCE = 0
    PRED_THRESHOLD = 0.99999995
    
    IMAGE_SHAPE = [512, 512, 3]
    
    IMAGE_MAX_DIM = 512
    IMAGE_MIN_DIM = 400
    
    COMBINE_FG = False
    RANDOM_MASKS = False
    MAKE_BACKGROUND_MASK = False
#     RPN_TRAIN_ANCHORS_PER_IMAGE = 200
    VALIDATION_STEPS = 10
    MAX_GT_INSTANCES = 100
    
    # Switches for the mask, mask-class and merge branches.
    DO_MASK = True
    DO_MASK_CLASS = True
    DO_MERGE_BRANCH = True


# Instantiate the configuration and replace the list-valued IMAGE_SHAPE class
# attribute with a NumPy array on this instance.
config = CellConfig()
config.IMAGE_SHAPE = np.array([512,512,3])

print(config)

## Entrenament

Per realitzar l'entrenament primerament cream dos generadors d'imatges. Els generadors en el cas de la *RPN* es creen en dos temps. Primerament cream objectes **Dataset**.

### Dataset

Definim un objecte Dataset. Anàlogament a la configuració, ja definida, es basa en l'herència de classes abstractes definides a les llibreries. Un detall important és que en el cas de la RPN les dades es formen a partir dels envolupants, en lloc de l'inrevés.

In [None]:
# Training dataset: loads the TRAIN subset from "./in/eritocitos_augmented/".
# NOTE(review): the tuple ("cell", 1, "cell") is presumably (source, class id,
# class name) — confirm against ErithocytesDataset.
dataset_train = rpn_datasets.ErithocytesDataset([("cell", 1, "cell")], "bboxes.json")
dataset_train.load_cell("./in/eritocitos_augmented/", rpn_datasets.Subset.TRAIN)
dataset_train.prepare()

In [None]:
# Validation dataset: same constructor arguments and folder as the training
# dataset, but loading the VALIDATION subset.
dataset_val = rpn_datasets.ErithocytesDataset([("cell", 1, "cell")], "bboxes.json")
dataset_val.load_cell("./in/eritocitos_augmented/", rpn_datasets.Subset.VALIDATION)
dataset_val.prepare()

Una vegada definit el *dataset* cream dues instàncies, una per l'entrenament i l'altra per validació.

In [None]:
# One generator per dataset; both are created with shuffling disabled.
# NOTE(review): the first positional argument (50 / 2) is presumably a step or
# batch count — confirm against rpn_data.DataGenerator (config.BATCH_SIZE is 6).
# Only the training generator requests `phantom_output`.
train_generator = rpn_data.DataGenerator(50, dataset_train, config, shuffle=False, phantom_output=True)
val_generator = rpn_data.DataGenerator(2, dataset_val, config, shuffle=False)

## Construïm el model

### Backbone model

Primerament construïm la U-Net que emprarem com a *backbone* pel model RPN.

In [None]:
def build_model(mode : rpn_model.NeuralMode):
    """Assemble the U-Net encoder/decoder with an RPN branch and return the RPN wrapper.

    Steps, in order: build the U-Net encoder; fuse its last three layers into a
    256-channel feature map (transposed convolution + concatenation + 1x1
    convolution, twice); build the RPN head on that map; build per-encoder-layer
    gradient heatmaps; build the U-Net decoder; and finally call `rpn.build`
    with the RPN and decoder outputs.

    Args:
        mode: Neural mode forwarded to `rpn_model.RPN` (the notebook uses TRAIN
            and INFERENCE).

    Returns:
        The `rpn_model.RPN` instance after `build` has been called on it.

    Note:
        Reads the module-level `config` object.
    """
    encoder = u_model.EncoderUNet(input_size=(512, 512, 3), residual = True)
    input_image, embedded = encoder.build(n_filters=16, last_activation='softmax', dilation_rate=1, layer_depth=5)

    # Start from the second-to-last encoder layer and merge the upsampled last
    # layer into it, then repeat with the third-to-last layer.
    features = list(encoder.layers.values())[-2]
    features = keras_layer.Concatenate(axis=-1, name="conc_1")([features, keras_layer.Conv2DTranspose(256, kernel_size=(3, 3), strides=(2, 2), name="convd_tranposed_1", padding="same")(list(encoder.layers.values())[-1])])
    features = keras_layer.Conv2D(256, (1,1), name="conv_1")(features)
    features = keras_layer.Concatenate(axis=-1, name="conc_2")([(list(encoder.layers.values())[-3]), keras_layer.Conv2DTranspose(256, name="convd_tranposed_2", kernel_size=(3, 3), strides=(2, 2), padding="same")(features)])
    features = keras_layer.Conv2D(256, (1,1), name="conv_2")(features)

    rpn = rpn_model.RPN(mode, (512, 512, 3), features, 256, None, input_image, config)
    rpn_out, rpn_conv  = rpn.build_rpn(features)

    # Only the class output is used below; `rpn_bbox` and `rpn_conv` are not
    # referenced again in this function.
    _, rpn_class, rpn_bbox  = rpn_out

    # NOTE(review): `grad_cam` and `coord_conv` are filled in the loop but are
    # only referenced by commented-out code in this function — confirm whether
    # the heatmap layers are still needed.
    grad_cam = {}
    coord_conv = {}
    size = 512
    for key_layer, value in list(encoder.layers.items()):
#         grad_cam[key_layer] = rpn_layers.GradCAM(name=f"grad_cam_{key_layer}")(input_image, encoder.layers[key_layer], rpn_class)
        # Gradient of the RPN class output with respect to this encoder layer.
        grads = keras_layer.Lambda(lambda x: tf.gradients(x[1], x[0], unconnected_gradients='zero'), name=f"grad_{key_layer}")([encoder.layers[key_layer], rpn_class])

        # This is a vector where each entry is the mean intensity of the gradient over a specific
        # feature map channel
        pooled_grads = keras_layer.Lambda(lambda x: tf.reduce_mean(x, axis=(1, 2)), name=f"pooled_grads_{key_layer}")(grads[0])

        # We multiply each channel in the feature map array by "how important this channel is" with
        # regard to the top predicted class then sum all the channels to obtain the heatmap class
        # activation
        last_conv_layer_output = encoder.layers[key_layer]

        # Re-insert two singleton axes so the pooled gradients broadcast over
        # the feature map in the multiplication below.
        pooled_grads = keras_layer.Lambda(
            lambda x: tf.expand_dims(tf.expand_dims(x, axis=1), axis=1))(pooled_grads)
        heatmap = keras_layer.Lambda(lambda x: x[0] * x[1])([last_conv_layer_output, pooled_grads])
        heatmap = keras_layer.Lambda(lambda x: tf.reduce_sum(x, axis=-1))(heatmap)
        heatmap = keras_layer.Lambda(lambda x: tf.expand_dims(x, axis=-1))(heatmap)
        
        # Clamp negatives to zero and divide by the tensor's maximum.
        # NOTE(review): the divisor is the maximum of the unclamped tensor and
        # is not guarded against zero — confirm this cannot occur.
        heatmap = keras_layer.Lambda(lambda x: tf.maximum(x, 0) / tf.math.reduce_max(x))(heatmap)
        
        # `size` is halved with true division, so it becomes a float after the
        # first iteration.
        coord_conv[key_layer] = [size, size]
        size /= 2
        grad_cam[key_layer] = heatmap
        
    decoder = u_model.DecoderUNet(input_size=None, residual=True, n_channels=100, class_output_size=512, 
                                  merge_branch=True)
#     mask_out = decoder.build(n_filters=16, last_activation='sigmoid', extra_layer = grad_cam, encoder=encoder,
    mask_out, class_out, merge_branch = decoder.build(n_filters=16, last_activation='sigmoid', encoder=encoder, 
                             dilation_rate=1, embedded=embedded)
    
    rpn.build(mask_shape = [512, 512, None], rpn=rpn_out, mask_output=mask_out, do_mask=True, mask_class=class_out, 
              merge_branch=merge_branch)
    
    return rpn

In [None]:
# Build the network in training mode and compile it with the mask, class-mask
# and merge-branch options all enabled.
rpn = build_model(rpn_model.NeuralMode.TRAIN)
rpn.compile(do_mask=True, do_class_mask=True, do_merge_branch=True);

In [None]:
# Timestamp that names this run's TensorBoard log directory.
datetime_str = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
print(datetime_str)

# Train for 100 epochs, checkpointing to "./pesos.hdf5" and logging to
# TensorBoard (histograms every epoch, profiling batches 500 to 520).
rpn.train(
    train_generator=train_generator,
    val_generator=val_generator,
    epochs=100,
    check_point_path="./pesos.hdf5",
    validation_steps=2,
    callbacks=[
        tf.keras.callbacks.TensorBoard(
            log_dir=f"./out/logs/{datetime_str}",
            histogram_freq=1,
            profile_batch='500,520',
        )
    ],
)
#           use_multiprocessing=True, workers=12) # To increase performance ?

In [None]:
# Display the RPN_NUM_OUTPUTS attribute of the active configuration.
config.RPN_NUM_OUTPUTS

### RPN model

### Entrenam el model

## Inferència

Per realitzar la inferència generam un nou model, amb el mode ``INFERENCE``. Una vegada creat hem de carregar els pesos des d'un fitxer, generat quan acabam l'entrenament.

In [None]:
# Rebuild the network in inference mode, load weights from "./pesos.hdf5"
# (the same path given as check_point_path to rpn.train) and print a summary.
rpn = build_model(rpn_model.NeuralMode.INFERENCE)
rpn.load_weights("./pesos.hdf5")
rpn.summary()

In [None]:
# Grab the first item produced by the training generator and stop iterating.
for t in train_generator:
    break
# Predict on element [0][0] of that item; the five outputs are unpacked in order.
masks, cls, bboxes, msk_cls, mask_merge = rpn.predict(t[0][0])

In [None]:
# Display the shape of element [0][1] of the batch taken from the training generator.
t[0][1].shape

In [None]:
# Display the shape of the merged-mask prediction.
mask_merge.shape

In [None]:
# Boolean view of which entries of the first `msk_cls` prediction exceed 0.5.
msk_cls[0] > 0.5

In [None]:
%matplotlib inline

plt.figure(figsize=(25, 25))
for i in range(81):
#     print((masks[0, :, :, i].min(), masks[0, :, :, i].max()))
    plt.subplot(9, 9, i + 1)
    mask = masks[0,:,:,i]
    mask[mask < 0.5] = 0 
    plt.imshow(mask)

In [None]:
# Display the shape of the predicted masks array.
masks.shape

In [None]:
%matplotlib notebook

# Show the merged-mask prediction of the first image in the batch.
plt.figure()
# NOTE(review): the default figure size is changed after the figure above was
# created, so it presumably only applies to figures created later — confirm intent.
plt.rcParams['figure.figsize'] = [10, 5]
# m = masks[2, :, :, 0]
# m[m < 0.5] = 0
plt.imshow(mask_merge[0, :, :]);

In [None]:
# Display the minimum value of the first image's predicted masks.
masks[0].min()

In [None]:
# Sanity check: the second dimension of the class and bbox predictions must
# both equal the number of anchors held by the validation generator.
assert (cls.shape[1] == bboxes.shape[1]) and (cls.shape[1] == val_generator.anchors.shape[0]), "Ancores i predicció diferents"

#### Aplicam les *deltas* als anchors

In [None]:
# Window used by `windows_to_img` and by the metrics cell's border filter.
# NOTE(review): presumably [y1, x1, y2, x2] of the resized image inside the
# 512x512 network input — confirm.
WINDOW = [64, 0, 448, 512]
# NOTE(review): presumably (height, width, channels) of the original image — confirm.
ORG_IMG = [2352, 3136, 3]
IMG_SHAPE = [512, 512, 3]

In [None]:
# Multiply the predicted bbox deltas of batch element 1 by config.RPN_BBOX_STD_DEV.
bboxes_deltas = bboxes[1] * config.RPN_BBOX_STD_DEV

In [None]:
def windows_to_img(window_position, img_shape, bboxes):
    """
    Map bounding boxes from window coordinates to original-image coordinates.

    Each coordinate is shifted by the window origin on its own axis and scaled
    by the image/window size ratio of that same axis. Columns are read as
    alternating (y, x, y, x, ...), matching the [y1, x1, y2, x2] layout that
    `draw_bboxes` consumes.

    Args:
        window_position: ``[y1, x1, y2, x2]`` of the window inside the resized
            input.
        img_shape: ``(height, width, ...)`` of the original image.
        bboxes: 2-D array with one box per row.

    Returns:
        A new floating-point array with the transformed boxes; the input array
        is not modified.
    """
    # Copy so the caller's array is untouched; promote integer input to float,
    # otherwise the scaled values would be truncated.
    bboxes = np.array(bboxes, copy=True)
    if not np.issubdtype(bboxes.dtype, np.floating):
        bboxes = bboxes.astype(float)

    win_y1, win_x1, win_y2, win_x2 = window_position[:4]
    # image_coord = (window_coord - origin) * (image_size / window_size), with
    # size and origin taken from the SAME axis as the coordinate. The previous
    # version paired y with the width ratio and x with the height ratio, which
    # is only right when the aspect ratio is preserved exactly.
    scale_y = img_shape[0] / (win_y2 - win_y1)
    scale_x = img_shape[1] / (win_x2 - win_x1)

    axis = np.arange(bboxes.shape[1]) % 2  # 0 -> y column, 1 -> x column
    origin = np.array([win_y1, win_x1], dtype=bboxes.dtype)[axis]
    scale = np.array([scale_y, scale_x], dtype=bboxes.dtype)[axis]

    return (bboxes - origin) * scale

In [None]:
# Decode the scaled deltas with the validation generator's `decode_deltas`.
bboxes_deltas = val_generator.decode_deltas(bboxes_deltas)

In [None]:
# Convert the decoded boxes from WINDOW coordinates to ORG_IMG coordinates.
bboxes_deltas = windows_to_img(WINDOW, ORG_IMG, bboxes_deltas)

#### Filtram els envolupants amb una *objectness* menor que 0.7

In [None]:
# Load the validation image on which the predicted boxes will be drawn.
in_img = cv2.imread("./in/bboxes_class/val/5.png")
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
if in_img is None:
    raise FileNotFoundError("Could not read image: ./in/bboxes_class/val/5.png")
# OpenCV decodes colour images as BGR while matplotlib displays RGB; convert so
# the figure shows the true colours.
in_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2RGB)
in_img = in_img.astype(np.uint8)

plt.figure()
plt.imshow(in_img);

In [None]:
# Display the shape of the loaded image.
in_img.shape

In [None]:
# Keep only the anchors of batch element 1 whose column-1 score exceeds 0.7.
# The mask is computed once and applied to scores and boxes alike.
confident = cls[1][:, 1] > 0.7
cls_filtered = cls[1][:, 1][confident]
bboxes_filtered = bboxes_deltas[confident]
bboxes_filtered = bboxes_filtered.astype(int)

# Discard boxes that touch or exceed the original image bounds. The limits come
# from ORG_IMG (index 1 bounds column 3, index 0 bounds column 2) instead of
# repeating its values as literals.
inside_image = ((bboxes_filtered[:, 0] > 0) & (bboxes_filtered[:, 1] > 0)
                & (bboxes_filtered[:, 3] < ORG_IMG[1]) & (bboxes_filtered[:, 2] < ORG_IMG[0]))
cls_filtered = cls_filtered[inside_image]
bboxes_filtered = bboxes_filtered[inside_image]

bboxes_filtered.shape

In [None]:
%matplotlib notebook
# Apply non-maximum suppression to the filtered boxes (0.3 is passed as the
# second argument, the filtered scores as the third) and draw the survivors.
bboxes_def = common_data.non_max_suppression_fast(bboxes_filtered, 0.3, cls_filtered)
img = draw_bboxes(in_img, bboxes_def, 3)

plt.figure()
plt.imshow(img);

In [None]:
# Display the shape of the boxes kept after non-maximum suppression.
bboxes_def.shape

# Mètriques

In [38]:
# Evaluate the model on every validation image: predict, decode and filter the
# boxes, save a visualisation, and collect per-image plus global metrics.
THRESH = 0.7
gt_g = []  # ground-truth labels accumulated over all images
p_g = []   # predicted labels accumulated over all images

dataset = dataset_val
generador = val_generator
resultats = []
diff_gen = 0

# The output directory does not depend on the image, so create it once instead
# of on every iteration.
img_path = os.path.join(".", "out", "res")
os.makedirs(img_path, exist_ok=True)

for idx in tqdm(dataset.image_ids):
    org_img, _, _ , gt_bbox, mask_gt = rpn_data.DataGenerator.load_image_gt(dataset, config, idx)
    img = np.copy(org_img)
    
    img = generador.mold_image(img)
    mask_pred, cls, bboxes, msk_cls, mask_merge = rpn.predict(img.reshape(1, 512, 512, 3))
    
    bboxes_deltas = bboxes[0] * config.RPN_BBOX_STD_DEV
    bboxes_deltas = generador.decode_deltas(bboxes_deltas)
    
    # Score mask computed once and applied to both boxes and scores.
    confident = cls[0][:, 1] > THRESH
    bboxes_filtered = bboxes_deltas[confident]
    cls = cls[0][:, 1][confident]

    # Keep only boxes lying at least 5 units inside the WINDOW borders.
    inside_the_box = ((bboxes_filtered[:, 0] > WINDOW[0] + 5) & 
                       (bboxes_filtered[:, 1] > WINDOW[1] + 5) & 
                       (bboxes_filtered[:, 2] < WINDOW[2] - 5) & 
                       (bboxes_filtered[:, 3] < WINDOW[3] - 5))
    
    bboxes_filtered = bboxes_filtered[inside_the_box]
    cls = cls[inside_the_box]
    
    bboxes_filtered = common_data.non_max_suppression_fast(bboxes_filtered, 0.3, cls)
    res = draw_bboxes(org_img, bboxes_filtered, 1)
    
    cv2.imwrite(os.path.join(img_path, f"{idx}.png"), res)
    
    _, _, pred = rpn_metrics.relate_bbox_to_gt(bboxes_filtered, gt_bbox)

    gt_p = [1] * len(pred)

    # Predicted boxes beyond those returned in `pred` are appended as
    # prediction 1 against ground truth 0.
    if len(pred) < len(bboxes_filtered):
        diff = len(bboxes_filtered) - len(pred)
        pred = pred + [1] * diff
        gt_p = gt_p + [0] * diff
        
    # Extend in place: rebuilding the global lists with `+` on every iteration
    # copies everything accumulated so far.
    gt_g.extend(gt_p)
    p_g.extend(pred)
    metrics = list(rpn_metrics.basic_metrics(gt_p, pred))
    
    # Absolute difference between the last dimension of the ground-truth mask
    # and the number of `msk_cls` entries that are at least 0.5.
    msk_cls[msk_cls < 0.5] = 0
    diff = np.abs(mask_gt.shape[-1] - np.count_nonzero(msk_cls))
    diff_gen += diff
    
    metrics.append(diff)
#     rpn_losses.onw_dice_coefficient()
    
    resultats.append(metrics)

# Final row: metrics over all images plus the mean count difference.
diff_gen /= len(dataset.image_ids)
resultats.append(list(rpn_metrics.basic_metrics(gt_g, p_g)) + [diff_gen])
df = pd.DataFrame(resultats)
df.columns = ['Precision BB', 'Recall BB', 'F1 BB', "Diff MSK_CLS"]

# df.to_csv(os.path.join(img_path, "resultats_experiment_.csv"))
df

  0%|          | 0/16 [00:00<?, ?it/s]

Unnamed: 0,Precision BB,Recall BB,F1 BB,Diff MSK_CLS
0,0.782609,0.9,0.837209,6.0
1,0.660377,0.945946,0.777778,6.0
2,0.627907,0.771429,0.692308,9.0
3,0.695652,0.969697,0.810127,6.0
4,0.627907,0.658537,0.642857,9.0
5,0.7,0.933333,0.8,8.0
6,0.733333,0.897959,0.807339,2.0
7,0.655738,0.769231,0.707965,15.0
8,0.765432,0.873239,0.815789,5.0
9,0.732394,0.825397,0.776119,5.0


In [None]:
# Display the path a results CSV would use; this cell only builds the path and
# writes nothing.
os.path.join(img_path, "resultats_experiment_.csv")

# Brutor

In [None]:
# Scratch cell: builds an RPN directly instead of going through build_model.
# NOTE(review): `features`, `mask_out` and `input_image` are only assigned
# inside build_model (or in a later scratch cell) in the visible notebook, so
# this cell presumably fails unless they were defined in an earlier session — confirm.
rpn = rpn_model.RPN(rpn_model.NeuralMode.TRAIN, (512, 512, 3), features, 256, mask_out, 
                    input_image, config)

rpn.build()
rpn.compile()
rpn.summary()

#### Múltiples entrades


In [None]:
# Scratch cell: when TRANSFER is set, freeze every layer of `backbone.model`
# and then re-enable training on the last four entries of `encoder`.
# NOTE(review): `backbone` and a notebook-level `encoder` are not defined in
# the visible cells — confirm where they come from before enabling TRANSFER.
if TRANSFER:
    for l in backbone.model.layers:
        l.trainable = False

    for l in list(encoder.values())[::-1][:4]: # Four inner layers
        l.trainable = True

In [None]:
# Scratch cell: rebuilds the RPN input features at notebook scope, using either
# RPN.features_2_rpn (PYRAMID) or the concatenate + 1x1 convolution chain below.
# Unlike build_model, this chain upsamples with UpSampling2D rather than
# Conv2DTranspose.
# NOTE(review): `encoder` is treated as a mapping here (`encoder.values()`) and
# is not defined at notebook scope in the visible cells — confirm.
import tensorflow as tf

if PYRAMID:
    features = rpn_model.RPN.features_2_rpn(list(encoder.values())[::-1][:3], 256)
    features = features[::-1]
else:
    # `feat` is assigned but never used in this cell.
    feat = []
    features = list(encoder.values())[-2]
    features = keras_layer.Concatenate(axis=-1)([features, keras_layer.UpSampling2D(size=(2, 2))(list(encoder.values())[-1])])
    features = keras_layer.Conv2D(256, (1,1))(features)
    features = keras_layer.Concatenate(axis=-1)([(list(encoder.values())[-3]), keras_layer.UpSampling2D(size=(2,2))(features)])
    features = keras_layer.Conv2D(256, (1,1))(features)

features

In [None]:
# Scratch cell: builds an inference-mode RPN directly from notebook-level names.
# NOTE(review): `mask_out` and `input_image` are only assigned inside
# build_model in the visible notebook — confirm they exist at this scope.
rpn = rpn_model.RPN(rpn_model.NeuralMode.INFERENCE, (512, 512, 3), features, 256, mask_out, 
                    input_image, config)

rpn.build()

In [None]:
# Load weights from "./pesos2.hdf5" — a different file from the "./pesos.hdf5"
# checkpoint written by the training cell.
rpn.load_weights("./pesos2.hdf5")