# RPN

Afegim a la U-Net una branca nova, la branca de *region proposal network (RPN)*. Introduïda per primer cop per la *Faster R-CNN*, duu a terme dues tasques alhora: per una part refina tot un conjunt d'<a href="https://www.termcat.cat/ca/cercaterm/bounding%20box?type=basic">envolupants</a> i per l'altra indica quina és la probabilitat que cada un d'ells contengui un objecte.

<img style="width:75%" src="https://tryolabs.com/blog/images/blog/post-images/2018-01-18-faster-rcnn/rpn-conv-layers.63c5bf86.png" />

### Importam llibreries

In [None]:
import json
import os

import cv2 
import imgaug as ia
import imgaug.augmenters as iaa
import numpy as np
import skimage
import skimage.color
import skimage.draw
import skimage.io
import skimage.transform
from matplotlib import pyplot as plt
from numpy.random import seed
from tqdm.auto import tqdm

import tensorflow.keras.layers as keras_layer
from tensorflow.keras import backend as K

# Project-local libraries
from u_cells.u_cells.data import unet as u_data
from u_cells.u_cells.data import rpn as rpn_data
from u_cells.u_cells.model import unet as u_model
from u_cells.u_cells.model import rpn as rpn_model
from u_cells.u_cells.model import resnet as resnet_model
from u_cells.u_cells.common import config as rpn_config
from u_cells.u_cells.common import data as common_data
from u_cells.u_cells.common import metrics as rpn_metrics

seed(1)

### Configuració

Primerament cream una classe de configuració per a l'execució i l'entrenament de la xarxa. En aquesta classe deixam els valors per defecte exceptuant els casos del nombre de classes, la mida de les àncores, les passes per època i el llindar mínim de confiança.

In [None]:
# Notebook-wide switches read by the cells below.
# MULTI_CLASS: when True, CellConfig declares 1 + 3 classes and CellDataset
# registers ELONGATED / CIRCULAR / OTHER; when False a single "cell" class is used.
MULTI_CLASS = False
# PYRAMID: when True, the RPN input features are built by
# rpn_model.RPN.features_2_rpn; otherwise they are assembled by hand from the
# encoder outputs.
PYRAMID = False
# TRANSFER: when True, the U-Net backbone is trained on its own first and its
# layers are then marked non-trainable (except four encoder entries).
TRANSFER = False

In [None]:
class CellConfig(rpn_config.Config):
    """Settings used to train and run the RPN on the cell dataset.

    Only the attributes listed here are overridden; everything else keeps
    the value inherited from ``rpn_config.Config``.
    """

    # Name that identifies this configuration
    NAME = "cells"
    BATCH_SIZE = 3

    # One backbone stride and one anchor scale
    BACKBONE_STRIDES = [4]
    RPN_ANCHOR_SCALES = [32]

    # Class count, background included: three cell types in multi-class
    # mode, otherwise a single generic "cell" class
    NUM_CLASSES = (1 + 3) if MULTI_CLASS else (1 + 1)

    # Number of training steps per epoch
    STEPS_PER_EPOCH = 100
#     LEARNING_RATE = 3e-01

    # Confidence thresholds.
    # NOTE(review): the previous comment spoke of a 90% cut-off, but the
    # minimum confidence is 0 - confirm the intended value.
    DETECTION_MIN_CONFIDENCE = 0
    PRED_THRESHOLD = 0.99999995

    # Input geometry
    IMAGE_SHAPE = [512, 512, 3]
    IMAGE_MAX_DIM = 512
    IMAGE_MIN_DIM = 400

    COMBINE_FG = True


# Instantiate the configuration used by the generators and the RPN below.
config = CellConfig()
# Replace the list-valued class attribute with an ndarray on this instance.
config.IMAGE_SHAPE = np.array([512,512,3])

# Show the resulting configuration.
print(config)

## Entrenament

Per realitzar l'entrenament primerament cream dos generadors d'imatges. Els generadors en el cas de la *RPN* es creen en dos temps. Primerament cream objectes **Dataset**.

### Dataset

Definim un objecte Dataset. Anàlogament a la configuració, ja definida, es basa en l'herència de classes abstractes definides a les llibreries. Un detall important és que en el cas de la RPN les dades es formen a partir dels envolupants, en lloc de l'inrevés.

In [None]:
class CellDataset(rpn_data.Dataset):
    """Dataset of cell images annotated with VIA polygon regions."""

    def load_cell(self, dataset_dir, subset):
        """Register the images and annotations of one subset of the dataset.

        Args:
            dataset_dir: Root directory of the dataset.
            subset: Subset to load, either "train" or "val".

        Raises:
            AssertionError: If ``subset`` is not "train" or "val".
        """
        # Register the classes: three cell types in multi-class mode,
        # otherwise a single generic one.
        if MULTI_CLASS:
            self.add_class("cell", 1, "ELONGATED")
            self.add_class("cell", 2, "CIRCULAR")
            self.add_class("cell", 3, "OTHER")
        else:
            self.add_class("cell", 1, "cell")

        # Train or validation dataset?
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # Annotations follow the VIA export format. The context manager
        # guarantees that the file handle is closed after parsing.
        annotation_path = os.path.join(dataset_dir, "via_region_data.json")
        with open(annotation_path) as annotation_file:
            annotations = json.load(annotation_file)
        annotations = list(annotations.values())  # don't need the dict keys

        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]

        # Add images
        for a in annotations:
            # VIA 1.x stores the regions as a dict, VIA 2.x as a list.
            regions = a['regions']
            if isinstance(regions, dict):
                regions = regions.values()

            # Pair every polygon outline with its class id (stored type + 1,
            # so that 0 stays free for the background).
            aux = [(r['shape_attributes'], r['type'] + 1) for r in regions]
            polygons, cells = list(zip(*aux))

            if not MULTI_CLASS:
                # Single-class mode: every instance gets class id 1.
                cells = np.ones([len(polygons)], dtype=np.int32)
            else:
                cells = np.asarray(cells)

            # The image is read only to obtain its size.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "cell",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons, cells=cells)

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Args:
            image_id: Index of the image inside ``self.image_info``.

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: The class ids stored for the image under "cells".
        """
        info = self.image_info[image_id]
        height, width = info["height"], info["width"]

        # One uint8 plane per annotated polygon.
        mask = np.zeros([height, width, len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Rasterise the polygon. Passing `shape` clips the pixel
            # coordinates to the image, so outlines that leave the image no
            # longer raise IndexError on assignment.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = 1

        # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
        # produces the same dtype.
        return mask.astype(bool), info["cells"]

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        return info["path"]

Una vegada definit el *dataset* cream dues instàncies, una per l'entrenament i l'altra per validació.

In [None]:
# Training dataset.
# Training dataset: register the "train" subset, then call prepare() before use.
dataset_train = CellDataset()
dataset_train.load_cell("./in/bboxes_class/", "train")
dataset_train.prepare()

# Validation dataset: same steps for the "val" subset.
dataset_val = CellDataset()
dataset_val.load_cell("./in/bboxes_class/", "val")
dataset_val.prepare()

In [None]:
def sometimes(aug):
    """Wrap ``aug`` in ``iaa.Sometimes`` with probability 0.5."""
    return iaa.Sometimes(0.5, aug)


# Augmentation pipeline handed to the training generator.
augmentation = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontal flip, probability 0.5
    iaa.Flipud(0.2),  # vertical flip, probability 0.2
    # Disabled: crop / pad by -5% to 10% of the height/width
    # sometimes(iaa.CropAndPad(
    #     percent=(-0.05, 0.1),
    #     pad_mode=ia.ALL,
    #     pad_cval=(0, 255)
    # )),
    sometimes(iaa.Affine(
        # scale to 80-120% of the size, independently per axis
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        # translate by -10 to +10 percent (per axis)
        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
        rotate=(-45, 45),  # degrees
        shear=(-16, 16),  # degrees
        order=[0, 1],  # nearest neighbour or bilinear interpolation
        cval=0,  # fill value used when the mode is constant
        mode=ia.ALL,  # any of the available warping modes
    )),
    # Pick between 0 and 5 of the listed augmenters; the list currently holds
    # a single entry, a one-of-three blur.
    iaa.SomeOf(
        (0, 5),
        [
            iaa.OneOf([
                iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
                iaa.AverageBlur(k=(2, 7)),  # local mean, kernel size 2 to 7
                iaa.MedianBlur(k=(3, 11)),  # local median, kernel size 3 to 11
            ]),
        ],
        random_order=True,
    ),
])

In [None]:
# Batch generators for the RPN; only the training one receives the
# augmentation pipeline.
# NOTE(review): the first argument (100) is presumably the number of steps per
# epoch - confirm against rpn_data.DataGenerator.
# NOTE(review): shuffle=False is also used for training - confirm it is intended.
train_generator = rpn_data.DataGenerator(100, dataset_train, config, shuffle=False, augmentation=augmentation)
val_generator = rpn_data.DataGenerator(100, dataset_val, config, shuffle=False)

### *Backbone* dataset

També cream un generador per la ``U-Net`` sense RPN per si volem fer un entrenament en dues fases.

In [None]:
# Generator for training the plain U-Net on image / mask pairs, without
# augmentation. It is only consumed when TRANSFER is True.
# NOTE(review): the positional arguments (3, 540 // 3, ..., (512, 512), 3) are
# presumably batch size, steps, image size and channel count - confirm against
# u_data.DataGenerator.
train_generator_u = u_data.DataGenerator(3, 540 // 3, './in/CellPose/train/img/*.png', (512, 512), 3,
                                         u_data.DataFormat.MASK, mask_path='./in/CellPose/train/masks/*.png', 
                                         augmentation=None, background=False, rgb=True)

## Construim el model

### Backbone model

Primerament construïm la U-Net que emprarem com a *backbone* pel model RPN.

In [None]:
# Backbone selection: the project's U-Net (default) or a ResNet-50 graph.
U_NET = True

if U_NET:
    backbone = u_model.UNet(input_size=(512, 512, 3), out_channel=1, batch_normalization=True, residual=True)

    # build() returns the input tensor, the encoder outputs (a mapping, later
    # read through .values()) and the mask output.
    input_image, encoder, mask_out = backbone.build(n_filters=16, layer_depth=5, dilation_rate=1, last_activation="sigmoid")
    backbone.compile(loss_func = "binary_crossentropy", run_eagerly=True)
else:
    # The previous code read `self.__input_size`, which does not exist at
    # notebook scope and raised NameError. Use the same input size as the
    # U-Net branch instead.
    input_image = keras_layer.Input((512, 512, 3), name="input_image")
    embedded_layer = resnet_model.resnet_graphs(input_image, 'resnet50')
    # NOTE(review): this branch defines neither `backbone`, `encoder` nor
    # `mask_out`, which the following cells rely on.

In [None]:
# Print the backbone summary (`backbone` is only defined when U_NET is True).
backbone.summary()

In [None]:
# Optional first phase: train the U-Net alone on the mask generator, without a
# validation generator and without checkpoints.
# NOTE(review): the positional 5 and 540 // 3 are presumably epochs and steps
# per epoch - confirm against u_model.UNet.train.
if TRANSFER:
    backbone.train(train_generator_u, None, 5, 540 // 3, check_point_path=None, validation_steps=2)

#### Múltiples entrades


In [None]:
if TRANSFER:
    # Mark every layer of the pre-trained backbone as non-trainable...
    for l in backbone.model.layers:
        l.trainable = False

    # ...then set `trainable` again on the last four entries of `encoder`.
    # NOTE(review): the same entries are later fed to Concatenate as inputs; if
    # they are tensors rather than layers this assignment has no effect - confirm.
    for l in list(encoder.values())[::-1][:4]: # last four encoder entries
        l.trainable = True

In [None]:
import tensorflow as tf

# Build the feature map that feeds the RPN head.
if PYRAMID:
    # Let the library combine the two last encoder outputs (deepest first).
    features = rpn_model.RPN.features_2_rpn(list(encoder.values())[::-1][:2], 256)
else:
    # Manual merge. Start from the second-to-last encoder output,
    features = list(encoder.values())[-2]
    # concatenate it with the last output upsampled by 2,
    features = keras_layer.Concatenate(axis=-1)([features, keras_layer.UpSampling2D(size=(2, 2))(list(encoder.values())[-1])])
    # and project to 256 channels with a 1x1 convolution.
    features = keras_layer.Conv2D(256, (1,1))(features)
    # Repeat one level up: upsample by 2, concatenate with the third-to-last
    # output and project to 256 channels again.
    features = keras_layer.Concatenate(axis=-1)([(list(encoder.values())[-3]), keras_layer.UpSampling2D(size=(2,2))(features)])
    features = keras_layer.Conv2D(256, (1,1))(features)
    # Disabled: one further level using the fourth-to-last output.
#     features = keras_layer.Concatenate(axis=-1)([(list(encoder.values())[-4]), keras_layer.UpSampling2D(size=(2,2))(features)])
#     features = keras_layer.Conv2D(256, (1,1))(features)

# Display the resulting tensor.
features

### RPN model

In [None]:
# RPN in TRAIN mode on top of the shared features, the backbone mask output
# and the common input tensor.
rpn = rpn_model.RPN(rpn_model.NeuralMode.TRAIN, (512, 512, 3), features, 256, mask_out, 
                    input_image, config)

# Build, compile and print the model.
rpn.build()
rpn.compile()
rpn.summary()

### Entrenam el model

In [None]:
# Train for 10 epochs; "./pesos2.hdf5" is the same path the inference section
# later loads the weights from.
rpn.train(train_generator=train_generator, val_generator=val_generator, epochs=10, check_point_path="./pesos2.hdf5")

## Inferència

Per realitzar la inferència generam un nou model, amb el mode ``INFERENCE``. Una vegada creat hem de carregar els pesos des d'un fitxer, generat quan acabam l'entrenament.

In [None]:
# Rebuild the RPN in INFERENCE mode over the same features and inputs; this
# rebinds `rpn`, replacing the training-mode object.
rpn = rpn_model.RPN(rpn_model.NeuralMode.INFERENCE, (512, 512, 3), features, 256, mask_out, 
                    input_image, config)

rpn.build()

In [None]:
# Load the weights from the path that was passed to rpn.train as check_point_path.
rpn.load_weights("./pesos2.hdf5")

In [None]:
# Take the first item produced by the validation generator and run the model
# on its first input.
t = next(iter(val_generator))
masks, cls, bboxes = rpn.predict(t[0][0])

In [None]:
# Sanity check: the class scores and the box deltas must have one entry per
# anchor of the validation generator.
assert (cls.shape[1] == bboxes.shape[1]) and (cls.shape[1] == val_generator.anchors.shape[0]), "Ancores i predicció diferents"

#### Aplicam les *deltas* als anchors

In [None]:
# Region of the network input occupied by the image, indexed the same way as
# the boxes: [0]/[2] along one axis and [1]/[3] along the other.
# NOTE(review): presumably (y1, x1, y2, x2) of the resized image inside the
# padded 512x512 input - confirm.
WINDOW = [64, 0, 448, 512]
# NOTE(review): presumably (height, width, channels) of the original image - confirm.
ORG_IMG = [2352, 3136, 3]
# Shape of the network input.
IMG_SHAPE = [512, 512, 3]

In [None]:
# Scale the predicted deltas of the first batch element by config.RPN_BBOX_STD_DEV.
bboxes_deltas = bboxes[0] * config.RPN_BBOX_STD_DEV

In [None]:
def windows_to_img(window_position, img_shape, bboxes):
    """Map boxes from window coordinates to image coordinates.

    Every column of ``bboxes`` is shifted by the window origin of its axis
    and then multiplied by an image-size / window-size ratio. The input
    array is left untouched; the result keeps its dtype.

    Args:
        window_position: Four window corner values; entries 0 and 1 are the
            origins subtracted from the even and odd columns respectively.
        img_shape: Sequence whose first two entries are the target image
            sizes.
        bboxes: 2-D array with one box per row.

    Returns:
        A copy of ``bboxes`` with the transformed coordinates.
    """
    mapped = np.copy(bboxes)
    extent = (window_position[3] - window_position[1],
              window_position[2] - window_position[0])

    for col in range(mapped.shape[1]):
        axis = col % 2
        origin = window_position[axis]
        # NOTE(review): even columns use img_shape[1] over the
        # window_position[3] - window_position[1] extent and odd columns the
        # other pair; both ratios coincide only when the resize kept the
        # aspect ratio - confirm the pairing is intended.
        ratio = img_shape[(col + 1) % 2] / extent[axis]
        mapped[:, col] = (mapped[:, col] - origin) * ratio

    return mapped

In [None]:
# Turn the scaled deltas into boxes with the generator's decoder
# (presumably relative to its anchors - confirm).
bboxes_deltas = val_generator.decode_deltas(bboxes_deltas)

In [None]:
# Move the boxes from the network-input window to the ORG_IMG coordinate frame.
bboxes_deltas = windows_to_img(WINDOW, ORG_IMG, bboxes_deltas)

#### Filtram els envolupants amb una *objectness* menor que 0.7

In [None]:
# Load the validation image on which the predictions are drawn.
in_img = cv2.imread("./in/bboxes_class/val/4.png")
if in_img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface as an unrelated
    # AttributeError on the next line.
    raise FileNotFoundError("Could not read ./in/bboxes_class/val/4.png")
in_img = in_img.astype(np.uint8)

plt.figure()
# OpenCV stores colour images as BGR while matplotlib expects RGB. Convert
# only for display, so `in_img` stays BGR for the later cv2 calls.
plt.imshow(cv2.cvtColor(in_img, cv2.COLOR_BGR2RGB))

In [None]:
# Keep the boxes whose score in column 1 of the class prediction exceeds 0.7.
bboxes_filtered = bboxes_deltas[cls[0][:, 1] > 0.7]
# Integer coordinates, as needed for drawing.
bboxes_filtered = bboxes_filtered.astype(int)
bboxes_filtered.shape

In [None]:
# Draw on a copy so that `in_img` stays untouched.
out_img = np.copy(in_img.astype(np.uint8))

# Run non-maximum suppression on the filtered boxes, passing the scores of the
# same boxes (0.3 is presumably the overlap threshold - confirm). Each
# surviving box r is drawn with cv2 points (x, y) = (r[1], r[0]) and
# (r[3], r[2]), 3 px thick.
for r in common_data.non_max_suppression_fast(bboxes_filtered, 0.3, cls[0][:, 1][cls[0][:, 1] > 0.7]):  
    out_img = cv2.rectangle(out_img, (r[1], r[0]), (r[3], r[2]), (0, 255, 0), 3)

In [None]:
%matplotlib notebook

plt.figure()
plt.imshow(out_img);

# Mètriques

In [None]:
# Accumulated ground-truth / prediction flags over the whole validation set.
gt_g = []
p_g = []

dataset = dataset_val
generador = val_generator
for idx in tqdm(dataset.image_ids):
    img, _, _ , gt_bbox, _ = rpn_data.DataGenerator.load_image_gt(dataset, config, idx)
    img = generador.mold_image(img)
    # Loop-local names are used for the raw predictions. The previous code
    # reused `cls`, `bboxes` and `bboxes_deltas`, so after this cell `cls` was a
    # 1-D score array and the later cell indexing `cls[0][:, 1]` failed.
    _, cls_pred, bbox_pred = rpn.predict(img.reshape(1, 512, 512, 3))

    # Scale the deltas and decode them into boxes.
    decoded = generador.decode_deltas(bbox_pred[0] * config.RPN_BBOX_STD_DEV)

    # Keep the boxes whose column-1 score exceeds 0.7 (mask computed once).
    scores = cls_pred[0][:, 1]
    confident = scores > 0.7
    bboxes_filtered = decoded[confident]
    scores = scores[confident]

    # Discard boxes that come within 5 px of the window border.
    inside_the_box = ((bboxes_filtered[:, 0] > WINDOW[0] + 5) &
                      (bboxes_filtered[:, 1] > WINDOW[1] + 5) &
                      (bboxes_filtered[:, 2] < WINDOW[2] - 5) &
                      (bboxes_filtered[:, 3] < WINDOW[3] - 5))

    bboxes_filtered = bboxes_filtered[inside_the_box]
    scores = scores[inside_the_box]

    bboxes_filtered = common_data.non_max_suppression_fast(bboxes_filtered, 0.3, scores)

    # Only the third returned value is used: one flag per related box.
    _, _, pred = rpn_metrics.relate_bbox_to_gt(bboxes_filtered, gt_bbox)

    gt_p = [1] * len(pred)

    # Predicted boxes that were not related to any ground truth are appended
    # as predicted 1 / ground truth 0.
    if len(pred) < len(bboxes_filtered):
        diff = len(bboxes_filtered) - len(pred)
        pred = pred + [1] * diff
        gt_p = gt_p + [0] * diff

    # extend() appends in place instead of rebuilding both lists every iteration.
    gt_g.extend(gt_p)
    p_g.extend(pred)

rpn_metrics.basic_metrics(gt_g, p_g)

In [None]:
# Shape of the most recently computed `bboxes_filtered` (the last image of the
# metrics loop when that cell was the last one run).
bboxes_filtered.shape

In [None]:
# Save the image with the drawn boxes to ./2.png.
cv2.imwrite("./2.png", out_img)

In [None]:
# Shape of the anchor array held by the validation generator.
val_generator.anchors.shape

In [None]:
# Canvas with the same shape and dtype as the input image.
weights = np.zeros_like(in_img)

# Pair every anchor with its column-1 score and sort by ascending score, so
# that the highest-scoring anchors are painted last.
aux = sorted(zip(cls[0][:, 1], val_generator.anchors), key=lambda pair: pair[0])

for objecteveness, bounding_box in aux:
    # Map the anchor to the ORG_IMG frame and truncate to integer coordinates.
    bounding_box = windows_to_img(WINDOW, ORG_IMG, np.array([bounding_box]))[0].astype(int)

    # Filled rectangle (thickness -1); the first colour channel carries the
    # score scaled to 0-255.
    pt1 = (bounding_box[1], bounding_box[0])
    pt2 = (bounding_box[3], bounding_box[2])
    cv2.rectangle(weights, pt1, pt2, (255 * objecteveness, 0, 0), -1)

# Blend the image and the score canvas with equal weights.
dst = cv2.addWeighted(in_img, 0.5, weights, 0.5, 0.0)

In [None]:
# Show the blended image.
# NOTE(review): `dst` is built from a cv2.imread array (BGR) and shown without
# conversion, so the score channel appears as red in matplotlib while the
# photo has its red and blue channels swapped - confirm this is intended.
plt.figure()
plt.imshow(dst);