# Test custom Faster RCNN model

Test the custom Faster RCNN provided by the tutors

## Dependencies

In [2]:
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import random
import sys
import time
import numpy as np
import pickle
import math
import cv2
from matplotlib import pyplot as plt
import pandas as pd
import os

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout, TimeDistributed, Layer
from tensorflow.keras.models import Model

## Paths

In [29]:
# Root folder of the trained model; the test cell changes into it (%cd) before loading anything
BASE_PATH = 'F:/TFG-PabloHernandez-Detector/Modelos/TFG/ModeloTGCfull'
# Folder holding the test images and their ground-truth file annotateTest.txt
TEST_IMAGES = '../TestsTGC/testArucas'
# Folder where the annotated images and annotateResults.txt are written
RESULTS = 'F:/Datasets/FasterRCNNResults/'
# Pickled configuration object that is loaded into `C` by the test cell
CONFIG = os.path.join(BASE_PATH, 'model/model_vgg_config.pickle')

In [30]:
# Auxiliary constant: TEST_IMAGES without its leading '../'; keep both in sync.
# NOTE(review): not referenced by the visible code, which uses filepath[3:] instead.
PREFIX = "TestsTGC/testArucas"

## Classes and functions definition

In [31]:
#### Config setting
class Config:
    """Hyper-parameters read by the RPN decoding and the final classifier."""

    def __init__(self):
        root_two = math.sqrt(2)

        # Anchor sizes
        self.anchor_box_scales = [32, 64, 128]

        # Anchor aspect ratios as [width factor, height factor] pairs
        self.anchor_box_ratios = [
            [1, 1],
            [1./root_two, 2./root_two],
            [2./root_two, 1./root_two],
        ]

        # Target size for the smallest side of the image when resizing
        self.im_size = 600

        # Number of ROIs processed simultaneously by the classifier
        self.num_rois = 4

        # Stride of the RPN with respect to the input image (VGG16 base model)
        self.rpn_stride = 16

        # Scaling factors applied to the regression targets
        self.std_scaling = 4.0
        self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]

        # Overlap thresholds for the RPN
        self.rpn_min_overlap = 0.3
        self.rpn_max_overlap = 0.7

        # Overlap thresholds for the final classifier
        self.classifier_min_overlap = 0.1
        self.classifier_max_overlap = 0.5

        # Class-name encoding and weights path; both start unset
        self.class_mapping = None
        self.model_path = None

#### Definicion ROI Pooling Convolutional Layer
class RoiPoolingConv(Layer):
    """ROI pooling layer: crops each ROI from a feature map and resizes it.

    The layer is called with a list `[feature_map, rois]`. Every ROI is read
    as (x, y, w, h), cropped from the feature map and resized to
    `pool_size x pool_size`. The result has shape
    (1, num_rois, pool_size, pool_size, nb_channels).
    """

    def __init__(self, pool_size, num_rois, **kwargs):
        self.dim_ordering = K.image_data_format()
        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        # Channel count is taken from the last axis of the first input (the feature map)
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        side = self.pool_size
        return None, self.num_rois, side, side, self.nb_channels

    def call(self, x, mask=None):
        assert(len(x) == 2)

        # First input: the feature map; second input: the ROIs, indexed as [0, roi, (x, y, w, h)]
        feature_map = x[0]
        rois = x[1]
        target_size = (self.pool_size, self.pool_size)

        pooled = []
        for roi_idx in range(self.num_rois):
            left = K.cast(rois[0, roi_idx, 0], 'int32')
            top = K.cast(rois[0, roi_idx, 1], 'int32')
            width = K.cast(rois[0, roi_idx, 2], 'int32')
            height = K.cast(rois[0, roi_idx, 3], 'int32')

            # Crop the ROI and resize it to the pooling size
            crop = feature_map[:, top:top+height, left:left+width, :]
            pooled.append(tf.image.resize(crop, target_size))

        stacked = K.concatenate(pooled, axis=0)

        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        stacked = K.reshape(
            stacked,
            (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        # Identity permutation, kept so the layer builds the same ops as before
        return K.permute_dimensions(stacked, (0, 1, 2, 3, 4))

    def get_config(self):
        # Base-layer settings first, then this layer's own constructor arguments
        merged = dict(super(RoiPoolingConv, self).get_config())
        merged.update({'pool_size': self.pool_size, 'num_rois': self.num_rois})
        return merged

#### Vgg-16 modelo base
def nn_base(input_tensor=None):
    """Build the VGG-16 convolutional trunk and return its output tensor.

    Five blocks of 3x3 ReLU convolutions are stacked; blocks 1-4 are each
    followed by a 2x2 max-pooling with stride 2, block 5 is not. Layer names
    follow the pattern `block<N>_conv<M>` / `block<N>_pool`.

    Args:
        input_tensor: tensor to build on; when None a new
            `Input(shape=(None, None, 3))` is created.

    Returns:
        The output tensor of `block5_conv3`.
    """
    if input_tensor is None:
        img_input = Input(shape=(None, None, 3))
    else:
        img_input = input_tensor

    # (number of filters, number of conv layers) for each block, in order
    block_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    last_block = len(block_specs)

    x = img_input
    for block_no, (filters, conv_count) in enumerate(block_specs, start=1):
        for conv_no in range(1, conv_count + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block{}_conv{}'.format(block_no, conv_no))(x)
        # The last block has no pooling layer
        if block_no != last_block:
            x = MaxPooling2D((2, 2), strides=(2, 2),
                             name='block{}_pool'.format(block_no))(x)

    return x

####  modelo RPN
def rpn_layer(base_layers, num_anchors):
    """Attach the region proposal network heads to `base_layers`.

    A shared 3x3 convolution feeds two 1x1 convolutions: a sigmoid head with
    one output per anchor and a linear head with four outputs per anchor.

    Args:
        base_layers: output tensor of the base network.
        num_anchors: number of anchors per feature-map position.

    Returns:
        `[class head output, regression head output, base_layers]`.
    """
    shared = Conv2D(512, (3, 3), padding='same', activation='relu',
                    kernel_initializer='normal', name='rpn_conv1')(base_layers)

    objectness = Conv2D(num_anchors, (1, 1), activation='sigmoid',
                        kernel_initializer='uniform', name='rpn_out_class')(shared)
    box_deltas = Conv2D(num_anchors * 4, (1, 1), activation='linear',
                        kernel_initializer='zero', name='rpn_out_regress')(shared)

    return [objectness, box_deltas, base_layers]

####  modelo clasificador final
def classifier_layer(base_layers, input_rois, num_rois, nb_classes):
    """Build the final classifier head on top of ROI-pooled features.

    Args:
        base_layers: feature-map tensor the ROIs are pooled from.
        input_rois: tensor with the ROIs to classify.
        num_rois: number of ROIs processed per call.
        nb_classes: number of classes (the regression head emits
            4 * (nb_classes - 1) values per ROI).

    Returns:
        `[out_class, out_regr]`: per-ROI softmax class scores and per-ROI
        linear box deltas.
    """
    pooling_regions = 7

    # out_roi_pool.shape = (1, num_rois, pool_size, pool_size, channels)
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    # TimeDistributed applies the wrapped layer to every ROI independently.
    # Layers are created in the same order as before so that auto-generated
    # layer names stay unchanged.
    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    for fc_name in ('fc1', 'fc2'):
        out = TimeDistributed(Dense(4096, activation='relu', name=fc_name))(out)
        out = TimeDistributed(Dropout(0.5))(out)

    # Class prediction for each ROI
    class_head = TimeDistributed(
        Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
        name='dense_class_{}'.format(nb_classes))
    out_class = class_head(out)

    # Bounding-box delta prediction for each ROI
    regr_head = TimeDistributed(
        Dense(4*(nb_classes-1), activation='linear', kernel_initializer='zero'),
        name='dense_regress_{}'.format(nb_classes))
    out_regr = regr_head(out)

    return [out_class, out_regr]

# Non-maximum suppression (NMS): removes duplicate boxes that delimit the same object
def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300):
    """Greedy non-maximum suppression.

    Adapted from:
    http://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/

    Procedure:
        1. Sort the boxes by score.
        2. Pick the box with the highest score.
        3. Compute the IoU between the picked box and every remaining box and
           drop the remaining boxes whose IoU exceeds `overlap_thresh`.
        4. Repeat 2-3 until no boxes remain or `max_boxes` boxes were picked.

    Args:
        boxes: array-like of shape (N, 4) with rows (x1, y1, x2, y2).
        probs: array-like of shape (N,) with the score of each box.
        overlap_thresh: IoU above which a box is considered a duplicate.
        max_boxes: maximum number of boxes to return.

    Returns:
        Tuple `(boxes, probs)`: the selected boxes cast to int, shape (K, 4),
        and their scores, shape (K,), in descending score order. For empty
        input both arrays are empty, so callers can always unpack the result.

    Raises:
        AssertionError: if any box does not satisfy x1 < x2 and y1 < y2.
    """
    boxes = np.asarray(boxes)
    probs = np.asarray(probs)

    # No boxes: return empty arrays with the same layout as the regular result
    if boxes.shape[0] == 0:
        return np.zeros((0, 4), dtype="int"), probs[:0]

    # Convert integer coordinates to float BEFORE slicing, so the area and
    # IoU arithmetic below really runs on floats
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    np.testing.assert_array_less(x1, x2)
    np.testing.assert_array_less(y1, y2)

    # Indices of the selected boxes
    pick = []

    # Area of every box
    area = (x2 - x1) * (y2 - y1)

    # Ascending sort: the index of the highest score is the last one
    idxs = np.argsort(probs)

    while len(idxs) > 0:
        # Take the best remaining box
        i = idxs[-1]
        rest = idxs[:-1]
        pick.append(i)

        # Intersection rectangle: largest top-left corner, smallest bottom-right corner
        xx1_int = np.maximum(x1[i], x1[rest])
        yy1_int = np.maximum(y1[i], y1[rest])
        xx2_int = np.minimum(x2[i], x2[rest])
        yy2_int = np.minimum(y2[i], y2[rest])

        # Width and height of the intersection (zero when the boxes do not overlap)
        ww_int = np.maximum(0, xx2_int - xx1_int)
        hh_int = np.maximum(0, yy2_int - yy1_int)

        area_int = ww_int * hh_int
        area_union = area[i] + area[rest] - area_int
        # The small epsilon avoids a division by zero
        overlap = area_int/(area_union + 1e-6)

        # Keep only the boxes that do not overlap too much with the picked one
        idxs = rest[~(overlap > overlap_thresh)]

        if len(pick) >= max_boxes:
            break

    return boxes[pick].astype("int"), probs[pick]

# Applies the deltas predicted by the RPN to a grid of anchors
def apply_regr_rpn(X, T):
    """Correct anchor boxes (x, y, w, h) with the deltas (tx, ty, tw, th).

    Following the parametrisation of the original paper:
        tx = (cx_gt - cx_anchor) / w_anchor,  ty = (cy_gt - cy_anchor) / h_anchor,
        tw = log(w_gt / w_anchor),            th = log(h_gt / h_anchor)

    Args:
        X: array indexed as X[k, :, :] with k = 0..3 for x, y, w, h.
        T: array indexed as T[k, :, :] with k = 0..3 for tx, ty, tw, th.

    Returns:
        Array stacking the rounded corrected (x, y, w, h). If anything fails
        the exception is printed and `X` is returned unchanged.
    """
    try:
        x, y, w, h = X[0, :, :], X[1, :, :], X[2, :, :], X[3, :, :]
        tx, ty, tw, th = T[0, :, :], T[1, :, :], T[2, :, :], T[3, :, :]

        # Shift the anchor centre by the predicted offsets
        center_x = tx * w + (x + w/2.)
        center_y = ty * h + (y + h/2.)

        # np.exp works element-wise on arrays (math.exp only accepts scalars)
        new_w = np.exp(tw.astype(np.float64)) * w
        new_h = np.exp(th.astype(np.float64)) * h

        # Back from centre coordinates to the top-left corner
        new_x = center_x - new_w/2.
        new_y = center_y - new_h/2.

        return np.stack([np.round(v) for v in (new_x, new_y, new_w, new_h)])
    except Exception as e:
        print(e)
        return X

# Applies the deltas predicted by the final classifier to a single box
def apply_regr_classfinal(x, y, w, h, tx, ty, tw, th):
    """Correct one box (x, y, w, h) with the deltas (tx, ty, tw, th).

    Parametrisation:
        tx = (cx_gt - cx_anchor) / w_anchor,  ty = (cy_gt - cy_anchor) / h_anchor,
        tw = log(w_gt / w_anchor),            th = log(h_gt / h_anchor)

    Returns:
        Tuple (x, y, w, h) of ints with the corrected box. When the
        computation fails (ValueError, OverflowError or any other exception,
        which is printed) the input box is returned unchanged.
    """
    unchanged = (x, y, w, h)
    try:
        # Shift the box centre by the predicted offsets
        center_x = tx * w + (x + w/2.)
        center_y = ty * h + (y + h/2.)

        # Scale width and height
        new_w = math.exp(tw) * w
        new_h = math.exp(th) * h

        # Back from centre coordinates to the top-left corner
        corrected = (center_x - new_w/2., center_y - new_h/2., new_w, new_h)
        return tuple(int(round(value)) for value in corrected)
    except (ValueError, OverflowError):
        return unchanged
    except Exception as e:
        print(e)
        return unchanged

# Builds the ROIs from the scores and deltas predicted by the RPN for every anchor
def rpn_to_roi(out_rpn_cls, out_rpn_regr, C, max_boxes=300, overlap_thresh=0.9):
    """Turn RPN outputs into a list of ROI boxes in feature-map coordinates.

    Steps:
        1. Lay out every anchor at every feature-map position.
        2. Correct each anchor with the deltas predicted by the RPN.
        3. Clip the boxes that stick out of the feature map.
        4. Drop degenerate boxes and run NMS on the rest.

    Args:
        out_rpn_cls: RPN scores, indexed as (1, rows, cols, num_anchors).
        out_rpn_regr: RPN deltas, indexed as (1, rows, cols, 4 * num_anchors).
        C: configuration providing `std_scaling`, `anchor_box_scales`,
            `anchor_box_ratios` and `rpn_stride`.
        max_boxes: maximum number of boxes kept by NMS.
        overlap_thresh: IoU threshold used by NMS.

    Returns:
        Integer array of shape (K, 4) with rows (x1, y1, x2, y2); only the
        boxes are returned, not their scores. K is 0 when no valid box
        survives.
    """
    # Decode the deltas, which are read here as scaled by C.std_scaling
    out_rpn_regr = out_rpn_regr / C.std_scaling

    assert out_rpn_cls.shape[0] == 1
    (rows, cols) = out_rpn_cls.shape[1:3]

    # A[k, row, col, anchor] stores coordinate k of each anchor at each position,
    # e.g. shape (4, 18, 25, 9) => 18 * 25 * 9 = 4050 anchors
    A = np.zeros((4, rows, cols, out_rpn_cls.shape[3]))

    # Grid with the size of the feature map; it is the same for every anchor,
    # so it is computed once outside the loops
    X, Y = np.meshgrid(np.arange(cols), np.arange(rows))

    curr_anchor = 0  # index of the anchor being processed
    for anchor_size in C.anchor_box_scales:
        for anchor_ratio in C.anchor_box_ratios:
            # Anchor width and height on the feature map = (size * ratio) / stride
            anchor_x = (anchor_size * anchor_ratio[0])/C.rpn_stride
            anchor_y = (anchor_size * anchor_ratio[1])/C.rpn_stride

            # Deltas of the current anchor at every position, moved to (4, rows, cols)
            regr = out_rpn_regr[0, :, :, 4 * curr_anchor:4 * curr_anchor + 4]
            regr = np.transpose(regr, (2, 0, 1))

            # (x, y, w, h) of the current anchor centred at every position
            A[0, :, :, curr_anchor] = X - anchor_x/2
            A[1, :, :, curr_anchor] = Y - anchor_y/2
            A[2, :, :, curr_anchor] = anchor_x
            A[3, :, :, curr_anchor] = anchor_y

            # Correct (x, y, w, h) with the predicted deltas (tx, ty, tw, th)
            A[:, :, :, curr_anchor] = apply_regr_rpn(A[:, :, :, curr_anchor], regr)

            # Width and height are forced to be at least 1
            A[2, :, :, curr_anchor] = np.maximum(1, A[2, :, :, curr_anchor])
            A[3, :, :, curr_anchor] = np.maximum(1, A[3, :, :, curr_anchor])

            # (x, y, w, h) => (x1, y1, x2, y2)
            A[2, :, :, curr_anchor] += A[0, :, :, curr_anchor]
            A[3, :, :, curr_anchor] += A[1, :, :, curr_anchor]

            # Clip the boxes to the feature map
            A[0, :, :, curr_anchor] = np.maximum(0, A[0, :, :, curr_anchor])
            A[1, :, :, curr_anchor] = np.maximum(0, A[1, :, :, curr_anchor])
            A[2, :, :, curr_anchor] = np.minimum(cols-1, A[2, :, :, curr_anchor])
            A[3, :, :, curr_anchor] = np.minimum(rows-1, A[3, :, :, curr_anchor])

            curr_anchor += 1

    # Flatten to one row per box and one score per box (same ordering in both)
    all_boxes = np.reshape(A.transpose((0, 3, 1, 2)), (4, -1)).transpose((1, 0))
    all_probs = out_rpn_cls.transpose((0, 3, 1, 2)).reshape((-1))

    # Drop boxes with invalid coordinates (zero or negative width/height)
    degenerate = ((all_boxes[:, 0] - all_boxes[:, 2] >= 0)
                  | (all_boxes[:, 1] - all_boxes[:, 3] >= 0))
    all_boxes = all_boxes[~degenerate]
    all_probs = all_probs[~degenerate]

    # Nothing left: return an empty ROI array instead of indexing into an
    # empty NMS result
    if all_boxes.shape[0] == 0:
        return np.zeros((0, 4), dtype="int")

    # NMS; only the boxes are needed, not the scores
    result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0]
    return result

# Image resizing hook; resizing is currently disabled, so the image passes through
def format_img_size(img, C):
    """Return `img` unchanged together with a scale ratio of 1.

    `C` is accepted but not used while the resize is disabled.
    """
    # Disabled resize to C.im_size on the smallest side, kept for reference:
    #    img_min_side = float(C.im_size)
    #    (height,width,_) = img.shape
    #    if width <= height:
    #        ratio = img_min_side/width
    #        new_height = int(ratio * height)
    #        new_width = int(img_min_side)
    #    else:
    #        ratio = img_min_side/height
    #        new_width = int(ratio * width)
    #        new_height = int(img_min_side)
    #    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    return img, 1

# BGR >> RGB
def format_img_channels(img):
    """Reverse the first three channels of `img` and add a leading axis of size 1."""
    reordered = img[:, :, [2, 1, 0]]
    return np.expand_dims(reordered, axis=0)

def format_img(img, C):
    """Prepare an image for the network.

    Runs `format_img_size` followed by `format_img_channels` and returns the
    formatted image together with the resize ratio.
    """
    sized, ratio = format_img_size(img, C)
    return format_img_channels(sized), ratio

# Maps box coordinates from the resized image back to the original image
def get_real_coordinates(ratio, x1, y1, x2, y2):
    """Scale (x1, y1, x2, y2) by 1 / ratio and round to the nearest integer.

    True division is used so that `round` actually rounds; flooring first
    (`//`) would bias every coordinate towards zero.

    Args:
        ratio: scale factor that was applied to the original image
            (presumably resized size / original size — confirm against the
            resize step).
        x1, y1, x2, y2: box corners in the resized image.

    Returns:
        Tuple (real_x1, real_y1, real_x2, real_y2) of ints.
    """
    real_x1 = int(round(x1 / ratio))
    real_y1 = int(round(y1 / ratio))
    real_x2 = int(round(x2 / ratio))
    real_y2 = int(round(y2 / ratio))
    return (real_x1, real_y1, real_x2, real_y2)

## Run test

In [32]:
##########
# TEST
##########

# IPython magic: change the working directory to BASE_PATH
# (presumably so that relative paths such as C.model_path resolve from there)
%cd {BASE_PATH}

# Load the pickled configuration object.
# NOTE(review): pickle.load can execute arbitrary code; only load config files from a trusted source.
with open(CONFIG, 'rb') as f_in:
	C = pickle.load(f_in)
 
# Input layer of the VGG base network (RGB images)
img_input = Input(shape=(None, None, 3))
# Input layer for the ROIs fed to the RoI pooling layer
roi_input = Input(shape=(C.num_rois, 4))
# Input layer of the final classifier (convolutional feature map (H/stride, W/stride, 512))
num_features = 512
feature_map_input = Input(shape=(None, None, num_features))

# Build the base network (VGG16)
shared_layers = nn_base(img_input)

# Build the RPN model
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = rpn_layer(shared_layers, num_anchors)

# Build the final classifier model
classifier = classifier_layer(feature_map_input, roi_input, C.num_rois, nb_classes=len(C.class_mapping))

# Create the models
model_rpn = Model(img_input, rpn_layers)
model_classifier = Model([feature_map_input, roi_input], classifier)

# Both models load their weights from the same file, matching layers by name
print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

# Invert the mapping: {class name: index} => {index: class name}
class_mapping = C.class_mapping
class_mapping = {v: k for k, v in class_mapping.items()}
print(class_mapping)

imgs_path = os.listdir(TEST_IMAGES)
all_imgs = []
classes = {}

# Score threshold (predictions with a lower probability are ignored)
bbox_threshold = 0.7

# Predictions are stored in a dataframe
column_names = ["name", "xmin", "ymin", "xmax", "ymax", "class", "score"]
predictions = pd.DataFrame(columns = column_names)

# Read annotateTest.txt into a dataframe to draw the ground-truth bboxes on the images
df_bboxes_gt_test = pd.read_csv(TEST_IMAGES + '/annotateTest.txt', sep=",", header=None)
df_bboxes_gt_test.columns = ["filename", "xmin", "ymin", "xmax", "ymax", "class"]
# Group the annotations that belong to the same image ("filename" column)
gt_grouped_by_filename = df_bboxes_gt_test.groupby('filename')

F:\TFG-PabloHernandez-Detector\Modelos\TFG\ModeloTGCfull
Loading weights from ./model/model_frcnn_vgg.hdf5
{0: 'dorsal', 1: 'bg'}


In [38]:
# Runs the detector on every test image, draws ground-truth (green) and
# detected (red) boxes, saves the annotated images and collects the
# predictions in the `predictions` dataframe.
number_images = 1
for idx, img_name in enumerate(imgs_path):
    # Only files with an image extension are processed
    if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
        continue
    print(img_name)
    st = time.time()

    # Forward slashes are kept on purpose: the path is also used below as
    # the key into the annotation file
    filepath = TEST_IMAGES + '/' + img_name
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be read
        print('Could not read image {}, skipping'.format(filepath))
        continue

    # Format the image for the network (BGR -> RGB, batch axis added)
    X, ratio = format_img(img, C)

    # Y1: objectness probability of every anchor at every feature-map position
    # Y2: bbox deltas of every anchor at every feature-map position
    # F: feature map
    [Y1, Y2, F] = model_rpn.predict(X)

    # Correct the anchors with the RPN deltas and select boxes through NMS
    # R has one row (x1, y1, x2, y2) per ROI, at most max_boxes (300 by default) rows
    R = rpn_to_roi(Y1, Y2, C, overlap_thresh=0.7)

    # (x1,y1,x2,y2) => (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # Boxes and scores of the accepted ROIs, keyed by class name
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0]//C.num_rois + 1):
        # Take the next C.num_rois ROIs
        ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0]//C.num_rois:
            # Last, incomplete batch: pad it up to C.num_rois ROIs (the input
            # size expected by the classifier) by repeating its first ROI
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        # P_cls: score of every class (including 'bg') for each input ROI
        # P_regr: bbox deltas (4 values per class) for each input ROI
        [P_cls, P_regr] = model_classifier.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            # Skip ROIs with a low score or classified as the last class ('bg')
            cls_num = np.argmax(P_cls[0, ii, :])
            if np.max(P_cls[0, ii, :]) < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]  # name assigned to the class
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]
            try:
                # Deltas predicted by the final classifier for this ROI
                (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                # Correct the ROI box
                x, y, w, h = apply_regr_classfinal(x, y, w, h, tx, ty, tw, th)
            except Exception as err:
                # Best effort: keep the uncorrected ROI, but report the problem.
                # `Exception` (not a bare except) lets KeyboardInterrupt through.
                print('Could not apply the box regression: {}'.format(err))

            # Store the box (scaled back by the RPN stride) and the score of the ROI
            bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    # Ground-truth boxes of this image; the annotation file stores the path
    # without the leading '../' of TEST_IMAGES, hence the [3:]
    gt_key = filepath[3:]
    if gt_key in gt_grouped_by_filename.groups:
        group_df = gt_grouped_by_filename.get_group(gt_key)
        group_df = group_df.drop(['filename', 'class'], axis=1)
        # Draw the ground-truth boxes in green
        for index, row in group_df.iterrows():
            # Plain ints: OpenCV drawing functions expect Python integers
            gt_x1, gt_y1, gt_x2, gt_y2 = (int(value) for value in row)
            cv2.rectangle(img, (gt_x1, gt_y1), (gt_x2, gt_y2), (0, 255, 0), 2)
    else:
        print('No ground-truth annotations found for {}'.format(gt_key))

    # Apply NMS to the detected boxes and draw the result on the image
    all_dets = []  # detections of this image
    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.2)

        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk,:]

            # Map to the original image and draw the detected box in red
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
            cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (0,0,255), 2)

            textLabel = '{}: {}'.format("confianza",int(100*new_probs[jk]))
            all_dets.append((key,100*new_probs[jk]))

            # Place the label at the top-left corner of the box ...
            (retval,baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_DUPLEX, 0.5, 1)
            textOrg = (real_x1, real_y1)
            xxx1 = textOrg[0]
            yyy1 = textOrg[1] + baseLine
            xxx2 = textOrg[0] + retval[0]
            yyy2 = textOrg[1] - retval[1]
            if xxx1<0 or yyy1<0 or xxx2<0 or yyy2<0:
                # ... or at the bottom-right corner when it would fall outside the image
                textOrg = (real_x2, real_y2)
                xxx1 = textOrg[0] - retval[0]
                yyy1 = textOrg[1] + retval[1]
                xxx2 = textOrg[0]
                yyy2 = textOrg[1] - baseLine
                textOrg = (xxx1, yyy1-baseLine)

            cv2.rectangle(img, (xxx1, yyy1), (xxx2, yyy2), (0, 0, 0), 1)
            cv2.rectangle(img, (xxx1, yyy1), (xxx2, yyy2), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0), 1)

            # DataFrame.append was removed in pandas 2.0; add the row by
            # label instead (the index is the default 0..n-1 range)
            predictions.loc[len(predictions)] = pd.Series({"name":img_name, "xmin":real_x1, "ymin":real_y1, "xmax":real_x2, "ymax":real_y2, "class":key, "score":int(100*new_probs[jk])})

        # Save the annotated image to disk.
        # NOTE(review): this runs once per detected class, so images without
        # detections are never written — confirm this is intended.
        print(RESULTS + '/' + img_name)
        cv2.imwrite(RESULTS + '/' + img_name, img)

#    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    number_images = number_images + 1
    # Periodic checkpoint of the predictions collected so far
    if ((number_images % 20) == 0):
        print('Image {}'.format(number_images))
        predictions.to_csv(RESULTS + '/' + "annotateResults.txt", header=None, index=None, sep= ",")
#    if all_dets:
#        plt.figure(figsize=(10,10))
#        plt.grid()
#        plt.imshow(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
#        plt.show()

# Write all collected predictions to a file
predictions.to_csv(RESULTS + '/' + "annotateResults.txt", header=None, index=None, sep= ",")

Arucas_frame_01_06_29_000.jpg
F:/Datasets/FasterRCNNResults//Arucas_frame_01_06_29_000.jpg
[('dorsal', 98.15324544906616), ('dorsal', 97.39568829536438), ('dorsal', 97.3118543624878), ('dorsal', 97.01340794563293), ('dorsal', 96.98181748390198), ('dorsal', 96.94610834121704), ('dorsal', 95.60053944587708), ('dorsal', 95.5707311630249), ('dorsal', 95.52544355392456), ('dorsal', 94.75155472755432), ('dorsal', 94.57700848579407), ('dorsal', 94.26361918449402), ('dorsal', 93.99100542068481), ('dorsal', 93.83662939071655), ('dorsal', 93.60968470573425), ('dorsal', 93.04572343826294), ('dorsal', 92.55263209342957), ('dorsal', 92.30912327766418), ('dorsal', 92.28964447975159), ('dorsal', 92.23117232322693), ('dorsal', 92.06234812736511), ('dorsal', 91.94070100784302), ('dorsal', 91.918283700943), ('dorsal', 91.89455509185791), ('dorsal', 91.65550470352173), ('dorsal', 91.41327738761902), ('dorsal', 91.2253737449646), ('dorsal', 91.1083996295929), ('dorsal', 91.05907678604126), ('dorsal', 90.5

KeyboardInterrupt: 