In [3]:
import tensorflow as tf
import os
# Restrict CUDA to the device with index 1 for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
config =  tf.ConfigProto() 
# Cap this process at a 0.5 fraction of GPU memory (the previous comment said 30%, which did not match the value).
config.gpu_options.per_process_gpu_memory_fraction = 0.5
# NOTE(review): no visible cell hands `session` to Keras, so confirm these options actually reach the backend session.
session = tf.Session(config=config)

In [4]:
from __future__ import division
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle
import os
import cv2
import xml.etree.ElementTree as ET
from tqdm import tqdm

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
# from keras_frcnn import config, data_generators
# from keras_frcnn import losses as losses
# import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [5]:
import resnet50 as nn
import losses 

### 参数初始化

In [6]:
# Raise the interpreter's recursion depth limit.
sys.setrecursionlimit(40000)

# Enables the progress/diagnostic prints in the training loop.
verbose = True
network = 'resnet50'

# Presumably data-augmentation switches; they are not read by any visible cell.
use_horizontal_flips = False
use_vertical_flips = False
rot_90 = False

# num_anchors is computed as len(scales) * len(ratios) when the networks are built.
anchor_box_scales = [128, 256, 512]
anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]

# Target length of the shorter image side used by format_img_size.
im_size = 600
# Per-channel values subtracted in format_img_channels.
img_channel_mean = [103.939, 116.779, 123.68]
# Divisor applied to the image after mean subtraction in format_img_channels.
img_scaling_factor = 1.0
# Overridden to 32 by the next cell.
num_rois = 300
# Multiplies the predicted box coordinates in the inference loop.
rpn_stride = 16
balanced_classes = False
std_scaling = 4.0
# Divides the classifier's (tx, ty, tw, th) outputs in the inference loop.
classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
rpn_min_overlap = 0.3
rpn_max_overlap = 0.7

classifier_min_overlap = 0.1
classifier_max_overlap = 0.5
# Placeholder; replaced by the mapping returned from get_data.
class_mapping = None
# The training loop saves weights here and the inference section loads them from here.
model_path = './model_frcnn_resnet.hdf5'

In [7]:
# Folder that contains the VOC2012 dataset directory (get_data appends 'VOC2012' to it).
# NOTE(review): absolute, machine-specific path.
train_path = "/home/amax/Documents/"
parser ="pascal_voc"
# Replaces the value of 300 assigned in the previous cell.
num_rois = 32
network = 'resnet50'

horizontal_flips = False
vertical_flips = False
rot_90 = False

num_epochs = 500                
config_filename = "config.pickle"
# Path for the weights file produced by training.
# NOTE(review): the visible training loop saves to `model_path`, not to this name.
output_weight_path = './model_frcnn.hdf5'
# Pretrained weights loaded by name into the RPN and classifier models.
input_weight_path = "resnet50_weights_tf_dim_ordering_tf_kernels.h5"

### 数据处理

In [8]:
def _read_image_set(list_path):
    """Return the set of '<line>.jpg' names built from every line of ``list_path``."""
    with open(list_path) as f:
        return {line.strip() + '.jpg' for line in f}


def get_data(input_path, visualise=False):
    """Parse the Pascal VOC annotations stored under ``input_path/VOC2012``.

    Every XML file in ``Annotations`` is read. An annotation is kept only if
    it contains at least one ``object`` element and its ``filename`` appears
    in one of the ``ImageSets/Main`` lists (trainval, train, val and, when the
    file exists, test).

    Args:
        input_path: directory that contains the ``VOC2012`` folder.
        visualise: when True, each kept image is shown with its boxes drawn
            via ``cv2.imshow`` and the function waits for a key press.

    Returns:
        A tuple ``(all_imgs, classes_count, class_mapping)`` where
        ``all_imgs`` is a list of dicts with the keys ``filepath``, ``width``,
        ``height``, ``bboxes``, ``image_id`` and ``imageset``;
        ``classes_count`` maps class name to number of boxes; and
        ``class_mapping`` maps class name to an integer index assigned in
        order of first appearance.
    """
    all_imgs = []
    classes_count = {}
    # class name -> integer index
    class_mapping = {}

    data_paths = [os.path.join(input_path, 'VOC2012')]

    print('Parsing annotation files')
    for data_path in data_paths:

        annot_path = os.path.join(data_path, 'Annotations')
        imgs_path = os.path.join(data_path, 'JPEGImages')
        sets_dir = os.path.join(data_path, 'ImageSets', 'Main')

        # Sets give O(1) membership tests; the lists used previously were
        # scanned once per annotation file. Order matters: when a file is
        # listed in several sets, the last matching entry wins.
        image_sets = [
            ('trainval', _read_image_set(os.path.join(sets_dir, 'trainval.txt'))),
            ('train', _read_image_set(os.path.join(sets_dir, 'train.txt'))),
            ('val', _read_image_set(os.path.join(sets_dir, 'val.txt'))),
        ]
        # test.txt is optional.
        test_list_path = os.path.join(sets_dir, 'test.txt')
        if os.path.isfile(test_list_path):
            image_sets.append(('test', _read_image_set(test_list_path)))

        annots = tqdm([os.path.join(annot_path, s) for s in os.listdir(annot_path)])
        # image_id is the 1-based position of the annotation in the listing.
        for idx, annot in enumerate(annots, 1):
            annots.set_description("Processing %s" % os.path.basename(annot))

            element = ET.parse(annot).getroot()

            element_objs = element.findall('object')
            element_filename = element.find('filename').text
            element_width = int(element.find('size').find('width').text)
            element_height = int(element.find('size').find('height').text)

            # Annotations without any object are ignored.
            if len(element_objs) == 0:
                continue

            imageset = None
            for set_name, members in image_sets:
                if element_filename in members:
                    imageset = set_name

            # The annotated image is not listed in any image set.
            if imageset is None:
                continue

            annotation_data = {'filepath': os.path.join(imgs_path, element_filename),
                               'width': element_width,
                               'height': element_height,
                               'bboxes': [],
                               'image_id': idx,
                               'imageset': imageset}

            for element_obj in element_objs:
                class_name = element_obj.find('name').text
                classes_count[class_name] = classes_count.get(class_name, 0) + 1

                if class_name not in class_mapping:
                    class_mapping[class_name] = len(class_mapping)

                obj_bbox = element_obj.find('bndbox')
                # Coordinates may be written as floats; round to the nearest integer.
                x1 = int(round(float(obj_bbox.find('xmin').text)))
                y1 = int(round(float(obj_bbox.find('ymin').text)))
                x2 = int(round(float(obj_bbox.find('xmax').text)))
                y2 = int(round(float(obj_bbox.find('ymax').text)))
                difficulty = int(element_obj.find('difficult').text) == 1
                annotation_data['bboxes'].append(
                    {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
            all_imgs.append(annotation_data)

            if visualise:
                img = cv2.imread(annotation_data['filepath'])
                for bbox in annotation_data['bboxes']:
                    cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox['x2'], bbox['y2']), (0, 0, 255))
                cv2.imshow('img', img)
                print(annotation_data['imageset'])
                cv2.waitKey(0)

    return all_imgs, classes_count, class_mapping

# Parse the dataset; `class_mapping` replaces the None placeholder from the parameter cell.
all_imgs, classes_count, class_mapping = get_data(train_path)


Processing 2007_002895.xml:   0%|          | 52/17125 [00:00<00:32, 519.42it/s]

Parsing annotation files


Processing 2010_002187.xml: 100%|██████████| 17125/17125 [00:23<00:00, 722.19it/s]


In [20]:
# NOTE(review): this cell carries execution count 20 and its recorded output already
# contains the 'bg' entry that the following cell adds, so it was run out of order.
print(len(all_imgs))
print(classes_count)

11540
{'chair': 2906, 'bottle': 1482, 'boat': 999, 'pottedplant': 1099, 'cow': 702, 'motorbike': 751, 'horse': 750, 'cat': 1227, 'train': 656, 'diningtable': 747, 'bicycle': 790, 'bird': 1221, 'tvmonitor': 826, 'bg': 0, 'person': 10129, 'dog': 1541, 'sheep': 994, 'car': 2364, 'sofa': 786, 'aeroplane': 954, 'bus': 637}


In [10]:
# Add the background class if it is not present yet.
if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

# Inverse mapping: class index -> class name.
inv_map = {v: k for k, v in class_mapping.items()}

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))

# Shuffle the samples in place.
# NOTE(review): no random seed is set in the visible cells, so the order differs between runs.
random.shuffle(all_imgs)

num_imgs = len(all_imgs)

# Partition by the 'imageset' tag assigned in get_data.
train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
val_imgs = [s for s in all_imgs if s['imageset'] == 'val']
test_imgs = [s for s in all_imgs if s['imageset'] == 'test']

# Report the size of the training, validation and test sets.
print('Num train samples {}'.format(len(train_imgs)))
print('Num val samples {}'.format(len(val_imgs)))
print('Num test samples {}'.format(len(test_imgs)))

Training images per class:
{'aeroplane': 954,
 'bg': 0,
 'bicycle': 790,
 'bird': 1221,
 'boat': 999,
 'bottle': 1482,
 'bus': 637,
 'car': 2364,
 'cat': 1227,
 'chair': 2906,
 'cow': 702,
 'diningtable': 747,
 'dog': 1541,
 'horse': 750,
 'motorbike': 751,
 'person': 10129,
 'pottedplant': 1099,
 'sheep': 994,
 'sofa': 786,
 'train': 656,
 'tvmonitor': 826}
Num classes (including bg) = 21
Num train samples 5717
Num val samples 5823
Num test samples 0


In [11]:
# Data augmentation / batch generators.
import data_generator
# NOTE(review): this rebinds `nn`, which an earlier cell imported from `resnet50`; every later use of `nn` refers to resnet60_Copy1.
import resnet60_Copy1 as nn
# One generator per split; each receives the network's output-length function.
data_gen_train = data_generator.get_anchor_gt(train_imgs, classes_count,nn.get_img_output_length,mode='train')
data_gen_val = data_generator.get_anchor_gt(val_imgs, classes_count, nn.get_img_output_length,mode='val')
# The recorded run reports 0 test samples, so this generator is built over an empty list there.
data_gen_test = data_generator.get_anchor_gt(test_imgs, classes_count,nn.get_img_output_length, mode='val')

### 搭建网络

In [12]:
# Image input: unspecified height and width, 3 channels in the last axis.
input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
# ROI input: an unspecified number of rows with 4 values each.
roi_input = Input(shape=(None, 4))

# Base network shared by the RPN and the classifier head.
shared_layers = nn.nn_base(img_input, trainable=True)
# Same values as in the parameter cell, assigned again here.
anchor_box_scales = [128, 256, 512]
anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]
# One anchor per (scale, ratio) combination.
num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)

In [14]:
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, num_rois, nb_classes=len(classes_count), trainable=True)

# Only the first two RPN outputs are used as model outputs for training.
model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# Combined model; it is the one whose weights are saved and that TensorBoard is attached to.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

# It is best to download the pretrained model beforehand; input_weight_path
# is where the initial weights are read from.
try:
    print('loading weights from {}'.format(input_weight_path))
    model_rpn.load_weights(input_weight_path, by_name=True)
    model_classifier.load_weights(input_weight_path, by_name=True)
except Exception as exc:
    # Training from scratch is still allowed, but the reason is reported
    # instead of being swallowed, and KeyboardInterrupt is no longer caught.
    print('Could not load pretrained model weights. Weights can be found in the keras application folder \
        https://github.com/fchollet/keras/tree/master/keras/applications')
    print('Reason: {}'.format(exc))

optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
# The regression loss is built for the classes without background, hence len(classes_count)-1.
model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# model_all is never trained directly in the visible cells; it is compiled so that it can be saved.
model_all.compile(optimizer='sgd', loss='mae')

log_path = './logs'
if not os.path.isdir(log_path):
    os.mkdir(log_path)

callback = TensorBoard(log_path)
callback.set_model(model_all)

def write_log(callback, names, logs, batch_no):
    """Write one scalar summary per (name, value) pair through ``callback.writer``.

    Args:
        callback: object exposing a ``writer`` with ``add_summary`` and ``flush``.
        names: iterable of summary tags.
        logs: iterable of scalar values, paired with ``names`` by position.
        batch_no: step index recorded with every summary.
    """
    for tag, scalar in zip(names, logs):
        event = tf.Summary()
        entry = event.value.add()
        entry.tag = tag
        entry.simple_value = scalar
        callback.writer.add_summary(event, batch_no)
        # Flush after every value so each one reaches the log file immediately.
        callback.writer.flush()

loading weights from resnet50_weights_tf_dim_ordering_tf_kernels.h5


In [15]:
# inv_map is index -> name; swapping its keys and values gives name -> index again.
class_mapping = inv_map
class_mapping = {v: k for k, v in class_mapping.items()}

print(class_mapping)

{'chair': 4, 'bottle': 7, 'boat': 15, 'pottedplant': 3, 'cow': 0, 'motorbike': 13, 'horse': 14, 'cat': 9, 'train': 19, 'diningtable': 6, 'bicycle': 2, 'bird': 12, 'tvmonitor': 10, 'bg': 20, 'person': 1, 'dog': 16, 'sheep': 17, 'car': 11, 'sofa': 8, 'aeroplane': 5, 'bus': 18}


In [16]:
# Number of training iterations that make up one epoch in the training loop.
epoch_length = 1000
num_epochs = int(num_epochs)
# Position inside the current epoch; reset to 0 by the training loop at every epoch end.
iter_num = 0
# Global step counter used for the per-iteration TensorBoard scalars.
train_step = 0

# Per-iteration buffer; the training loop fills columns 0-4 with RPN cls loss, RPN regr loss,
# detector cls loss, detector regr loss and detector accuracy.
# NOTE(review): this rebinds `losses`, which an earlier cell imported as a module and the
# model-compile cell uses as `losses.rpn_loss_cls(...)`; re-running that cell after this one fails.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# Lowest summed epoch loss seen so far; weights are saved when it improves.
best_loss = np.Inf

class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')

Starting training


In [17]:
import roi_helpers as roi_helper

In [21]:
for epoch_num in range(num_epochs):

    progbar = generic_utils.Progbar(epoch_length)
    print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

    while True:
        # Once `epoch_length` iterations have been recorded, report the mean
        # number of positive RPN proposals per iteration and restart the window.
        if len(rpn_accuracy_rpn_monitor) == epoch_length and verbose:
            mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
            rpn_accuracy_rpn_monitor = []
            print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
            if mean_overlapping_bboxes == 0:
                print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

        X, Y, img_data = next(data_gen_train)

        # train_on_batch returns [total, cls, regr]. Element 0 is the total
        # loss, so it is dropped to keep the values aligned with the two tags.
        loss_rpn = model_rpn.train_on_batch(X, Y)
        write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn[1:], train_step)

        P_rpn = model_rpn.predict_on_batch(X)

        R = roi_helper.rpn_to_roi(P_rpn[0], P_rpn[1], K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300)
        X2, Y1, Y2, IouS = roi_helper.calc_iou(R, img_data, class_mapping)

        # No usable ROIs for this image: record zero positives and draw another sample.
        if X2 is None:
            rpn_accuracy_rpn_monitor.append(0)
            rpn_accuracy_for_epoch.append(0)
            continue

        # Rows whose last class column is 1 are negatives, the rest are positives.
        # np.where on a 1-D mask returns a 1-tuple, so [0] is the index array.
        neg_samples = np.where(Y1[0, :, -1] == 1)[0]
        pos_samples = np.where(Y1[0, :, -1] == 0)[0]

        rpn_accuracy_rpn_monitor.append(len(pos_samples))
        rpn_accuracy_for_epoch.append((len(pos_samples)))

        if num_rois > 1:
            # At most half of the ROIs are positives; negatives fill the rest.
            if len(pos_samples) < num_rois//2:
                selected_pos_samples = pos_samples.tolist()
            else:
                selected_pos_samples = np.random.choice(pos_samples, num_rois//2, replace=False).tolist()
            num_neg_needed = num_rois - len(selected_pos_samples)
            try:
                selected_neg_samples = np.random.choice(neg_samples, num_neg_needed, replace=False).tolist()
            except ValueError:
                # Fewer negatives than requested: sample with replacement instead.
                selected_neg_samples = np.random.choice(neg_samples, num_neg_needed, replace=True).tolist()

            sel_samples = selected_pos_samples + selected_neg_samples
        else:
            # in the extreme case where num_rois = 1, we pick a random pos or neg sample
            if np.random.randint(0, 2):
                sel_samples = random.choice(neg_samples)
            else:
                sel_samples = random.choice(pos_samples)

        # train_on_batch returns [total, cls, regr, acc]; skip the total for the same reason as above.
        loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
        write_log(callback, ['detection_cls_loss', 'detection_reg_loss', 'detection_acc'], loss_class[1:], train_step)
        train_step += 1

        # `losses` is the (epoch_length, 5) array created in the setup cell.
        losses[iter_num, 0] = loss_rpn[1]
        losses[iter_num, 1] = loss_rpn[2]

        losses[iter_num, 2] = loss_class[1]
        losses[iter_num, 3] = loss_class[2]
        losses[iter_num, 4] = loss_class[3]

        iter_num += 1

        progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                  ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))])

        if iter_num == epoch_length:
            loss_rpn_cls = np.mean(losses[:, 0])
            loss_rpn_regr = np.mean(losses[:, 1])
            loss_class_cls = np.mean(losses[:, 2])
            loss_class_regr = np.mean(losses[:, 3])
            class_acc = np.mean(losses[:, 4])

            mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
            rpn_accuracy_for_epoch = []

            # Measure the epoch duration before the timer is reset so that the
            # logged 'Elapsed_time' is the real value rather than ~0.
            elapsed_time = time.time() - start_time

            if verbose:
                print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                print('Loss RPN regression: {}'.format(loss_rpn_regr))
                print('Loss Detector classifier: {}'.format(loss_class_cls))
                print('Loss Detector regression: {}'.format(loss_class_regr))
                print('Elapsed time: {}'.format(elapsed_time))

            curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
            iter_num = 0
            start_time = time.time()

            write_log(callback,
                      ['Elapsed_time', 'mean_overlapping_bboxes', 'mean_rpn_cls_loss', 'mean_rpn_reg_loss',
                       'mean_detection_cls_loss', 'mean_detection_reg_loss', 'mean_detection_acc', 'total_loss'],
                      [elapsed_time, mean_overlapping_bboxes, loss_rpn_cls, loss_rpn_regr,
                       loss_class_cls, loss_class_regr, class_acc, curr_loss],
                      epoch_num)

            # Keep only the weights of the best epoch so far.
            if curr_loss < best_loss:
                if verbose:
                    print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss))
                best_loss = curr_loss
                model_all.save_weights(model_path)

            break

print('Training complete, exiting.')


Epoch 1/1

KeyboardInterrupt: 

### 测试网络

In [None]:
import os
import cv2
import numpy as np
import sys
import pickle
import time
import resnet60_Copy1 as nn

In [None]:
# Number of ROIs passed to the classifier per prediction call in the inference loop.
num_rois = 32
# Both names refer to the folder whose images the inference loop processes.
img_path = test_path = "./images"
network = "resnet50"
# Not read by any visible inference cell.
use_horizontal_flips = False
use_vertical_flips = False
rot_90 = False

In [None]:
def format_img_size(img):
    """Resize ``img`` so that its shorter side equals the global ``im_size``.

    Returns the resized image together with the scale factor that was applied.
    """
    target_short_side = float(im_size)
    rows, cols, _channels = img.shape

    if cols <= rows:
        # Width is the shorter (or equal) side.
        ratio = target_short_side / cols
        out_size = (int(target_short_side), int(ratio * rows))
    else:
        # Height is the shorter side.
        ratio = target_short_side / rows
        out_size = (int(ratio * cols), int(target_short_side))

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(img, out_size, interpolation=cv2.INTER_CUBIC)
    return resized, ratio

def format_img_channels(img):
    """Reverse the channel order, normalise, and reshape ``img`` to (1, C, H, W).

    The per-channel values in ``img_channel_mean`` are subtracted and the
    result is divided by ``img_scaling_factor``. The input array is not modified.
    """
    # Fancy indexing and astype both copy, so the caller's array stays untouched.
    arr = img[:, :, (2, 1, 0)].astype(np.float32)
    for channel in range(3):
        arr[:, :, channel] -= img_channel_mean[channel]
    arr /= img_scaling_factor
    # Move channels to the front, then add a leading batch axis.
    channels_first = np.transpose(arr, (2, 0, 1))
    return np.expand_dims(channels_first, axis=0)

def format_img(img):
    """Prepare ``img`` for prediction: resize first, then normalise the channels.

    Returns the network-ready array and the resize ratio.
    """
    resized, ratio = format_img_size(img)
    return format_img_channels(resized), ratio

# Method to transform the coordinates of the bounding box to its original size
def get_real_coordinates(ratio, x1, y1, x2, y2):
    """Map box corners from the resized image back to the original image.

    Args:
        ratio: scale factor that was applied when the image was resized.
        x1, y1, x2, y2: box corners in resized-image coordinates.

    Returns:
        Tuple ``(real_x1, real_y1, real_x2, real_y2)`` of ints, each rounded
        to the nearest integer.
    """
    # True division is required here: floor division (`//`) truncates the
    # quotient before round() runs, which biases every coordinate downward.
    real_x1 = int(round(x1 / ratio))
    real_y1 = int(round(y1 / ratio))
    real_x2 = int(round(x2 / ratio))
    real_y2 = int(round(y2 / ratio))

    return (real_x1, real_y1, real_x2 ,real_y2)

In [None]:
# Make sure the background class has an index.
if 'bg' not in class_mapping:
    class_mapping['bg'] = len(class_mapping)

# Swap keys and values so the mapping can be indexed by the predicted class index.
# NOTE(review): this rebinds `class_mapping`, so the cell is not idempotent (running it twice
# swaps the mapping back), and the training loop passes the same name to calc_iou.
class_mapping = {v: k for k, v in class_mapping.items()}
print(class_mapping)
# One random 3-component colour per value of the swapped mapping.
class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
num_rois = int(num_rois)


In [None]:
# Channel depth of the feature-map input fed to the classifier head.
num_features = 1024

# Choose channels-first or channels-last shapes from the backend's dimension ordering.
if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
    input_shape_features = (num_features, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)


img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(num_rois, 4))
# Unlike in training, the classifier gets the feature map as a separate input.
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, num_rois, nb_classes=len(class_mapping), trainable=True)

# All RPN outputs are exposed here; the inference loop unpacks three arrays from predict().
model_rpn = Model(img_input, rpn_layers)
# Both classifier models wrap the same input and output tensors: weights are loaded through
# `model_classifier`, while the inference loop predicts with `model_classifier_only`.
model_classifier_only = Model([feature_map_input, roi_input], classifier)

model_classifier = Model([feature_map_input, roi_input], classifier)

print('Loading weights from {}'.format(model_path))
model_rpn.load_weights(model_path, by_name=True)
model_classifier.load_weights(model_path, by_name=True)

# The models are only used for prediction in the visible cells; optimizer and loss are placeholders.
model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')


In [None]:
all_imgs = []

classes = {}

# Minimum class probability for a detection to be kept.
bbox_threshold = 0.8

visualise = True

# cv2.imwrite does not create missing folders, so make sure the output folder exists.
results_dir = './results_imgs'
if not os.path.isdir(results_dir):
    os.mkdir(results_dir)

for idx, img_name in enumerate(sorted(os.listdir(img_path))):
    if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
        continue
    print(img_name)
    st = time.time()
    filepath = os.path.join(img_path,img_name)

    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        print('Could not read image {}, skipping'.format(filepath))
        continue

    X, ratio = format_img(img)

    # format_img returns channels-first; switch back for a channels-last backend.
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    # The helper module is imported as `roi_helper` in this notebook; the
    # previous `roi_helpers` name was undefined and raised NameError.
    R = roi_helper.rpn_to_roi(Y1, Y2,  K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    # Feed the proposals to the classifier in chunks of `num_rois`.
    for jk in range(R.shape[0]//num_rois + 1):
        ROIs = np.expand_dims(R[num_rois*jk:num_rois*(jk+1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0]//num_rois:
            # Last, partial chunk: pad to `num_rois` rows by repeating the first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0],num_rois,curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            cls_num = np.argmax(P_cls[0, ii, :])

            # Drop low-confidence boxes and boxes whose best class is the last index (background).
            if np.max(P_cls[0, ii, :]) < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                tx /= classifier_regr_std[0]
                ty /= classifier_regr_std[1]
                tw /= classifier_regr_std[2]
                th /= classifier_regr_std[3]
                x, y, w, h = roi_helper.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: if the refinement fails, keep the unrefined ROI.
                pass
            # Scale from feature-map coordinates back to resized-image pixels.
            bboxes[cls_name].append([rpn_stride*x, rpn_stride*y, rpn_stride*(x+w), rpn_stride*(y+h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helper.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk,:]

            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

            textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
            all_dets.append((key,100*new_probs[jk]))

            (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
            textOrg = (real_x1, real_y1-0)

            # Black outline, white filled background, then the label text on top.
            cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
            cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    #cv2.imshow('img', img)
    #cv2.waitKey(0)
    cv2.imwrite('./results_imgs/{}.png'.format(idx),img)
