In [1]:
"""
Use this ipynb to run inference for the following TensorFlow models:
- CenterNet HourGlass104 512x512
- SSD MobileNet V2 FPNLite 320, adapted to 640

For this code to run, a specific folder structure is required;
a different folder structure can be used, provided the paths to all required files are updated accordingly.
Default folder structure:
main folder
  - Tensorflow
    - models
    - protoc
    - scripts
    - workspace
      - annotations
      - images
      - models
      - pre-trained-models

Model training is not covered in this notebook. 

Software requirements are only covered briefly here. It is highly recommended to use a separate environment,
such as a Conda environment or a Docker container.
Some software requirements:
- TensorFlow Object Detection API
- Protobuf for TF OD API
- Any necessary Python packages and Path/Env variables required by the above

"""

'\nUse this ipynb to run inference for the following TensorFlow models:\n- CenterNet HourGlass104 512x512\n- SSD MobileNet V2 FPNLite 320, adapted to 640\n\nFor this code to run, a specific folder structure is required;\ndifferent folder structure can be used after specifying the paths for all required files.\n\nModel training is not covered in this notebook. \n\nSoftware requirements are only covered briefly here. It is highly recommended to use a separate environemnt,\nsuch as Conda environment or Docker container.\nSome software requirements:\n- TensorFlow Object Detection API\n- Protobuf for TF OD API\n- Any necessary Python packages and Path/Env variables required by the above\n\n\n'

### Folder Structure and paths

In [5]:
import os

In [3]:
# Settings that select the model to run. To switch models, change the custom
# model name (it is also the name of the model's folder under workspace/models)
# together with the matching TF2 model zoo name and download URL.
CUSTOM_MODEL_NAME = 'CenterNet' 
PRETRAINED_MODEL_NAME = 'centernet_hg104_512x512_coco17_tpu-8'
PRETRAINED_MODEL_URL = 'http://download.tensorflow.org/models/object_detection/tf2/20200713/centernet_hg104_512x512_coco17_tpu-8.tar.gz'
TF_RECORD_SCRIPT_NAME = 'generate_tfrecord.py'
LABEL_MAP_NAME = 'label_map.pbtxt'
# The label map and TF record script names do not need to change, because the same dataset is used as for the SSD model.

In [6]:
# Project directories keyed by role. Every value is a relative path joined
# from the listed components with os.path.join.
paths = {
    key: os.path.join(*parts)
    for key, parts in (
        ('WORKSPACE_PATH', ('Tensorflow', 'workspace')),
        ('SCRIPTS_PATH', ('Tensorflow', 'scripts')),
        ('APIMODEL_PATH', ('Tensorflow', 'models')),
        ('ANNOTATION_PATH', ('Tensorflow', 'workspace', 'annotations')),
        ('IMAGE_PATH', ('Tensorflow', 'workspace', 'images')),
        ('MODEL_PATH', ('Tensorflow', 'workspace', 'models')),
        ('PRETRAINED_MODEL_PATH', ('Tensorflow', 'workspace', 'pre-trained-models')),
        # Everything below lives inside the folder of the selected custom model
        ('CHECKPOINT_PATH', ('Tensorflow', 'workspace', 'models', CUSTOM_MODEL_NAME)),
        ('OUTPUT_PATH', ('Tensorflow', 'workspace', 'models', CUSTOM_MODEL_NAME, 'export')),
        ('TFJS_PATH', ('Tensorflow', 'workspace', 'models', CUSTOM_MODEL_NAME, 'tfjsexport')),
        ('TFLITE_PATH', ('Tensorflow', 'workspace', 'models', CUSTOM_MODEL_NAME, 'tfliteexport')),
        ('PROTOC_PATH', ('Tensorflow', 'protoc')),
    )
}

In [7]:
# Individual files the notebook reads, keyed by role: the model's pipeline
# config, the TF record generation script and the label map.
files = {
    key: os.path.join(*parts)
    for key, parts in (
        ('PIPELINE_CONFIG', ('Tensorflow', 'workspace', 'models', CUSTOM_MODEL_NAME, 'pipeline.config')),
        ('TF_RECORD_SCRIPT', (paths['SCRIPTS_PATH'], TF_RECORD_SCRIPT_NAME)),
        ('LABELMAP', (paths['ANNOTATION_PATH'], LABEL_MAP_NAME)),
    )
}

### Inference

In [8]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import os
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
from object_detection.utils import config_util
from xml.dom.minidom import parseString
from lxml.etree import Element, SubElement, tostring

In [None]:
"""
One cell that automates reading images, running inference, and writing the predictions to file.
Before running, set the input image folder, the confidence threshold and the checkpoint name
at the top of this cell. The folder the predictions are saved to is set in the
write_predictions_txt / write_predictions_xml functions further down in this cell.
"""

# Folder holding the images to be fed to the model -- CHANGE this for your machine
IMAGE_PATH = r'D:\DL_data\Accuracy\Stage 2 Ensemble\Test_images'

# Confidence score threshold: predictions scoring below it are dropped after NMS
conf_thd = 0.28

# Checkpoint to restore, looked up inside paths['CHECKPOINT_PATH'];
# change the number to the latest checkpoint of the selected model
CHECKPOINT_NAME = 'ckpt-24'

######## TFOD block #########
# Load pipeline config and build a detection model (inference mode, not training)
configs = config_util.get_configs_from_pipeline_file(files['PIPELINE_CONFIG'])
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore the checkpoint weights into the model.
# NOTE(review): expect_partial() is presumably used to silence warnings about
# checkpoint values that are not needed for inference -- confirm against the TF docs.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(paths['CHECKPOINT_PATH'], CHECKPOINT_NAME)).expect_partial()

@tf.function
def detect_fn(image):
    """
    Run the detection model on `image` and return its post-processed detections.

    image: input tensor; the caller in this notebook passes a float32 tensor
           with a leading batch axis of size 1.
    return: whatever detection_model.postprocess produces (the caller reads
            it as a dict of tensors).
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

# Build the category index from the label map file (files['LABELMAP']).
# NOTE(review): category_index is not referenced again in this cell -- presumably
# kept for visualisation; confirm before removing.
category_index = label_map_util.create_category_index_from_labelmap(files['LABELMAP'])

######## NMS block, to remove redundant bboxes  ########
def nms(rects, thd=0.5):
    """
    Non-max suppression: drop rectangles that heavily overlap a better-scoring one.

    rects: sequence of objects ([x1, y1, x2, y2], confidence, class)
    thd:   overlap threshold. The overlap of two rectangles is their
           intersection area divided by the area of the SMALLER rectangle
           (this is not IoU, which divides by the area of the union).
    return: (boxes, scores, classes) -- three parallel lists describing the
            rectangles that were kept, in their original order.

    For every rectangle i that has not been removed yet, the rectangles from i
    onwards whose overlap with i is at least thd form a group (i itself
    included), and only the highest-confidence member of the group is kept.
    The class of a rectangle is not taken into account.
    """
    count = len(rects)
    remove = [False] * count
    for i in range(count - 1):
        if remove[i]:
            continue
        inter = [0.0] * count
        for j in range(i, count):
            if remove[j]:
                continue
            smaller_area = min(square(rects[i][0]), square(rects[j][0]))
            # A zero-area (degenerate) box cannot overlap anything; leaving its
            # overlap at 0.0 avoids a division by zero.
            if smaller_area > 0:
                inter[j] = intersection(rects[i][0], rects[j][0]) / smaller_area

        group = [k for k in range(i, count) if inter[k] >= thd]
        if not group:
            continue
        # max() returns the first of several equal scores, so ties are won by
        # the rectangle that comes first.
        best = max(group, key=lambda k: rects[k][1])
        for k in group:
            if k != best:
                remove[k] = True

    kept = [rect for rect, removed in zip(rects, remove) if not removed]
    boxes = [rect[0] for rect in kept]
    scores = [rect[1] for rect in kept]
    classes = [rect[2] for rect in kept]
    return boxes, scores, classes

def intersection(rect1, rect2):
    """
    Area of the overlap between two axis-aligned rectangles.

    rect1, rect2: sequences [x1, y1, x2, y2]; indices 0 and 2 are the low and
                  high coordinate on one axis, indices 1 and 3 on the other.
    return: the overlap area, 0 when the rectangles do not overlap.
    """
    low_x = max(rect1[0], rect2[0])
    high_x = min(rect1[2], rect2[2])
    low_y = max(rect1[1], rect2[1])
    high_y = min(rect1[3], rect2[3])
    # A negative extent means the rectangles are apart on that axis
    overlap_x = max(0, high_x - low_x)
    overlap_y = max(0, high_y - low_y)
    return overlap_x * overlap_y

def square(rect):
    """
    Area of a rectangle given as [x1, y1, x2, y2].

    Absolute differences are used, so the order of the two corners does not matter.
    """
    side_a = abs(rect[2] - rect[0])
    side_b = abs(rect[3] - rect[1])
    return side_a * side_b

def predictions_above_confidence(boxes_nms, scores_nms, labels_nms, conf_thd):
    """
    Keep only the predictions whose score is at least conf_thd.

    boxes_nms, scores_nms, labels_nms: parallel sequences (box, score, label)
    conf_thd: minimum score; a score equal to the threshold is kept
    return: (boxes, scores, labels) -- three parallel lists with the surviving
            predictions, in their original order.
    """
    kept = [
        (box, score, label)
        for box, score, label in zip(boxes_nms, scores_nms, labels_nms)
        if score >= conf_thd
    ]
    boxes_final = [box for box, _, _ in kept]
    scores_final = [score for _, score, _ in kept]
    labels_final = [label for _, _, label in kept]
    return boxes_final, scores_final, labels_final

########  Write to txt block  ########
def write_predictions_txt(boxes_list, scores_list, labels_list, image_name,
                          out_path=r'D:\DL_data\Accuracy\Stage 2 Ensemble\cn'):
    """
    Write the predictions for one image to <out_path>/<image_name>.txt.

    boxes_list:  predicted boxes, each unpacked as [y1, x1, y2, x2] (the order
                 of TFOD predictions according to the original author)
    scores_list: confidence score per box
    labels_list: class label per box
    image_name:  image name WITHOUT file extension; the txt file gets the same
                 name so it can be matched to the input image/annotation
    out_path:    output folder; created when it does not exist yet

    One line is written per box: 'label score cx cy w h', where the score has
    5 decimals and cx, cy, w, h are the box centre and size in the same units
    as the input box (relative numbers for TFOD output).
    """
    os.makedirs(out_path, exist_ok=True)

    with open(os.path.join(out_path, image_name + '.txt'), 'w') as f:
        for box, score, label in zip(boxes_list, scores_list, labels_list):
            y1, x1, y2, x2 = box
            # convert corner format into yolo-style [x-center, y-center, width, height]
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            w = x2 - x1
            h = y2 - y1
            fields = [str(label), f'{score:.5f}', str(cx), str(cy), str(w), str(h)]
            f.write(' '.join(fields) + '\n')

########  Write to xml block  ########
def write_predictions_xml(boxes_list, labels_list, width, height, image_name,
                          channels=3, outpath=r'D:\DL_data\Accuracy\Stage 2 Ensemble\cn'):
    """
    Write the predictions for one image to a VOC-style xml file.

    boxes_list:    predicted boxes, each unpacked as [y1, x1, y2, x2] in
                   relative coordinates (the order of TFOD predictions
                   according to the original author)
    labels_list:   integer class per box, used as an index into label_names
    width, height: image size in pixels; written to <size> and used to convert
                   the boxes to absolute coordinates
    image_name:    image file name; written to <filename>, and its stem names
                   the xml file
    channels:      value written to <depth>
    outpath:       output folder; created when it does not exist yet

    The file is saved as <outpath>/<image stem>_CN.xml.
    """
    # hard code the label names, so we don't have to read files; and names won't change
    label_names = [ # 23 road mark classes
      {'name':'25', 'id':1}, {'name':'30', 'id':2}, {'name':'35', 'id':3}, {'name':'40', 'id':4},
      {'name':'45', 'id':5}, {'name':'50', 'id':6}, {'name':'bike', 'id':7}, {'name':'bus', 'id':8},
      {'name':'diamond', 'id':9}, {'name':'F', 'id':10}, {'name':'FL', 'id':11}, {'name':'FR', 'id':12},
      {'name':'KC', 'id':13}, {'name':'FLR', 'id':14}, {'name':'L', 'id':15}, {'name':'ped', 'id':16},
      {'name':'rail', 'id':17}, {'name':'R', 'id':18}, {'name':'school', 'id':19}, {'name':'signal', 'id':20},
      {'name':'stop', 'id':21}, {'name':'xing', 'id':22}, {'name':'yield', 'id':23},
       # 9 helper classes
      {'name':'biker', 'id':24}, {'name':'car', 'id':25}, {'name':'pedestrian', 'id':26},
      {'name':'traffic_light', 'id':27}, {'name':'truck', 'id':28}, {'name':'stop_sign', 'id':29},
      {'name':'yield_sign', 'id':30}, {'name':'school_sign', 'id':31}, {'name':'ped_sign', 'id':32} ]

    def add_node(parent, tag, text=None):
        """Append a <tag> child to parent, optionally with text, and return it."""
        node = SubElement(parent, tag)
        if text is not None:
            node.text = text
        return node

    # Header of the annotation: folder, file name, source and image size
    node_root = Element('annotation')
    add_node(node_root, 'folder', 'folder')    # customize what you want the 'folder' line to say
    add_node(node_root, 'filename', image_name)
    node_source = add_node(node_root, 'source')
    add_node(node_source, 'database', 'Predictions from Custom Models')    # customize the 'database' text
    node_size = add_node(node_root, 'size')
    add_node(node_size, 'width', str(width))
    add_node(node_size, 'height', str(height))
    add_node(node_size, 'depth', str(channels))
    add_node(node_root, 'segmented', '0')

    # One <object> per predicted box, taken from the function's own arguments
    for box, label in zip(boxes_list, labels_list):
        y1, x1, y2, x2 = box
        node_object = add_node(node_root, 'object')
        # NOTE(review): label is used as a list index, so label 0 maps to the
        # entry with id 1. This assumes zero-based class indices -- confirm
        # against the model output before feeding 1-based ids.
        add_node(node_object, 'name', label_names[label]['name'])
        add_node(node_object, 'pose', 'Unspecified')
        add_node(node_object, 'truncated', '0')
        add_node(node_object, 'difficult', '0')
        node_bndbox = add_node(node_object, 'bndbox')
        # voc xml format uses absolute pixel coordinates
        add_node(node_bndbox, 'xmin', str(int(x1 * width)))
        add_node(node_bndbox, 'ymin', str(int(y1 * height)))
        add_node(node_bndbox, 'xmax', str(int(x2 * width)))
        add_node(node_bndbox, 'ymax', str(int(y2 * height)))

    xml = tostring(node_root, pretty_print=True)

    os.makedirs(outpath, exist_ok=True)
    # splitext only strips the extension, so dots inside the name are kept
    xml_name = os.path.splitext(image_name)[0] + '_CN' + '.xml'
    with open(os.path.join(outpath, xml_name), "wb") as f:
        f.write(xml)

########  Functional code block  ########
# loop thru all images in image_path
# Run the full pipeline for every file in IMAGE_PATH:
# read image -> detect -> NMS -> confidence filter -> write predictions
for file in os.listdir(IMAGE_PATH):
    # File name without extension; splitext keeps dots that are part of the
    # name ('a.b.jpg' -> 'a.b'), so the output matches the image/annotation name
    image_name = os.path.splitext(file)[0]

    # cv2.imread returns None instead of raising when the entry cannot be
    # decoded (sub-folder, non-image file, corrupt image), so skip those
    img = cv2.imread(os.path.join(IMAGE_PATH, file))
    if img is None:
        print(f'Skipping {file}: not a readable image')
        continue
    height, width, channels = img.shape

    # TFOD model, run inference on a batch of one image
    # NOTE(review): cv2.imread yields BGR channel order -- confirm this matches
    # the channel order the model was trained with.
    input_tensor = tf.convert_to_tensor(np.expand_dims(img, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    num_detections = int(detections.pop('num_detections'))
    # drop the batch axis and keep only the first num_detections entries of every output
    detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}

    # Extract TFOD output
    boxes_list = detections['detection_boxes']
    scores_list = detections['detection_scores']
    # TFOD returns the predicted classes as floats, change them to int
    labels_list = [int(i) for i in detections['detection_classes']]

    # prepare data for NMS: one (box, confidence, class) tuple per detection
    rectangles = list(zip(boxes_list, scores_list, labels_list))

    # run NMS (non-max suppression)
    boxes_nms, scores_nms, labels_nms = nms(rectangles)

    # filter out predictions lower than the confidence threshold
    boxes_final, scores_final, labels_final = predictions_above_confidence(boxes_nms, scores_nms, labels_nms, conf_thd)

    # Optional: also write the predictions to xml files
    # write_predictions_xml(boxes_final, labels_final, width, height, file)

    # write predictions to txt file as required by review_object_detection_metrics
    write_predictions_txt(boxes_final, scores_final, labels_final, image_name)