In [1]:
def get_current_signal(bbox_tensor, image_size):
  """Return the index of the box whose horizontal centre is nearest the image centre.

  Args:
    bbox_tensor: 2-D tensor with one row per box. Columns 0 and 1 are read
      as x1 and x2, following the layout stated by the original comment:
      [x1, x2, y1, y2, confidence, class].
      NOTE(review): confirm this column order against whatever produces the boxes.
    image_size: (height, width) pair; only the width is used.

  Returns:
    The index tensor returned by torch.min over dim 0, i.e. the row index of
    the selected box.
  """
  _, width = image_size
  image_center_x = width / 2
  # The centre of a box is the midpoint of its two x edges. The previous
  # code subtracted the edges, which yields the box width instead.
  box_center_x = (bbox_tensor[:, 0] + bbox_tensor[:, 1]) / 2
  distances = torch.abs(image_center_x - box_center_x)
  _, min_idx = torch.min(distances, 0)
  return min_idx


In [2]:
# Label ids by state name, kept for reference:
#   {"stop": 0, "warning": 1, "go": 2, "ambiguous": 3}

# State id -> state name; create_json uses it to fill the 'state' field.
mapping = {0: "stop", 1: "warning", 2: "go", 3: "ambiguous"}

def create_json(labels, image_size, path):
  """
    Write per-frame traffic-signal labels to a JSON file.

    labels : (N, 10, 5)
      N - number of frames
      10 - max number of traffic signals
      5 - number of label values; columns 0-3 are written as 'coords' and
          column 4 is the state id looked up in `mapping`
    image_size : (height, width), forwarded to get_current_signal
    path : path to file

    The file contains a JSON list with one object per frame, shaped
    {frame_index: {signal_index: {'coords', 'state', 'affect'}}}. 'affect'
    is True only for the signal index returned by get_current_signal.

    NOTE(review): read_prediction in this notebook indexes the loaded JSON as
    a single dict keyed by frame id, not as a list - confirm which layout
    the consumers of this file expect.
  """
  import json

  frames = []
  for i, label_tensor in enumerate(labels):
    # A frame without rows has nothing to select; -1 never equals an index.
    affecting_idx = -1
    if len(label_tensor) > 0:
      # Use the caller's image size (it used to be hard-coded to 256x256).
      affecting_idx = int(get_current_signal(label_tensor, image_size))

    signals = {}
    for j, labeled_frame in enumerate(label_tensor):
      # Convert to plain Python numbers up front: NumPy scalars are not
      # JSON-serialisable, and the former eval(str(...)) round-trip only
      # worked while their repr happened to be a bare number.
      values = labeled_frame.tolist()
      signals[str(j)] = {
          'coords': [int(v) for v in values[0:4]],  # truncates toward zero
          'state': mapping[values[4]],  # 1.0 hashes equal to 1, so float ids work
          'affect': j == affecting_idx,
      }
    frames.append({str(i): signals})

  with open(path, "w") as f:
    json.dump(frames, f)

In [4]:
import numpy as np
import torch

# Two identical frames, each holding three labelled boxes
# (four coordinates followed by a state id).
A = torch.Tensor([[
    [0, 0, 10, 10, 1],
    [0, 10, 10, 20, 2],
    [0, 10, 20, 20, 2],
]] * 2)
predict_path = '...'
create_json(A, (256, 256), predict_path)

In [5]:
"""We consider non overlapping sets of intervals only."""

import os
import json
import argparse

import numpy as np

# IoU a predicted interval must strictly exceed (see evaluate_f1) to count
# as matching a ground-truth interval.
THRESHOLD_IOU = 0.5


def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two string options are registered, both required: -p/--pred_path and
    -gt/--gt_path.
    """
    cli = argparse.ArgumentParser(description='Project evaluation metric')
    path_options = (
        ('-p', '--pred_path', 'Path to the prediction .json file'),
        ('-gt', '--gt_path', 'Path to the ground truth .json file'),
    )
    for short_flag, long_flag, help_text in path_options:
        cli.add_argument(
            short_flag,
            long_flag,
            required=True,
            type=str,
            help=help_text,
        )
    return cli.parse_args()


def tl_encode(tl_history):
    """Collapse a sequence of frame ids into [first, last] runs.

    A new run starts wherever the step from the previous id is greater
    than 1. An empty input gives an empty list; a single id gives one
    run whose first and last are that id.
    """
    count = len(tl_history)
    if count == 0:
        return []

    runs = []
    run_first = tl_history[0]
    for pos in range(1, count):
        previous, current = tl_history[pos - 1], tl_history[pos]
        if current - previous > 1:
            # Gap found: close the current run and open the next one.
            runs.append([run_first, previous])
            run_first = current
    runs.append([run_first, tl_history[count - 1]])
    return runs


def read_json(filename):
    """Open `filename` and return its parsed JSON content."""
    with open(filename) as source:
        return json.load(source)


def read_prediction(pred_path):
    """Load a prediction file and return per-colour frame intervals.

    The file is read as {frame_id: {traffic_light_id: {...}}}. For every
    traffic light whose 'affect' entry is truthy and whose 'state' is one
    of 'green', 'yellow' or 'red', the frame id is recorded under that
    colour. Each non-empty colour list is then converted to intervals
    with tl_encode.
    """
    assert os.path.exists(pred_path), "Pred file does not exist %s" % pred_path
    frames_by_color = {'green': [], 'yellow': [], 'red': []}
    prediction = read_json(pred_path)

    # Visit frames in numeric order; the keys themselves are strings.
    for frame_number in sorted(int(key) for key in prediction.keys()):
        lights = prediction[str(frame_number)]
        for light_id in lights:
            light = lights[light_id]
            if not light['affect']:
                continue
            state = light['state']
            if state in frames_by_color:
                frames_by_color[state].append(frame_number)

    # Encode to intervals
    return {
        color: tl_encode(frames) if len(frames) > 0 else frames
        for color, frames in frames_by_color.items()
    }


def calculate_iou(a, b):
    """Intersection over union of two [start, end] intervals.

    Lengths are measured as end - start. Returns 0.0 when the intervals do
    not overlap or when the combined span has no positive length.
    """
    overlapping = a[1] >= b[0] and b[1] >= a[0]
    if not overlapping:
        return 0.0

    overlap = min(a[1], b[1]) - max(a[0], b[0])
    span = max(a[1], b[1]) - min(a[0], b[0])
    if span > 0:
        return overlap / span
    return 0.0


def evaluate_f1(preds, trgs):
    """Interval-level F1 between predicted and target interval lists.

    A prediction is a true positive when its best IoU against any target
    is strictly greater than THRESHOLD_IOU. Returns 1.0 when both lists
    are empty.
    """
    def safe_ratio(top, bottom):
        # Ratios with a zero denominator are reported as 0.0.
        return top / bottom if bottom != 0 else 0.0

    if len(preds) == 0 and len(trgs) == 0:
        return 1.0

    # Best IoU of each prediction over all targets (0.0 when nothing overlaps).
    best_ious = [
        max([0.0] + [calculate_iou(pred, trg) for trg in trgs])
        for pred in preds
    ]

    true_pos = sum(1 for value in best_ious if value > THRESHOLD_IOU)
    false_pos = len(preds) - true_pos
    false_neg = len(trgs) - true_pos

    precision = safe_ratio(true_pos, true_pos + false_pos)
    recall = safe_ratio(true_pos, true_pos + false_neg)
    return safe_ratio(2 * precision * recall, precision + recall)


def metric(pred_path: str, gt_path: str):
    """Print the per-colour F1 scores and their mean for a prediction file.

    The colours evaluated are the keys of the ground-truth JSON; each one
    is looked up in the intervals returned by read_prediction.
    """
    assert os.path.exists(pred_path), "Predictions file does not exist"
    assert os.path.exists(gt_path), "Ground truth file does not exist"

    predicted = read_prediction(pred_path)
    ground_truth = read_json(gt_path)
    print("Predicted:", predicted)
    print("Ground truth:", ground_truth)

    print("Color metrics:")
    scores = []
    for color, target_intervals in ground_truth.items():
        score = evaluate_f1(predicted[color], target_intervals)
        scores.append(score)
        print(color, score)
    print("Final result:", np.mean(scores))



# Evaluate the prediction file at predict_path against video_5_gt.json.
metric(pred_path=predict_path, gt_path='video_5_gt.json')


Predicted: {'green': [[125, 221], [356, 626], [909, 916], [925, 925], [929, 946], [949, 950], [953, 979], [981, 1016], [1019, 1020], [1023, 1024]], 'yellow': [[222, 288], [305, 355]], 'red': [[1133, 2402]]}
Ground truth: {'green': [[130, 219], [367, 617], [860, 1021]], 'yellow': [[222, 279], [320, 354]], 'red': [[1119, 2402]]}
Color metrics:
green 0.30769230769230765
yellow 1.0
red 1.0
Final result: 0.7692307692307692
