TODO: try structural integrity index (possibly meant: structural similarity index, SSIM)

In [49]:
import numpy as np
import cv2
from shapely.geometry import Polygon
import os

In [50]:
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Size of the blob handed to the network (see get_preds).
INPUT_WIDTH, INPUT_HEIGHT = 640, 640

# Detection thresholds used by unwrap_detections.
MIN_CONFIDENCE = 0.4                  # minimum objectness (row[4]) to keep a row
NON_MAXIMUM_SUPPRESION_THRESH = 0.45  # overlap threshold passed to cv2.dnn.NMSBoxes
SCORE_THRESH = 0.25                   # minimum best-class score / NMS score threshold

# Class names that classnames_to_ids keeps.
CLASSES = ['bicycle', 'car', 'motorbike', 'bus', 'truck']

# Each lane is a quadrilateral given as four [x, y] vertices in image pixels.
# NOTE(review): x values reach 1918, so these presumably target 1920-wide frames -- confirm.
LANE1 = [[258, 408], [105, 201], [113, 125], [408, 388]]
LANE2 = [[408, 388], [88, 98], [98, 51], [528, 378]]
LANE3 = [[528, 378], [48, 8], [78, 0], [638, 368]]

LANE4 = [[1142, 373], [1918, 292], [1915, 318], [1188, 393]]
LANE5 = [[1188, 393], [1915, 318], [1918, 343], [1243, 415]]
LANE6 = [[1243, 415], [1918, 343], [1918, 373], [1295, 440]]

LANE7 = [[1418, 618], [1918, 873], [1645, 878], [1243, 643]]
LANE8 = [[1243, 643], [1645, 878], [1398, 878], [1083, 664]]
LANE9 = [[1083, 664], [1398, 878], [1183, 878], [948, 683]]

# Lanes in index order; position i here is lane number i + 1.
_LANE_QUADS = (LANE1, LANE2, LANE3, LANE4, LANE5, LANE6, LANE7, LANE8, LANE9)

# Point sets in the (N, 1, 2) int32 layout that cv2.polylines is given.
LANES = [np.array(quad, np.int32).reshape((-1, 1, 2)) for quad in _LANE_QUADS]

# The same quads as shapely polygons, used for intersection-area tests.
LANE_POLYGONS = [Polygon(quad) for quad in _LANE_QUADS]

In [51]:
# Load the YOLOv5s ONNX model through OpenCV's DNN module.
# This module-level `net` is the network that get_formatted_predictions passes down.
net = cv2.dnn.readNet('assets/yolov5s.onnx')

In [52]:
# Pads the image to a square; the resize to INPUT_WIDTH x INPUT_HEIGHT is done
# later by cv2.dnn.blobFromImage in get_preds.
def format_yolov5(image):
    """Return a square, zero-padded uint8 copy of a 3-channel image.

    The original pixels are placed in the top-left corner of a black canvas
    whose side equals the larger of the image's two spatial dimensions.

    Args:
        image: array of shape (height, width, 3).

    Returns:
        A new ``np.uint8`` array of shape (side, side, 3).
    """
    height, width, _channels = image.shape
    side = max(width, height)

    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:height, :width] = image

    return canvas

In [53]:
def get_preds(image, net):
    """Run one forward pass of ``net`` on ``image`` and return the raw output.

    The image is first padded to a square with ``format_yolov5``, then turned
    into a blob scaled by 1/255 and resized to INPUT_WIDTH x INPUT_HEIGHT, with
    the red and blue channels swapped.

    Args:
        image: image array as accepted by ``format_yolov5``.
        net: network object exposing ``setInput`` and ``forward``.

    Returns:
        Whatever ``net.forward()`` returns.
    """
    squared = format_yolov5(image)
    net.setInput(
        cv2.dnn.blobFromImage(squared, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True)
    )
    return net.forward()

In [54]:
def classnames_to_ids(classnames, wanted=None):
    """Return the indices of ``classnames`` whose name is one of the wanted classes.

    Args:
        classnames: sequence of class names, where the position of a name is
            its class id.
        wanted: container of class names to keep. Defaults to the module-level
            ``CLASSES`` list when omitted.

    Returns:
        List of integer indices, in ascending order.
    """
    if wanted is None:
        wanted = CLASSES
    # enumerate avoids shadowing the builtin `id` and indexing by range(len(...)).
    return [index for index, name in enumerate(classnames) if name in wanted]

In [55]:
def get_classes(path="assets/classes.txt"):
    """Read class names from a text file, one name per line.

    Args:
        path: location of the class list file.

    Returns:
        List with one entry per line of the file, each stripped of
        surrounding whitespace.
    """
    with open(path, "r") as handle:
        return [line.strip() for line in handle]

In [56]:
def unwrap_detections(
    predictions,
    formatted_image,
    image,
    classes_path="assets/classes.txt",
    show_intermediary=False
):
    """Turn raw network output into bounding boxes for the classes in ``CLASSES``.

    Each row of ``predictions[0]`` is read as
    ``[center_x, center_y, width, height, objectness, class scores...]`` in
    network-input coordinates. Rows are kept when objectness is at least
    ``MIN_CONFIDENCE`` and the best class score exceeds ``SCORE_THRESH``. The
    surviving boxes are rescaled to ``formatted_image`` coordinates, passed
    through non-maximum suppression and finally filtered by class name.

    Note that suppression is class-agnostic and runs before the class filter,
    so a box of an unwanted class can suppress an overlapping wanted one.

    Args:
        predictions: network output; only ``predictions[0]`` is used and it
            must be a 2-D array.
        formatted_image: the square-padded image; only its shape is read, to
            compute the scale factors.
        image: image that is drawn on (in place) when ``show_intermediary``
            is true.
        classes_path: text file with one class name per line.
        show_intermediary: when true, draw lanes and detections on ``image``
            and show it in an OpenCV window until a key is pressed.

    Returns:
        List of ``np.array([left, top, width, height])`` boxes.
    """
    output_data = predictions[0]

    # ndarray.shape is (rows, cols, channels), i.e. (height, width, channels).
    image_height, image_width, _ = formatted_image.shape
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    class_ids = []
    confidences = []
    boxes = []

    # Discard low-objectness rows in one vectorised step so the Python loop
    # below only visits plausible detections.
    candidates = output_data[output_data[:, 4] >= MIN_CONFIDENCE]
    for row in candidates:
        classes_scores = row[5:]
        class_id = int(np.argmax(classes_scores))
        if classes_scores[class_id] <= SCORE_THRESH:
            continue

        x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
        left = int((x - 0.5 * w) * x_factor)
        top = int((y - 0.5 * h) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)

        confidences.append(float(row[4]))
        class_ids.append(class_id)
        boxes.append(np.array([left, top, width, height]))

    class_list = get_classes(classes_path)
    desired_classes_ids = set(classnames_to_ids(class_list))

    if boxes:
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESH, NON_MAXIMUM_SUPPRESION_THRESH)
    else:
        indexes = []
    # Depending on the OpenCV version the kept indices come back as a flat
    # array, an (N, 1) array or an empty tuple; normalise to plain ints.
    kept = [int(i) for i in np.asarray(indexes).reshape(-1)]

    result_class_ids = []
    result_boxes = []
    for i in kept:
        if class_ids[i] in desired_classes_ids:
            result_class_ids.append(class_ids[i])
            result_boxes.append(boxes[i])

    if show_intermediary:
        # Lane outlines are drawn once, even when nothing was detected.
        for point_set in LANES:
            cv2.polylines(image, [point_set], True, (0, 255, 255), 2)

        for box, class_id in zip(result_boxes, result_class_ids):
            cv2.rectangle(image, box, (0, 255, 255))
            # Filled strip above the box that carries the class label.
            cv2.rectangle(image, (box[0], box[1] - 20), (box[0] + box[2], box[1]), (0, 255, 255), -1)
            cv2.putText(image, class_list[class_id], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,0,0))

        cv2.imshow("output", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return result_boxes

In [57]:
def get_occupied_lanes(bounding_boxes, lane_polygons, min_area=500, bottom_half_lanes=(3, 4, 5)):
    """Mark which lanes are occupied by at least one bounding box.

    Every box is assigned to the single lane with which it has the largest
    intersection area, and that lane is marked occupied only when the area is
    greater than ``min_area``.

    Args:
        bounding_boxes: iterable of ``(x, y, w, h)`` boxes.
        lane_polygons: iterable of shapely polygons, one per lane.
        min_area: intersection area that must be exceeded for a lane to count
            as occupied.
        bottom_half_lanes: zero-based lane indices for which only the lower
            half of the box is intersected with the lane.
            NOTE(review): presumably compensates for camera perspective on
            those lanes -- confirm.

    Returns:
        ``np.int32`` array with one entry per lane: 1 if occupied, else 0.
    """
    def get_intersection_area(poly1, poly2):
        if poly1.intersects(poly2):
            return poly1.intersection(poly2).area
        return 0

    lane_polygons = list(lane_polygons)
    occupied_lanes_list = np.zeros(len(lane_polygons), np.int32)
    if not lane_polygons:
        # Nothing to intersect with; max() below would fail on an empty list.
        return occupied_lanes_list

    for bb in bounding_boxes:
        x, y, w, h = bb
        if w <= 0 or h <= 0:
            # A degenerate box has no area and would build an invalid polygon.
            continue

        # Both polygons are built once per box rather than once per lane.
        bb_poly = Polygon([(x, y), (x+w, y), (x+w, y+h), (x, y+h)])
        lower_half_poly = Polygon([(x, y + h//2), (x+w, y + h//2), (x+w, y+h), (x, y+h)])

        intersection_areas = [
            get_intersection_area(
                lower_half_poly if idx in bottom_half_lanes else bb_poly,
                lane_poly
            )
            for idx, lane_poly in enumerate(lane_polygons)
        ]

        maximum_area = max(intersection_areas)
        if maximum_area > min_area:
            occupied_lanes_list[intersection_areas.index(maximum_area)] = 1

    return occupied_lanes_list

In [58]:
def get_formatted_predictions_for_image(img_filename, query_filename, out_path, net):
    """Detect vehicles in one image and write the occupancy of the queried lanes.

    The query file's first line is skipped (presumably a count header --
    confirm); every following non-blank line is a 1-based lane number. The
    prediction file is written to ``out_path`` under the query file's base
    name with ``'query'`` replaced by ``'prediction'``. It holds the number of
    queried lanes followed by one ``"<lane> <0|1>"`` line per lane, without a
    trailing newline. An empty query produces an empty file.

    Args:
        img_filename: path of the image to analyse.
        query_filename: path of the query file.
        out_path: directory the prediction file is written to.
        net: network object forwarded to ``get_preds``.

    Raises:
        FileNotFoundError: if the image cannot be read.
        ValueError: if a queried lane number is outside the known lanes.
    """
    with open(query_filename, 'r') as query:
        # Blank lines (e.g. a trailing newline) would make int() fail below.
        queried_lanes = [
            line.strip() for line in query.read().splitlines()[1:] if line.strip()
        ]

    image = cv2.imread(img_filename)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {img_filename}')

    formatted_image = format_yolov5(image) # making the image square
    preds = get_preds(image, net)

    bounding_boxes = unwrap_detections(preds, formatted_image, image)

    occupied_lanes = get_occupied_lanes(bounding_boxes, LANE_POLYGONS)

    lines = []
    if queried_lanes:
        lines.append(str(len(queried_lanes)))
    for lane_text in queried_lanes:
        lane_number = int(lane_text)
        if not 1 <= lane_number <= len(occupied_lanes):
            # Without this check lane 0 would silently wrap to the last lane.
            raise ValueError(
                f'Lane number {lane_number} in {query_filename} is outside 1..{len(occupied_lanes)}'
            )
        lines.append(f'{lane_number} {occupied_lanes[lane_number - 1]}')

    prediction_name = os.path.basename(query_filename).replace('query', 'prediction')
    # "w" truncates any previous prediction file.
    with open(os.path.join(out_path, prediction_name), 'w') as f:
        f.write('\n'.join(lines))

In [59]:
def _query_pairing_key(filename):
    """Reduce an image or query filename to the key used to pair them up."""
    stem = os.path.splitext(filename)[0]
    # NOTE(review): assumes a query file is named after its image plus the word
    # 'query' (e.g. '01_1.jpg' and '01_1_query.txt') -- confirm against the dataset.
    return stem.replace('query', '').strip('_-. ')


def get_formatted_predictions(images_path, out_path):
    """Write a prediction file for every image/query pair found in ``images_path``.

    ``.jpg`` files are treated as images and ``.txt`` files as queries. They
    are paired by filename rather than by position, because ``os.listdir``
    returns entries in arbitrary order and zipping the two listings can attach
    a query to the wrong image. Images without a matching query are skipped
    with a warning.

    The module-level ``net`` is the network handed to
    ``get_formatted_predictions_for_image``.

    Args:
        images_path: directory holding the images and their query files.
        out_path: directory for the prediction files; created when missing.
    """
    import warnings

    if out_path:
        # Also handles absolute and nested paths.
        os.makedirs(out_path, exist_ok=True)

    # Sorted so the processing order is the same on every run.
    names = sorted(os.listdir(images_path))
    queries_by_key = {
        _query_pairing_key(name): name for name in names if name.endswith('.txt')
    }

    for img_name in names:
        if not img_name.endswith('.jpg'):
            continue

        query_name = queries_by_key.get(_query_pairing_key(img_name))
        if query_name is None:
            warnings.warn(f'No query file found for image {img_name}; skipping it')
            continue

        get_formatted_predictions_for_image(
            os.path.join(images_path, img_name),
            os.path.join(images_path, query_name),
            out_path,
            net
        )

In [60]:
# Run the pipeline on every image in ../train/Task1/ and write the
# prediction files to ../submission/Task1/.
get_formatted_predictions('../train/Task1/', '../submission/Task1/')