In [1]:
import numpy as np
import cv2
import time

In [2]:
# Open the input video; frames are pulled from it one at a time by the
# processing loop further down
video_path = 'videos/traffic-sign-to-test.mp4'
video = cv2.VideoCapture(video_path)
if not video.isOpened():
    # Fail early with a clear message: otherwise the loop reads zero frames
    # and the notebook only breaks later, when the never-created writer is released
    raise IOError('Could not open input video: {}'.format(video_path))

# The writer is created lazily, once the first annotated frame (and therefore
# the output frame size) is available
writer = None
# Frame height and width; filled in from the first frame that is read
h, w = None, None

In [4]:
# Class names, one per line; the position of a name in the list is its class id.
# Forward slashes are used so the path works on every platform (a backslash
# inside a normal string literal is also an invalid escape sequence).
with open('cfg/classes.names') as names_file:
    labels = [line.strip() for line in names_file]


# Loading the trained YOLO v4-tiny network (Darknet config + weights)
network = cv2.dnn.readNetFromDarknet('cfg/yolov4-tiny.cfg',
                                     'cfg/yolov4-tiny.weights')

layers_names_all = network.getLayerNames()

# Keeping only the names of the output layers (layers with unconnected outputs).
# The returned indexes are 1-based, hence the "- 1". The result is flattened
# because, depending on the OpenCV release, it is either a 1-D array or an
# N x 1 array, and only the flattened form indexes the name list in both cases.
layers_names_output = [
    layers_names_all[i - 1]
    for i in np.array(network.getUnconnectedOutLayers()).flatten()
]

# Setting minimum probability to eliminate weak predictions
probability_minimum = 0.5

# Setting threshold for filtering weak bounding boxes with non-maximum suppression
threshold = 0.3

# One colour per class. A locally seeded generator keeps the colours the
# same on every run without touching NumPy's global random state; the upper
# bound is exclusive, so 256 is needed for the full 0..255 range.
colours = np.random.RandomState(42).randint(0, 256, size=(len(labels), 3),
                                            dtype='uint8')

In [5]:
# Run the detector on every frame of the input video, draw the boxes that
# survive non-maximum suppression and write the annotated frames to the
# output video. Prints per-frame inference time and a final summary.

# f counts processed frames; t accumulates the seconds spent in the forward pass
f = 0
t = 0

# Frame rate for the output video: reuse the rate reported by the source so
# playback speed is preserved, falling back to 30 when none is reported
# (the comparison is written so that 0 and NaN both trigger the fallback)
fps_output = video.get(cv2.CAP_PROP_FPS)
if not (fps_output > 0):
    fps_output = 30

try:
    while True:
        ret, frame = video.read()
        if not ret:
            # No more frames (or the video could not be read)
            break

        if w is None or h is None:
            # Slicing from tuple only first two elements: height and width
            h, w = frame.shape[:2]

        # Getting blob from current frame
        # The 'cv2.dnn.blobFromImage' function returns 4-dimensional blob
        # from current frame: pixel values scaled by 1/255, resized to
        # 416x416 without cropping, with the R and B channels swapped
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                     swapRB=True, crop=False)

        network.setInput(blob)  # setting blob as input to the network
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short intervals; only the forward pass is timed
        start = time.perf_counter()
        output_from_network = network.forward(layers_names_output)
        end = time.perf_counter()

        # Increasing counters for frames and total time
        f += 1
        t += end - start

        print('Frame number {0} took {1:.5f} seconds'.format(f, end - start))

        # Candidate detections for this frame, kept as parallel lists
        bounding_boxes = []
        confidences = []
        class_numbers = []

        for result in output_from_network:
            for detected_objects in result:
                # Class scores start at index 5 of each detection row
                scores = detected_objects[5:]
                # Getting index of the class with the maximum value of probability
                class_current = np.argmax(scores)
                # Getting value of probability for defined class
                confidence_current = scores[class_current]

                # Eliminating weak predictions with minimum probability
                if confidence_current > probability_minimum:
                    # The first four values are the box centre, width and
                    # height; multiplying them elementwise by the frame
                    # width and height scales them to the original frame
                    box_current = detected_objects[0:4] * np.array([w, h, w, h])

                    # From centre and size, get the top left corner
                    # coordinates, that are x_min and y_min
                    x_center, y_center, box_width, box_height = box_current
                    x_min = int(x_center - (box_width / 2))
                    y_min = int(y_center - (box_height / 2))

                    # Adding results into prepared lists
                    bounding_boxes.append([x_min, y_min,
                                           int(box_width), int(box_height)])
                    confidences.append(float(confidence_current))
                    class_numbers.append(class_current)

        # Implementing non-maximum suppression of given bounding boxes
        # With this technique we exclude some of bounding boxes if their
        # corresponding confidences are low or there is another
        # bounding box for this region with higher confidence
        results = cv2.dnn.NMSBoxes(bounding_boxes, confidences,
                                   probability_minimum, threshold)

        if len(results) > 0:
            # Flattening through np.array so the indexes can be iterated
            # whatever container shape NMSBoxes returned
            for i in np.array(results).flatten():
                # Getting current bounding box coordinates,
                # its width and height
                x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
                box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]

                # Colour of the predicted class, as a plain list of ints
                colour_box_current = colours[class_numbers[i]].tolist()

                cv2.rectangle(frame, (x_min, y_min),
                              (x_min + box_width, y_min + box_height),
                              colour_box_current, 2)

                # Preparing text with label and confidence for current bounding box
                text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
                                                       confidences[i])

                # Label is drawn just above the top left corner of the box
                cv2.putText(frame, text_box_current, (x_min, y_min - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)

        if writer is None:
            # Created on the first frame, when the output frame size is known
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter('videos/result-traffic-cars.mp4', fourcc,
                                     fps_output,
                                     (frame.shape[1], frame.shape[0]), True)

        writer.write(frame)
finally:
    # Releasing video reader and writer, even if processing failed part-way.
    # The writer only exists if at least one frame was processed.
    video.release()
    if writer is not None:
        writer.release()


print()
print('Total number of frames', f)
print('Total amount of time {:.5f} seconds'.format(t))
# Average inference speed; skipped when nothing was timed to avoid dividing by zero
if t > 0:
    print('FPS:', round((f / t), 1))


Frame number 1 took 0.10210 seconds
Frame number 2 took 0.03806 seconds
Frame number 3 took 0.03706 seconds
Frame number 4 took 0.04417 seconds
Frame number 5 took 0.04508 seconds
Frame number 6 took 0.03968 seconds
Frame number 7 took 0.04139 seconds
Frame number 8 took 0.04308 seconds
Frame number 9 took 0.03860 seconds
Frame number 10 took 0.03909 seconds
Frame number 11 took 0.03762 seconds
Frame number 12 took 0.03756 seconds
Frame number 13 took 0.03608 seconds
Frame number 14 took 0.03698 seconds
Frame number 15 took 0.03857 seconds
Frame number 16 took 0.03827 seconds
Frame number 17 took 0.04008 seconds
Frame number 18 took 0.20336 seconds
Frame number 19 took 0.07463 seconds
Frame number 20 took 0.05259 seconds
Frame number 21 took 0.04559 seconds
Frame number 22 took 0.03663 seconds
Frame number 23 took 0.03808 seconds
Frame number 24 took 0.03809 seconds
Frame number 25 took 0.04107 seconds
Frame number 26 took 0.03707 seconds
Frame number 27 took 0.03807 seconds
Frame numb