# Make the necessary imports.

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


# Let's define a few functions that we'll call later.

In [2]:
def process_image(img):
    """Resize, rescale and batch an image so it can be fed to the network.

    # Argument:
        img: original image (an array as returned by cv2.imread or
            VideoCapture.read).

    # Returns
        image: float32 ndarray resized to 416x416, divided by 255 and given
            a leading batch axis of size 1, i.e. (1, 416, 416, 3) for a
            3-channel input.

    # Raises
        ValueError: if img is None.
    """
    # cv2.imread and VideoCapture.read hand back None instead of raising when
    # they fail; catch that here with a readable message rather than letting
    # cv2.resize fail on it.
    if img is None:
        raise ValueError('process_image received None instead of an image; '
                         'check that the image or video frame was read '
                         'successfully.')

    image = cv2.resize(img, (416, 416),
                       interpolation=cv2.INTER_CUBIC)
    image = np.array(image, dtype='float32')
    image /= 255.
    # Add the batch dimension in front.
    image = np.expand_dims(image, axis=0)

    return image

In [3]:
def get_classes(file):
    """Load the class names listed in a text file, one name per line.

    # Argument:
        file: path of the text file holding the class names.

    # Returns
        class_names: List of str, one entry per line of the file with
            leading/trailing whitespace (including the newline) removed.

    """
    with open(file) as handle:
        return [line.strip() for line in handle]

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw the detected boxes and their labels on the image (in place).

    For every detection a rectangle and a "<class name> <score>" label are
    drawn onto `image`, and the class, score and raw box are printed.

    # Argument:
        image: original image; it is modified in place.
        boxes: ndarray, boxes of objects, each given as (x, y, w, h) with
            (x, y) the corner nearest the image origin.
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects (indices into all_classes).
        all_classes: all classes name.
    """
    img_h, img_w = image.shape[0], image.shape[1]

    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round to the nearest pixel, clamp to the image and cast to built-in
        # int so the point tuples handed to OpenCV hold plain integers.
        left = max(0, int(np.floor(x + 0.5)))
        top = max(0, int(np.floor(y + 0.5)))
        right = min(img_w, int(np.floor(x + w + 0.5)))
        bottom = min(img_h, int(np.floor(y + h + 0.5)))

        # The label normally sits 6 px above the box. For boxes touching the
        # top edge that baseline would be off-image, so keep it at least
        # 15 px down (about one line of text at this font scale).
        label_y = max(top - 6, 15)
        label = '{0} {1:.2f}'.format(all_classes[cl], score)

        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, label,
                    (left, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Run the yolo v3 model on one image and annotate it.

    The image is preprocessed, passed to `yolo.predict` together with the
    original image shape, and the prediction time is printed. When boxes
    are returned they are drawn onto `image`.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in, with any
            detections drawn on it.
    """
    net_input = process_image(image)

    t0 = time.time()
    boxes, classes, scores = yolo.predict(net_input, image.shape)
    elapsed = time.time() - t0

    print('time: {0:.2f}s'.format(elapsed))

    # predict may hand back None for the boxes (presumably when nothing was
    # detected); in that case there is nothing to draw.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image

In [6]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect objects in every frame of a video file.

    Reads videos/test/<video>, shows each annotated frame in a window named
    "detection" and writes the annotated frames to videos/res/<video>.
    Pressing Esc stops early.

    # Argument:
        video: video file name (looked up inside videos/test).
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    if not camera.isOpened():
        # Without this check a bad path just yields an empty result file.
        camera.release()
        raise IOError('Could not open video: {0}'.format(video_path))

    # The writer does not create missing directories itself.
    out_dir = os.path.join("videos", "res")
    os.makedirs(out_dir, exist_ok=True)

    cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

    # Prepare for saving the detected video
    sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
          int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'mpeg')

    vout = cv2.VideoWriter()
    try:
        vout.open(os.path.join(out_dir, video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            # read() reports failure at end of stream.
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # 27 is the Esc key.
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        # Release the file handles and the window even if detection raises
        # or the run is interrupted, so the output file is finalised.
        vout.release()
        camera.release()
        cv2.destroyWindow("detection")
    

# Let's test the model on an image and save the resultant image.

In [7]:
# Build the detector once; later cells reuse `yolo` for every image/frame.
# NOTE(review): the two 0.5 arguments look like detection thresholds, but
# their exact meaning is defined in model/yolo_model.py -- confirm there.
yolo = YOLO(0.5, 0.5)
# Text file listing the class names, one per line (read by get_classes).
file = 'data/coco_classes.txt'
all_classes = get_classes(file)

In [14]:
# Name of the test image; the same name is used for the input in images/test
# and for the annotated result in images/res.
f = 'a.jpg'
src_path = os.path.join('images', 'test', f)
image = cv2.imread(src_path)
# imread returns None (it does not raise) when the file is missing or unreadable.
if image is None:
    raise IOError('Could not read image: {0}'.format(src_path))
cv2.imshow('img', image)
# imshow only paints once the GUI event loop runs; pump it briefly.
cv2.waitKey(1)
image = detect_image(image, yolo, all_classes)
cv2.imwrite(os.path.join('images', 'res', f), image)

time: 8.84s
class: person, score: 0.60
box coordinate x,y,w,h: [2523.36752415 1482.90807486  621.1425662  1302.12979794]
class: bicycle, score: 0.84
box coordinate x,y,w,h: [2877.70164013 2007.04590225 1301.56436563  721.28457355]
class: bicycle, score: 0.51
box coordinate x,y,w,h: [ 816.12226367 1952.52190018 1265.25861025  818.21106863]



True

# Run the YOLO model on live camera frames. Keep in mind that this particular version of YOLO takes about 7 s to process a single frame, so the video will be very laggy.

In [10]:
# Run detection on frames from the default camera (device 0) until 'q' is pressed.
video_capture = cv2.VideoCapture(0)
try:
    while True:
        ok, frame = video_capture.read()
        # A failed grab yields no usable frame; stop instead of passing it
        # to the detector.
        if not ok:
            print('Could not read a frame from the camera; stopping.')
            break
        canvas = detect_image(frame, yolo, all_classes)
        cv2.imshow('Video', canvas)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including when the cell
    # is interrupted or detection raises.
    video_capture.release()
    cv2.destroyAllWindows()

time: 5.99s
time: 6.47s
class: person, score: 0.99
box coordinate x,y,w,h: [158.51669312 115.13115406 400.38696289 316.82581902]
class: backpack, score: 0.26
box coordinate x,y,w,h: [192.01856613 339.57638741 282.45540619  74.09663916]

time: 5.72s
class: person, score: 0.98
box coordinate x,y,w,h: [156.29650116 111.5225172  405.8543396  314.62712288]
class: backpack, score: 0.71
box coordinate x,y,w,h: [205.82166672 337.93584824 257.85541534  75.89873314]

time: 5.55s
class: person, score: 0.99
box coordinate x,y,w,h: [163.75883102 108.71416569 406.59908295 314.06367302]
class: backpack, score: 0.86
box coordinate x,y,w,h: [239.36714172 333.13925743 250.74586868  82.40627289]

time: 5.72s
class: person, score: 1.00
box coordinate x,y,w,h: [162.24330902 110.2440834  401.79824829 313.42540741]

time: 6.15s
class: person, score: 1.00
box coordinate x,y,w,h: [171.54081345 145.37488461 319.58408356 273.00164223]
class: backpack, score: 0.32
box coordinate x,y,w,h: [202.24494934 345.5513191