In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


In [16]:
# Preprocess an image: resize it, scale the pixel values down, and add a batch dimension
def process_image(img):
    """Turn an image into the array fed to the network.

    The image is resized to 416x416 with bicubic interpolation, converted to
    float32, divided by 255 and given a leading axis of length 1.

    Args:
        img: Image array accepted by ``cv2.resize``.

    Returns:
        float32 array holding the resized, scaled image behind one extra
        leading axis.
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)
    # Cast first so the division is carried out in float32.
    scaled = resized.astype('float32') / 255.
    # Prepend the extra leading axis.
    return scaled[np.newaxis, ...]

In [17]:
# Getting class name from the coco dataset (.txt)
def get_classes(file):
    """Read class names from a text file, one name per line.

    Args:
        file: Path of the text file to open.

    Returns:
        List with one entry per line of the file, each stripped of leading
        and trailing whitespace (blank lines become empty strings).
    """
    with open(file) as handle:
        return [line.strip() for line in handle]

In [18]:
# Draw the bounding boxes on the image
def draw(image,boxes,scores,classes,all_classes):
    """Draw one labelled rectangle per detection onto ``image`` (in place).

    Args:
        image: Array passed to ``cv2.rectangle``/``cv2.putText``; its
            ``shape[1]`` and ``shape[0]`` cap the right and bottom edges.
        boxes: Iterable of ``(x, y, w, h)`` boxes.
        scores: Iterable of scores, parallel to ``boxes``.
        classes: Iterable of indices into ``all_classes``, parallel to ``boxes``.
        all_classes: Sequence of class names.

    For every detection the class name, score and raw box are printed; a
    blank line is printed once all detections are handled.
    """
    def to_pixel(value):
        # Round half up to an integer pixel coordinate.
        return np.floor(value + 0.5).astype(int)

    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Clamp the corners: lower bound 0, upper bound the image size.
        x_min = max(0, to_pixel(x))
        y_min = max(0, to_pixel(y))
        x_max = min(image.shape[1], to_pixel(x + w))
        y_max = min(image.shape[0], to_pixel(y + h))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # Caption sits 6 pixels above the rectangle's first corner.
        name = all_classes[cl]
        caption = '{0} {1:.2f}'.format(name, score)
        cv2.putText(image, caption, (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)

        print('class {0}, score: {1:.2f}'.format(name, score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [19]:
# Process image and check for possible matches
def detect_image(image,yolo,all_classes):
    """Run the detector on one image and draw whatever it finds.

    Args:
        image: Image array; it is preprocessed with ``process_image`` and its
            ``shape`` is passed to ``yolo.predict``.
        yolo: Object whose ``predict(pimage, shape)`` returns
            ``(boxes, classes, scores)``.
        all_classes: Sequence of class names forwarded to ``draw``.

    Returns:
        The same ``image`` object, annotated by ``draw`` when ``boxes`` is
        not ``None``.
    """
    net_input = process_image(image)

    # Time only the prediction call.
    started = time.time()
    boxes, classes, scores = yolo.predict(net_input, image.shape)
    elapsed = time.time() - started
    print('time: {0:.2f}s'.format(elapsed))

    # Nothing detected: return the image as it came in.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image

In [20]:
# Run detection on every frame of a video, display it, and save the annotated result
def detect_video(video,yolo,all_classes):
    """Run detection on each frame of a video, preview it and save the result.

    The input is read from ``videos/test/<video>`` and the annotated frames
    are written to ``videos/res/<video>`` at 20 frames per second, using the
    frame size reported by the capture. Each annotated frame is also shown in
    a window named ``'Detection'``; pressing Esc stops processing early.

    Args:
        video: File name of the video inside ``videos/test``.
        yolo: Detector forwarded to ``detect_image``.
        all_classes: Sequence of class names forwarded to ``detect_image``.
    """
    video_path = os.path.join('videos','test',video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    # try/finally so the capture, the writer and the preview window are
    # released even when detection raises part-way through the video.
    try:
        cv2.namedWindow('Detection',cv2.WINDOW_AUTOSIZE)

        # Saving detected video: the writer uses the same frame size as the input.
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # The original bound this to `fourc` but read `fourcc`, which raised
        # NameError before a single frame was processed.
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')

        vout.open(os.path.join('videos','res',video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            # read() reports failure once there are no more frames.
            if not res:
                break

            image = detect_image(frame,yolo,all_classes)
            cv2.imshow('Detection',image)
            vout.write(image)

            # 27 is the Esc key code; waitKey also lets the window refresh.
            if cv2.waitKey(110) & 0xFF == 27:
                break
    finally:
        vout.release()
        camera.release()
        # Close the preview window; the short waitKey lets the GUI process
        # the close request.
        cv2.destroyAllWindows()
        cv2.waitKey(1)

In [21]:
# Create the YOLO detector. The first argument (0.6) is described here as a
# minimum certainty of 60% for a detection.
# NOTE(review): the second argument (0.5) is presumably a non-max-suppression
# threshold — confirm against model/yolo_model.py.
yolo = YOLO(0.6,0.5)
# Text file with the class names; get_classes returns them as a list, one
# entry per line.
file = 'data/coco_classes.txt'
all_classes = get_classes(file)

In [24]:
# Detecting images (street with cars and people)
f = 'street.jpg'
path = 'images/' + f
image = cv2.imread(path)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise fail later with an obscure error inside cv2.resize.
if image is None:
    raise IOError('could not read image: ' + path)
image = detect_image(image, yolo, all_classes)
# Last expression of the cell: imwrite's return value is shown as the output.
cv2.imwrite('images/res/' + f, image)

time: 17.60s
class person, score: 0.96
box coordinate x,y,w,h: [1022.0618248  1579.46288586  297.71628976  813.96505237]
class car, score: 0.99
box coordinate x,y,w,h: [1399.2279768  1571.51669264  609.26771164  364.11456764]



True

In [None]:
# Detecting on video (takes more time as evaluation is carried out frame by frame)
# video = 'street.mp4'
# detect_video(video, yolo, all_classes)