In [1]:
import cv2
import numpy as np
import time
import os
import matplotlib.pyplot as plt 
import zipfile
print(cv2.__version__)

4.8.1


In [2]:
# Locations of the YOLOv4 network definition, trained weights and class names.
yolo_path = "./YOLOv4/"
config_path = yolo_path + "cfg/yolov4.cfg"
weights_path = yolo_path + "yolov4.weights"
labels_path = yolo_path + "cfg/coco.names"

# The labels file holds one class name per line. The context manager closes the
# file handle deterministically (the bare open().read() left it to the garbage
# collector), and splitlines() also copes with CRLF line endings.
with open(labels_path) as labels_file:
  LABELS = labels_file.read().strip().splitlines()
print(LABELS)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [3]:
# Build the network from the YOLOv4 .cfg and .weights files located above.
net = cv2.dnn.readNet(config_path, weights_path)

In [4]:
# One colour (3 uint8 components) per class label. A fixed-seed generator keeps
# each class's box colour identical across kernel restarts. Generator.integers
# excludes the upper bound, so 256 is used to cover the whole uint8 range
# (the previous randint(0, 255) could never produce 255).
_color_rng = np.random.default_rng(42)
COLORS = _color_rng.integers(0, 256, size=(len(LABELS), 3), dtype="uint8")

In [5]:
# Names of the network's unconnected output layers; these are the layers that
# blob_imagem() requests from net.forward(). getUnconnectedOutLayersNames() does
# the index-to-name lookup itself, so the result no longer depends on whether
# getUnconnectedOutLayers() returns flat or nested indices in this OpenCV build.
ln = list(net.getUnconnectedOutLayersNames())

print(ln)

['yolo_139', 'yolo_150', 'yolo_161']


In [6]:
def show_img(img):
  """Render a BGR image inline with matplotlib.

  The current figure is resized to 16x10 inches, the axes are hidden, and the
  image is converted from BGR to RGB before being handed to imshow.
  """
  figure = plt.gcf()
  figure.set_size_inches(16, 10)
  plt.axis("off")
  rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  plt.imshow(rgb_img)
  plt.show()

In [7]:
def blob_imagem(net, img, mostrar_texto=True):
  """Run one forward pass of `net` on `img` and return the raw layer outputs.

  The image is turned into a 416x416 blob scaled by 1/255 with the R and B
  channels swapped and no cropping, fed to the network, and the layers named in
  the module-level `ln` list are evaluated.

  Args:
    net: network object exposing setInput() and forward().
    img: input image passed to cv2.dnn.blobFromImage.
    mostrar_texto: when true, print how long the blob + forward pass took.

  Returns:
    The tuple (net, img, layerOutputs); `net` and `img` are passed through as-is.
  """
  t0 = time.time()
  net.setInput(cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False))
  outputs = net.forward(ln)
  elapsed = time.time() - t0
  if mostrar_texto:
    print("YOLO took {:.2f} seconds".format(elapsed))
  return net, img, outputs

In [8]:
def perform_detection(detection, _threshold, boxes, confidences, classesID, frame_size=None):
  """Append one detection to the running result lists if it is confident enough.

  `detection[0:4]` is read as (center x, center y, width, height) relative to
  the frame and `detection[5:]` as the per-class scores; element 4 is not used
  here (presumably the objectness score - confirm against the model output).

  Args:
    detection: 1-D numpy array for a single candidate box.
    _threshold: minimum best-class score required to keep the detection.
    boxes: list extended in place with [x, y, width, height] in pixels, where
      (x, y) is the top-left corner.
    confidences: list extended in place with the best-class score as a float.
    classesID: list extended in place with the index of the best class.
    frame_size: optional (width, height) of the frame in pixels. When omitted,
      the module-level globals W and H are used, as in the original code.

  Returns:
    The same (boxes, confidences, classesID) lists that were passed in.
  """
  scores = detection[5:]
  classID = np.argmax(scores)
  confidence = scores[classID]

  if confidence > _threshold:
    # Resolve the frame size only when needed, so low-confidence detections
    # never touch the W/H globals.
    if frame_size is None:
      frame_width, frame_height = W, H
    else:
      frame_width, frame_height = frame_size

    # Scale the relative box to pixel coordinates.
    scale = np.array([frame_width, frame_height, frame_width, frame_height])
    (centerX, centerY, width, height) = (detection[0:4] * scale).astype("int")

    # Convert from box centre to top-left corner.
    x = int(centerX - (width / 2))
    y = int(centerY - (height / 2))

    boxes.append([x, y, int(width), int(height)])
    confidences.append(float(confidence))
    classesID.append(classID)

  return boxes, confidences, classesID

In [9]:
def images_function(image, i, confidences, boxes, COLORS, LABELS,classesID, show_text=True):
  """Draw detection `i` (rectangle plus "label: confidence" caption) on `image`.

  Args:
    image: image drawn on in place via cv2.rectangle / cv2.putText.
    i: index of the detection in `boxes`, `confidences` and `classesID`.
    confidences: per-detection confidence scores.
    boxes: per-detection [x, y, width, height], (x, y) being the top-left corner.
    COLORS: per-class colour, indexed by class id.
    LABELS: per-class name, indexed by class id.
    classesID: per-detection class id.
    show_text: when true, also print the caption and the box coordinates.

  Returns:
    The tuple (image, x, y, w, h) for the drawn box.
  """
  x, y, w, h = boxes[i][:4]
  class_id = classesID[i]

  cor = [int(c) for c in COLORS[class_id]]
  cv2.rectangle(image, (x, y), (x + w, y + h), cor, 2)
  # The caption must show the detection's confidence; the previous code
  # formatted the class index here, so every label displayed e.g. "person: 0.0000".
  text = "{}: {:.4f}".format(LABELS[class_id], confidences[i])
  if show_text:
    print("> " + text)
    print(x,y,w,h)
  # Caption sits 5 px above the top-left corner of the box.
  cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, cor, 2)

  return image,x,y,w,h

In [10]:
# Uncomment to download the sample video used in the next cell:
# !wget https://github.com/gabevr/yolo/raw/master/videos/video_pessoas01.mp4

In [11]:
video_file = 'video_pessoas01.mp4'
cap = cv2.VideoCapture(video_file)
# VideoCapture does not raise for a missing or unreadable file, so fail here
# with a clear message instead of crashing later on `video.shape`.
if not cap.isOpened():
  raise IOError("Could not open video file: {}".format(video_file))

# Peek at the first frame to learn the frame dimensions.
connected, video = cap.read()
if not connected:
  raise IOError("Could not read a frame from: {}".format(video_file))
# Rewind so the processing loop starts at the first frame again; without this
# the frame consumed above is silently missing from the output video.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# The printed values are the frame's shape, i.e. rows (height), columns (width)
# and channels - so videoX is the height and videoY the width.
videoX, videoY, videoZ = video.shape
print(videoX, videoY, videoZ)

720 1280 3


In [12]:
def resize(width, height, max_width = 600):
  """Return (width, height) limited to `max_width`, keeping the aspect ratio.

  Sizes no wider than `max_width` are returned unchanged. Wider sizes are
  scaled so the width equals `max_width`; the scaled height is truncated to an
  int.
  """
  if width > max_width:
    aspect_ratio = width / height
    return max_width, int(max_width / aspect_ratio)
  return width, height

In [13]:
# The frame's shape lists rows (height) before columns (width), while resize()
# expects the width first.
src_height, src_width = video.shape[0], video.shape[1]
video_width, video_height = resize(src_width, src_height)
print(video_width,video_height)

600 337


In [14]:
result_file_name = 'result.avi'
fourcc = cv2.VideoWriter_fourcc(*'XVID') # alternative codec: 'MP4V'
# Write at the source frame rate so playback speed matches the input video.
# cap.get() returns 0 when the backend cannot report it; fall back to 24 then.
fps = cap.get(cv2.CAP_PROP_FPS) or 24
output_video = cv2.VideoWriter(result_file_name, fourcc, fps, (video_width, video_height))
# VideoWriter fails silently (e.g. codec unavailable) and would then drop every
# frame written to it, so check up front.
if not output_video.isOpened():
  raise IOError("Could not open video writer for: {}".format(result_file_name))

In [15]:
# Tunable parameters for the frame-processing loop.
threshold = 0.5       # minimum score to keep a detection; also given to NMSBoxes
threshold_NMS = 0.3   # second threshold given to cv2.dnn.NMSBoxes
small_font = 0.4      # font scale of the per-frame timing caption
medium_font = 0.6
font = cv2.FONT_HERSHEY_SIMPLEX
samples_show = 20     # frames are previewed while actual_sample <= samples_show
actual_sample = 0     # number of frames previewed so far

In [16]:
# Process the video frame by frame: resize, run YOLO, keep the detections that
# survive non-maximum suppression, draw them, and append the annotated frame to
# the output video. The loop stops at the end of the video or on a key press.
try:
  while cv2.waitKey(1) < 0:
    connected, frame = cap.read()
    if not connected:
      break
    # Skip unusable frames explicitly. The previous bare `except:` wrapped
    # `frame.shape`, which could never fail because cv2.resize had already run.
    if frame is None:
      print('Erro')
      continue

    t = time.time()
    frame = cv2.resize(frame, (video_width, video_height))
    # perform_detection() reads the module-level W and H to scale boxes.
    (H, W) = frame.shape[:2]

    # Untouched copy of the frame, used for cropping without drawn overlays.
    imagem_cp = frame.copy()
    net, frame, layerOutputs = blob_imagem(net, frame)
    boxes = []
    confidences = []
    classesID = []

    for output in layerOutputs:
      for detection in output:
        boxes, confidences, classesID = perform_detection(detection, threshold, boxes, confidences, classesID)

    # Indices of the boxes kept after non-maximum suppression.
    objs = cv2.dnn.NMSBoxes(boxes, confidences, threshold, threshold_NMS)

    if len(objs) > 0:
      # np.array() makes .flatten() available whatever container NMSBoxes returns.
      for i in np.array(objs).flatten():
        frame, x, y, w, h = images_function(frame, i, confidences, boxes, COLORS, LABELS, classesID, show_text=False)
        # Crop of the detected object from the clean copy. Renamed from
        # `object`, which shadowed the Python builtin for the rest of the
        # kernel session. NOTE: x or y can be negative for boxes touching the
        # border, in which case this slice wraps around.
        object_crop = imagem_cp[y:y + h, x:x + w]

    cv2.putText(frame, " frame processed in {:.2f} seconds".format(time.time() - t),
                (20, video_height-20), font, small_font, (250, 250, 250), 0, lineType=cv2.LINE_AA)

    # Only preview the first few frames; every frame is still written to disk.
    if actual_sample <= samples_show:
      cv2.imshow("video", frame) # TODO: Verify this method
      actual_sample += 1

    output_video.write(frame)
finally:
  # Release resources even if a frame fails, so the output file is finalized
  # and the capture handle and preview window are not leaked.
  cap.release()
  output_video.release()
  cv2.destroyAllWindows()

print('Done.')

YOLO took 0.55 seconds
YOLO took 0.21 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0.17 seconds
YOLO took 0.17 seconds
YOLO took 0.16 seconds
YOLO took 0

In [17]:
# Report the on-disk size of the rendered output video.
!du -h result.avi

 13M	result.avi
