In [2]:
import os
import cv2
import numpy as np
from shapely.geometry import Polygon
from moviepy.editor import VideoFileClip
from playsound import playsound
# pip install playsound==1.2.2


In [3]:
# Size (in pixels) that detect_obj resizes every frame to; get_bounding_box also
# multiplies the network's box coordinates by these values.
input_height = 480
input_width = 640

# Initialize the parameters

confThreshold = 0.20  # Minimum class score for get_bounding_box to keep a detection; decrease if not detecting
iouThreshold = 0.30   # check_iou_score drops a box whose IoU with a later same-class box is >= this value
inpWidth = 416       # Width of network's input image
inpHeight = 416      # Height of network's input image


# Colour tuples handed to the cv2 drawing calls in plot_box (label text uses red,
# rectangles use green). Presumably BGR order, since (0, 0, 255) is named red.
red = (0, 0, 255)
green = (0, 255, 0)
# Font face used by plot_box when writing the class label.
font = cv2.FONT_HERSHEY_SIMPLEX

# NOTE(review): plot_box passes literal scale/thickness values to cv2.putText
# rather than these two settings - confirm whether they are still needed.
fontScale = 0.5
thickness = 1

# NOTE(review): not referenced by any cell shown here - confirm before removing.
triangle = []

In [4]:
# Read the class labels from the names file (one label per line).
classesFile = "obj.names"
classes_yolo = None
with open(classesFile, 'rt') as names_file:
    raw_names = names_file.read()
    classes_yolo = raw_names.rstrip('\n').split('\n')

# Build the Darknet network from its configuration and weight files, then ask
# OpenCV to run it with the CUDA backend and target.
modelConfiguration_yolo = "yolov4-obj.cfg"
modelWeights_yolo = "yolov4-obj_best.weights"
net_yolo = cv2.dnn.readNetFromDarknet(
    modelConfiguration_yolo,
    modelWeights_yolo,
)
net_yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net_yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [5]:
def getOutputsNames(net):
    """Return the names of the network's output layers.

    Parameters
    ----------
    net : object exposing ``getLayerNames()`` and ``getUnconnectedOutLayers()``
        (e.g. a ``cv2.dnn.Net``).

    Returns
    -------
    list of str
        Names of the layers with unconnected outputs, in the order reported
        by ``getUnconnectedOutLayers()``.
    """
    # Get the names of all the layers in the network
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() yields 1-based layer ids. Depending on the
    # OpenCV version they arrive either as a flat array or as an N x 1 array,
    # so flatten first to handle both layouts.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    return [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

In [6]:
def calculate_iou(box_1, box_2):
    """Return the intersection-over-union (IoU) of two polygons.

    Parameters
    ----------
    box_1, box_2 : sequence of [x, y] points
        Vertices passed straight to ``shapely.geometry.Polygon``.

    Returns
    -------
    float
        Intersection area divided by union area, or ``0.0`` when the union
        has no area (both shapes are degenerate), which would otherwise raise
        ``ZeroDivisionError``.
    """
    poly_1 = Polygon(box_1)
    poly_2 = Polygon(box_2)
    union_area = poly_1.union(poly_2).area
    if union_area == 0:
        # Zero-width/zero-height boxes cover nothing, so they overlap nothing.
        return 0.0
    return poly_1.intersection(poly_2).area / union_area

In [7]:
def get_bounding_box(outs, classes, conf_threshold=None, frame_width=None, frame_height=None):
    """Turn raw network outputs into per-class lists of box corners.

    Parameters
    ----------
    outs : iterable of 2-D arrays
        Each row is one detection: four box values (centre x, centre y,
        width, height) that are scaled by the frame size here, one value that
        is ignored, then one score per class from index 5 onward.
    classes : sequence of str
        Class names indexed by class id.
    conf_threshold : float, optional
        Minimum best-class score to keep a detection. Defaults to the
        module-level ``confThreshold``.
    frame_width, frame_height : int, optional
        Pixel size used to scale the box values. Default to the module-level
        ``input_width`` / ``input_height``.

    Returns
    -------
    dict
        Maps class name to a list of boxes. Each box is
        ``[top_left, top_right, bottom_right, bottom_left]`` where every
        corner is ``[x, y]``. The corner names follow a y-up convention:
        ``bottom_*`` corners carry the smaller y value and ``top_*`` corners
        the larger one.
    """
    if conf_threshold is None:
        conf_threshold = confThreshold
    if frame_width is None:
        frame_width = input_width
    if frame_height is None:
        frame_height = input_height

    class_and_box_dict = {}
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            # Written as "not >" so a NaN score is rejected as well.
            if not confidence > conf_threshold:
                continue

            center_x = int(detection[0] * frame_width)
            center_y = int(detection[1] * frame_height)
            width = int(detection[2] * frame_width)
            height = int(detection[3] * frame_height)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)

            # Corner with the smallest x and y, and the three derived from it.
            bottom_left = [left, top]
            bottom_right = [left + width, top]
            top_left = [left, top + height]
            top_right = [left + width, top + height]

            class_name = classes[classId]
            class_and_box_dict.setdefault(class_name, []).append(
                [top_left, top_right, bottom_right, bottom_left]
            )
    return class_and_box_dict

In [8]:
def plot_box(frame1, class_and_box_dict):
    """Draw every detected box and its class label, and play a sound per class.

    Parameters
    ----------
    frame1 : image array
        Frame to draw on; it is passed to ``cv2.rectangle`` / ``cv2.putText``.
    class_and_box_dict : dict
        Maps class name to a list of boxes, each box being
        ``[top_left, top_right, bottom_right, bottom_left]`` with ``[x, y]``
        corners.

    Returns
    -------
    The frame returned by the last cv2 drawing call (``frame1`` itself when
    there is nothing to draw).
    """
    for key, boxes in class_and_box_dict.items():
        for top_left, top_right, bottom_right, bottom_left in boxes:
            # Pass points as tuples of plain ints: older OpenCV Python
            # bindings reject lists for point arguments.
            corner_a = (int(bottom_left[0]), int(bottom_left[1]))
            corner_b = (int(top_right[0]), int(top_right[1]))
            # NOTE(review): the label is anchored 10 px before the y value of
            # `top_left`, which is the larger-y corner of the box - confirm
            # this is the intended label position.
            org = (int(top_left[0]), int(top_left[1]) - 10)
            frame1 = cv2.rectangle(frame1, corner_a, corner_b, green, 2)
            frame1 = cv2.putText(frame1, key, org, font, 1, red, 2, cv2.LINE_AA)
        try:
            # Non-blocking playback of the clip named after the class.
            audio = f"Music/{key}.mp3"
            playsound(audio, False)
        except Exception as e:
            # Best effort: a missing or unplayable clip must not stop drawing.
            print(e)
    return frame1

In [9]:
def check_iou_score(class_and_box_dict):
    """Remove overlapping boxes of the same class.

    For every class with more than one box, each pair (earlier, later) is
    compared with ``calculate_iou``; when the score reaches ``iouThreshold``
    the earlier box of the pair is discarded. The dictionary is updated in
    place and also returned.
    """
    for label, candidates in class_and_box_dict.items():
        total = len(candidates)
        if total <= 1:
            continue

        suppressed = set()
        for first in range(total):
            for second in range(first + 1, total):
                overlap = calculate_iou(candidates[first], candidates[second])
                if overlap >= iouThreshold:
                    suppressed.add(first)

        # Replacing the value of an existing key is safe while iterating.
        class_and_box_dict[label] = [
            box for position, box in enumerate(candidates)
            if position not in suppressed
        ]
    return class_and_box_dict

In [10]:
def detect_obj(img):
    """Detect objects in one image and return the annotated frame.

    The image is resized to ``input_width`` x ``input_height``, run through
    ``net_yolo``, filtered with ``check_iou_score`` and drawn on by
    ``plot_box``.

    Parameters
    ----------
    img : image array accepted by ``cv2.resize``.

    Returns
    -------
    The resized frame with boxes and labels drawn on it.
    """
    frame = cv2.resize(img, (input_width, input_height))
    # blobFromImage expects size as (width, height) and it should be the
    # network's input resolution. get_bounding_box rescales the network's box
    # values by input_width/input_height, so the blob size does not have to
    # match the frame size.
    blob = cv2.dnn.blobFromImage(
        frame, 1/255, (inpWidth, inpHeight), [0, 0, 0], swapRB=True, crop=False
    )
    net_yolo.setInput(blob)

    outs = net_yolo.forward(getOutputsNames(net_yolo))
    class_and_box_dict = get_bounding_box(outs, classes_yolo)
    class_and_box_dict = check_iou_score(class_and_box_dict)

    frame = plot_box(frame, class_and_box_dict)
    return frame


In [11]:
def main(frame):
    """Process a single frame and return the annotated result.

    Currently this only delegates to ``detect_obj``; a lane-plotting step
    (``plot_lanes``) was sketched here but is disabled.
    """
    return detect_obj(frame)

In [12]:

#camera = cv2.VideoCapture("result.mp4")
camera = cv2.VideoCapture(0)

# Live video: process and show frames until the stream ends or 'q' is pressed.
# try/finally guarantees the capture device and the preview window are
# released even when frame processing raises or the kernel is interrupted.
try:
    # This first frame is read and then overwritten inside the loop.
    ret, frame = camera.read()

    while camera.isOpened():
        # Capture frame-by-frame
        ret, frame = camera.read()
        if not ret:
            # No frame delivered: stop the loop.
            break
        # Display the resulting frame
        frame = main(frame)
        cv2.imshow('Frame', frame)
        # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    camera.release()
    cv2.destroyAllWindows()


In [None]:

# check for recorded video
video_output = 'result1.mp4'
clip1 = VideoFileClip("result.mp4")


def _process_rgb_frame(rgb_frame):
    """Run ``main`` on a MoviePy frame, converting colour order both ways.

    MoviePy hands frames over in RGB order, while the detection pipeline is
    written for OpenCV-style BGR frames (it swaps R and B when building the
    network blob and draws with BGR colour tuples). The frame is therefore
    converted to BGR before processing and back to RGB for the writer.
    """
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    annotated_bgr = main(bgr_frame)
    return cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)


try:
    clip = clip1.fl_image(_process_rgb_frame)
    clip.write_videofile(video_output, audio=True)
finally:
    # Release the reader resources held by the source clip.
    clip1.close()

Moviepy - Building video result1.mp4.
MoviePy - Writing audio in result1TEMP_MPY_wvf_snd.mp3


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video result1.mp4



t:  27%|██████████████████▎                                                 | 94/350 [02:00<06:16,  1.47s/it, now=None]