Importing the necessary Libraries

In [1]:
import os
import time

import cv2
import numpy as np

Threshold values that can be adjusted for better performance if needed

In [2]:
# Overlap threshold handed to cv2.dnn.NMSBoxes when pruning overlapping detections.
# The value this notebook started from was 0.3.
NMS_THRESHOLD = 0.0

# Minimum class score a detection needs to be kept; also passed to NMSBoxes as its
# score threshold. The value this notebook started from was 0.2.
MIN_CONFIDENCE = 0.2

In [4]:
def pedestrian_detection(image, model, layer_name, personidz=0):
    """Run the detection network on one frame and keep the "person" boxes.

    Parameters
    ----------
    image
        Frame to analyse; only ``image.shape[:2]`` (rows, columns) is read
        directly here. It is also shown, unmodified, in a "Processed" window.
    model
        Network object; ``setInput`` and ``forward`` are called on it.
    layer_name
        Output layer names forwarded to ``model.forward``.
    personidz
        Class index that counts as a person (default 0).

    Returns
    -------
    list
        ``[results, confidences]``.

        * ``results``: one tuple per detection that survived non-maximum
          suppression: ``(confidence, (x1, y1, x2, y2), (centerX, centerY))``.
        * ``confidences``: the scores of *all* candidates that passed the
          class/score filter, i.e. the list as it was *before* suppression.
          NOTE(review): this list is therefore not index-aligned with
          ``results``; the score belonging to ``results[k]`` is
          ``results[k][0]``.
    """
    frame_h, frame_w = image.shape[:2]

    # Scale pixel values by 1/255, resize to 416x416 without cropping and swap
    # the R and B channels. No mean value is passed, so none is subtracted.
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)
    # Displays the frame exactly as it was handed to this function.
    cv2.imshow("Processed", image)

    model.setInput(blob)
    layer_outputs = model.forward(layer_name)

    # Detections carry centre/size values that are multiplied by the frame size below.
    to_pixels = np.array([frame_w, frame_h, frame_w, frame_h])

    boxes, centroids, confidences = [], [], []
    for layer_output in layer_outputs:
        for detection in layer_output:
            # Elements from index 5 onward are the per-class scores.
            class_scores = detection[5:]
            best_class = np.argmax(class_scores)
            best_score = class_scores[best_class]

            if best_class != personidz or not (best_score > MIN_CONFIDENCE):
                continue

            scaled = detection[0:4] * to_pixels
            (centerX, centerY, width, height) = scaled.astype("int")

            # Convert centre + size into a top-left corner.
            left = int(centerX - (width / 2))
            top = int(centerY - (height / 2))

            boxes.append([left, top, int(width), int(height)])
            centroids.append((centerX, centerY))
            confidences.append(float(best_score))

    # Non-maximum suppression returns the indices of the boxes to keep.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONFIDENCE, NMS_THRESHOLD)

    results = []
    if len(kept) > 0:
        for idx in kept.flatten():
            left, top = boxes[idx][0], boxes[idx][1]
            box_w, box_h = boxes[idx][2], boxes[idx][3]
            results.append(
                (confidences[idx], (left, top, left + box_w, top + box_h), centroids[idx])
            )

    return [results, confidences]

In [4]:
import glob

# Build 'video.avi' from the dataset's .jpg frames and load the per-frame ids.
img_array = []
test_set = 'badminton'
# NOTE(review): absolute, machine-specific path; adjust it before running elsewhere.
root = 'C:/Users/tbukits/Documents/UAM/PDBR/dataset/' + test_set

# Context manager so the handle is closed as soon as the ids are read.
with open(root + '/dataIn.txt', 'r') as dataIn:
    data = dataIn.read()

frame_ids = data.split('\n')

path = root + '/*.jpg'

# glob.glob gives no ordering guarantee. frame_ids is later indexed by frame
# position, so the frames are sorted by name to keep that pairing deterministic
# (assumes the dataset's file names sort in frame order; confirm for new datasets).
for filename in sorted(glob.glob(path)):
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None; skipping the frame
        # would shift every following frame against frame_ids, so stop here.
        raise IOError('Could not read image: ' + filename)
    height, width, layers = img.shape
    size = (width, height)
    img_array.append(img)

if not img_array:
    raise FileNotFoundError('No .jpg frames found matching ' + path)

out = cv2.VideoWriter('video.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
try:
    for frame in img_array:
        out.write(frame)
finally:
    # Release the writer even if a write fails, so the file is finalised.
    out.release()

In [5]:
# Load the class names, the detection network and the input video.
labelsPath = "coco.names"
# Context manager so the label file is closed right after reading.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

weights_path = "yolov4-tiny.weights"
config_path = "yolov4-tiny.cfg"

model = cv2.dnn.readNetFromDarknet(config_path, weights_path)
# Optional CUDA backend; left disabled. Uncomment both lines to enable it:
# model.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# model.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

layer_name = model.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the OpenCV
# version they arrive as a flat array or as an (N, 1) array, so flatten first.
out_layer_ids = np.array(model.getUnconnectedOutLayers()).flatten()
layer_name = [layer_name[i - 1] for i in out_layer_ids]

cap = cv2.VideoCapture("video.avi")
if not cap.isOpened():
    # Without this check a missing/unreadable video just yields zero frames later on.
    raise IOError('Could not open video file: video.avi')
writer = None

In [6]:
def make_output_file(frame_id, bounding_boxes, scores):
    """Build the output-file line describing one frame's detections.

    Parameters
    ----------
    frame_id : str
        Identifier appended to the ``"./Videos/`` prefix.
    bounding_boxes : sequence
        One entry per detection; ``entry[1]`` must hold
        ``(x1, y1, x2, y2)`` corner coordinates.
    scores : sequence
        ``scores[i]`` is written next to ``bounding_boxes[i]``, so the two
        sequences must be index-aligned.

    Returns
    -------
    str
        ``"./Videos/<frame_id>";`` followed by a newline when there are no
        boxes, otherwise followed by a space and comma-separated
        ``(x, y, width, height):score`` entries terminated by ``;`` and a
        newline.
    """
    header = '"./Videos/' + frame_id + '";'
    if len(bounding_boxes) == 0:
        return header + '\n'

    entries = []
    for position, detection in enumerate(bounding_boxes):
        score = scores[position]
        corners = detection[1]
        left, top = corners[0], corners[1]
        # Corner pair -> top-left corner plus width/height.
        fields = (left, top, corners[2] - left, corners[3] - top)
        entries.append('(' + ', '.join(str(value) for value in fields) + '):' + str(score))

    return header + ' ' + ', '.join(entries) + ';\n'

In [7]:
# Created once at module level, so every call below goes through the same subtractor object.
fgbg = cv2.createBackgroundSubtractorMOG2()


def backgroundSubstractor(frame):
    """Return ``frame`` AND-ed with itself under the subtractor's foreground mask.

    The mask comes from ``fgbg.apply(frame)`` and is passed as ``mask`` to
    ``cv2.bitwise_and``.
    """
    foreground_mask = fgbg.apply(frame)
    return cv2.bitwise_and(frame, frame, mask=foreground_mask)

In [8]:
# Main loop: read each frame of the video, detect people, append one line per
# frame to '<test_set>.idl', and save/display the annotated frame.
prev_frame_time = 0

# used to record the time at which we processed current frame
new_frame_time = 0

# cv2.imwrite reports failure only through its return value, so make sure the
# target folder exists instead of silently saving nothing.
os.makedirs("processed", exist_ok=True)

# Loop-invariant values, looked up once.
person_class_id = LABELS.index("person")
font = cv2.FONT_HERSHEY_SIMPLEX

counter = 0
file = open(test_set + '.idl', "w")
try:
    while True:
        (grabbed, image) = cap.read()

        if not grabbed:
            break

        image = backgroundSubstractor(image)

        [results, confidences] = pedestrian_detection(image, model, layer_name,
            personidz=person_class_id)
        frame_id = frame_ids[counter]
        # `confidences` lists every candidate before non-maximum suppression and
        # is not index-aligned with `results`; each result tuple carries its own
        # confidence as element 0, so write those scores next to the boxes.
        kept_scores = [res[0] for res in results]
        row = make_output_file(frame_id, results, kept_scores)
        file.write(row)

        for res in results:
            cv2.rectangle(image, (res[1][0], res[1][1]), (res[1][2], res[1][3]), (0, 255, 0), 2)
        counter += 1

        # Frames per second, from the wall-clock time since the previous frame finished.
        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        prev_frame_time = new_frame_time
        # Guard against a zero interval (coarse clocks can return the same
        # timestamp twice), which would otherwise raise ZeroDivisionError.
        fps = str(int(1 / elapsed)) if elapsed > 0 else "0"

        # putting the FPS count on the frame
        cv2.putText(image, fps, (7, 70), font, 3, (100, 255, 0), 3, cv2.LINE_AA)

        cv2.imshow("Detection", image)

        # counter was already incremented, so saved frames are numbered from 1.
        cv2.imwrite("processed/" + str(counter) + ".jpg", image)

        key = cv2.waitKey(1)
        if key == 27:  # Esc stops the loop early
            break
finally:
    # Always release the capture, the windows and the output file, even if a
    # frame fails part-way through.
    cap.release()
    cv2.destroyAllWindows()
    file.close()

In [9]:
import glob
def _frame_order(path):
    """Sort key: files with an all-digit stem in numeric order, all others after them by name."""
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.isdecimal():
        return (0, int(stem), stem)
    return (1, 0, stem)


# Assemble 'backdoor.mp4' from the annotated frames saved in 'processed/'.
img_array = []
size = None
# for filename in glob.glob('Videos/tud-campus-sequence/*.png'):
# The frames are named 1.jpg, 2.jpg, ... 10.jpg, so glob order or a plain string
# sort would put 10.jpg before 2.jpg; sort on the numeric stem instead.
for filename in sorted(glob.glob('processed/*.jpg'), key=_frame_order):
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None.
        raise IOError('Could not read image: ' + filename)
    height, width, layers = img.shape
    size = (width, height)
    img_array.append(img)

if size is None:
    # Avoid silently reusing a `size` left over from an earlier cell.
    raise FileNotFoundError("No .jpg frames found in 'processed/'")

out = cv2.VideoWriter('backdoor.mp4', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
try:
    for frame in img_array:
        out.write(frame)
finally:
    # Release the writer even if a write fails, so the file is finalised.
    out.release()

In [10]:
# Show the `confidences` list left over from the most recent pedestrian_detection
# call made by the detection loop (i.e. the last frame that was processed).
print(confidences)

[]


## References
1. "Real-time Pedestrian Detection using Python & OpenCV", Data-Flair, 2023 [Online]. Available: https://data-flair.training/blogs/pedestrian-detection-python-opencv/
2. "Creating video from images using OpenCv and python", TheAILearner, 2023 [Online]. Available: https://theailearner.com/2018/10/15/creating-video-from-images-using-opencv-python/
3. "Deep learning: How OpenCV’s blobFromImage works", pyimagesearch, 2017 [Online]. Available: https://pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/
4. "Python – Displaying real time FPS at which webcam/video file is processed using OpenCV", GeeksforGeeks, 2023 [Online]. Available: https://www.geeksforgeeks.org/python-displaying-real-time-fps-at-which-webcam-video-file-is-processed-using-opencv/