# Team : Hari Kiran (110583942) & Srija Kandula (110587710)

In [1]:
# pip install --upgrade opencv-python

In [2]:
import cv2
import numpy as np
import time
import math

# Class names known to the model, one per line of the file.
labelsPath = "./coco.names"
# Read inside a context manager so the file handle is closed right away.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# One random colour per class; the fixed seed keeps the colours stable between runs.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# Darknet weights and network description used by every entry point below.
weightsPath = "./yolov3.weights"
configPath = "./yolov3.cfg"

# Shared network instance (handed to process_live_cam by the main cell).
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

In [3]:
#video
def process_video(input_video_path, output_video_path='output.mp4'):
    """Detect people in a video and flag those standing too close together.

    Every frame is resized to 640x360, passed through the YOLO network built
    from the module-level ``configPath`` / ``weightsPath``, annotated, written
    to ``output_video_path`` and shown in a preview window. Pressing ``e`` in
    the preview window stops processing early.

    Args:
        input_video_path: Path of the video file to read.
        output_video_path: Path of the annotated video file to write.

    Raises:
        IOError: If the input video cannot be opened.
    """
    frame_size = (640, 360)       # (width, height) used for detection and for the output file
    distance_threshold = 100.0    # pixels, measured between top-left box corners
    color_alert = (0, 0, 255)
    color_ok = (0, 255, 0)

    #Loading the video input
    cap = cv2.VideoCapture(input_video_path)
    vid_writer = None
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video file: {input_video_path}")

        #Loading our model(YOLO)
        net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

        #Layer names (identical for every frame, so resolved once)
        layer_names = net.getLayerNames()
        # flatten() yields scalar indices whether they arrive as a 1-D or an Nx1 array
        output_layer_names = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

        #Initialising the video writer
        # The writer is opened with the size of the frames actually passed to write()
        vid_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, frame_size)

        while True:
            ret, image = cap.read()
            if not ret:
                break

            #Preprocessing the image
            image = cv2.resize(image, frame_size)
            (image_height, image_width) = image.shape[:2]

            # Same 1/255 pixel scaling as process_image and process_live_cam
            blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)

            #performing inference
            start_time = time.time()
            layer_outputs = net.forward(output_layer_names)
            end_time = time.time()
            print("Frame Prediction Time : {:.2f} seconds".format(end_time - start_time))

            #Collecting the person detections (class id 0)
            boxes = []
            confidences = []
            for output in layer_outputs:
                for detection in output:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.1 and class_id == 0:
                        # Box values are relative to the frame; scale them to pixels
                        box = detection[0:4] * np.array([image_width, image_height, image_width, image_height])
                        (center_x, center_y, box_width, box_height) = box.astype("int")
                        x = int(center_x - (box_width / 2))
                        y = int(center_y - (box_height / 2))
                        boxes.append([x, y, int(box_width), int(box_height)])
                        confidences.append(float(confidence))

            #Applying non-max suppression
            indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)
            # kept holds indices into `boxes` of the detections that survived
            kept = [int(i) for i in np.array(indices).flatten()] if len(indices) > 0 else []

            #Checking social distance violation
            # violating stores indices into `boxes`, so they can be used
            # directly when drawing below.
            violating = set()
            for position, first in enumerate(kept):
                for second in kept[position + 1:]:
                    x_distance = boxes[second][0] - boxes[first][0]
                    y_distance = boxes[second][1] - boxes[first][1]
                    if math.hypot(x_distance, y_distance) <= distance_threshold:
                        violating.add(first)
                        violating.add(second)

            #highlighting the detections and the violations
            for i in kept:
                (x, y, w, h) = boxes[i]
                if i in violating:
                    cv2.rectangle(image, (x, y), (x + w, y + h), color_alert, 2)
                    cv2.putText(image, "Alert", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_alert, 2)
                else:
                    cv2.rectangle(image, (x, y), (x + w, y + h), color_ok, 2)

            #Counting the persons in a particular frame
            person_count = len(kept)
            cv2.putText(image, f'Persons: {person_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            #Results
            vid_writer.write(image)
            cv2.imshow("Social Distance Detector Portal", image)
            if cv2.waitKey(1) & 0xFF == ord('e'):
                break
    finally:
        #releasing the resources, also when an error interrupts processing
        if vid_writer is not None:
            vid_writer.release()
        cap.release()
        cv2.destroyAllWindows()

In [4]:
#image
def process_image(image_path):
    """Detect people in a single image and flag those standing too close together.

    The image is passed through the YOLO network built from the module-level
    ``configPath`` / ``weightsPath``. Each detected person gets a green box;
    people within the distance threshold of another person get a red box and
    an "Alert" label instead. The annotated image is shown in a window and
    saved as ``output_image.jpg``; the call blocks until a key is pressed.

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        IOError: If the image cannot be read.
    """
    distance_threshold = 100.0    # pixels, measured between top-left box corners
    color_alert = (0, 0, 255)
    color_ok = (0, 255, 0)

    #Loading the image
    image = cv2.imread(image_path)
    if image is None:
        # imread reports failure by returning None rather than raising
        raise IOError(f"Could not read image file: {image_path}")
    (image_height, image_width) = image.shape[:2]

    #Loading our model(YOLO)
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

    #exporting the unconnected layers
    layer_names = net.getLayerNames()
    # flatten() yields scalar indices whether they arrive as a 1-D or an Nx1 array
    output_layer_names = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

    #pre-processing the image
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    #performing inference
    start_time = time.time()
    layer_outputs = net.forward(output_layer_names)
    end_time = time.time()
    print("Time taken for the prediction: {:.2f} secs".format(end_time - start_time))

    #Collecting the person detections (class id 0)
    boxes = []
    confidences = []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5 and class_id == 0:
                # Box values are relative to the image; scale them to pixels
                box = detection[0:4] * np.array([image_width, image_height, image_width, image_height])
                (center_x, center_y, box_width, box_height) = box.astype("int")
                x = int(center_x - (box_width / 2))
                y = int(center_y - (box_height / 2))
                boxes.append([x, y, int(box_width), int(box_height)])
                confidences.append(float(confidence))

    #Applying non-max suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)
    # kept holds indices into `boxes` of the detections that survived
    kept = [int(i) for i in np.array(indices).flatten()] if len(indices) > 0 else []

    #Violations detection
    # violating stores indices into `boxes`, so they can be used directly
    # when drawing below.
    violating = set()
    for position, first in enumerate(kept):
        for second in kept[position + 1:]:
            x_distance = boxes[second][0] - boxes[first][0]
            y_distance = boxes[second][1] - boxes[first][1]
            if math.hypot(x_distance, y_distance) <= distance_threshold:    #threshold is 100
                violating.add(first)
                violating.add(second)

    #highlighting the detections and the violations
    for i in kept:
        (x, y, w, h) = boxes[i]
        if i in violating:
            cv2.rectangle(image, (x, y), (x + w, y + h), color_alert, 2)
            # Label every violating box; nothing is drawn when there is none
            cv2.putText(image, "Alert", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_alert, 2)
        else:
            cv2.rectangle(image, (x, y), (x + w, y + h), color_ok, 2)

    #Results
    cv2.imshow("Social Distance Detector Portal", image)
    cv2.imwrite('output_image.jpg', image)
    cv2.waitKey()
    cv2.destroyAllWindows()

In [5]:
#live cam
def process_live_cam(yolo_net):
    """Run the social-distance detector on the default webcam until ``e`` is pressed.

    Each captured frame is passed through ``yolo_net``. Every person that
    survives non-max suppression is drawn with a green "OK" box, or a red
    "Alert" box when it lies within the distance threshold of another
    surviving person.

    Args:
        yolo_net: A loaded OpenCV DNN network (the main cell passes the
            module-level ``net``).
    """
    distance_threshold = 100      # pixels, measured between top-left box corners
    color_alert = (0, 0, 255)
    color_ok = (0, 255, 0)

    #Layer names (identical for every frame, so resolved once)
    yolo_layer_names = yolo_net.getLayerNames()
    # flatten() yields scalar indices whether they arrive as a 1-D or an Nx1 array
    yolo_output_layer_names = [yolo_layer_names[i - 1] for i in np.array(yolo_net.getUnconnectedOutLayers()).flatten()]

    video_capture = cv2.VideoCapture(0)
    try:
        while video_capture.isOpened():
            #frames
            ret, frame = video_capture.read()
            if not ret:
                # No frame was delivered; stop instead of failing on frame.shape
                break
            (frame_height, frame_width) = frame.shape[:2]

            #preprocessing the image
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            yolo_net.setInput(blob)

            #performing inference
            yolo_outputs = yolo_net.forward(yolo_output_layer_names)

            boxes = []
            confidences = []

            #detecting the person (class id 0)
            for output in yolo_outputs:
                for detection in output:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.1 and class_id == 0:
                        # Box values are relative to the frame; scale them to pixels
                        box = detection[0:4] * np.array([frame_width, frame_height, frame_width, frame_height])
                        (center_x, center_y, box_width, box_height) = box.astype("int")
                        x_coord = int(center_x - (box_width / 2))
                        y_coord = int(center_y - (box_height / 2))
                        boxes.append([x_coord, y_coord, int(box_width), int(box_height)])
                        confidences.append(float(confidence))

            #Applying non-max suppression
            suppressed_indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)
            # kept holds indices into `boxes` of the detections that survived
            kept = [int(i) for i in np.array(suppressed_indices).flatten()] if len(suppressed_indices) > 0 else []

            #violations detection
            # Only surviving boxes are compared: the raw list contains several
            # overlapping detections per person, which would flag each other.
            non_social_distance_indices = set()
            for position, first in enumerate(kept):
                for second in kept[position + 1:]:
                    x_distance = boxes[second][0] - boxes[first][0]
                    y_distance = boxes[second][1] - boxes[first][1]
                    if math.hypot(x_distance, y_distance) <= distance_threshold:      #threshold for the distance
                        non_social_distance_indices.add(first)
                        non_social_distance_indices.add(second)

            #boxes for violations and for compliant persons
            for i in kept:
                (x, y, w, h) = boxes[i]
                is_violation = i in non_social_distance_indices
                color = color_alert if is_violation else color_ok
                text = 'Alert' if is_violation else 'OK'
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.imshow("Social Distance Detector Portal", frame)

            if cv2.waitKey(1) & 0xFF == ord('e'):
                break
    finally:
        # Free the camera and close the window even if a frame fails to process
        video_capture.release()
        cv2.destroyAllWindows()

In [6]:
#Main program implementation
# Show the menu, one entry per line.
for menu_line in (
    "Choose an option:",
    "1. Image implementation",
    "2. Video implementation",
    "3. Live Cam implementation",
):
    print(menu_line)

choice = input("Enter the choice(1/2/3): ")

# Dispatch on the raw text the user typed; anything else is rejected.
if choice not in ('1', '2', '3'):
    print("Invalid choice. Please enter either 1, 2, or 3.")
elif choice == '3':
    # The live camera reuses the network loaded at module level.
    process_live_cam(net)
elif choice == '2':
    video_path = input("Enter the path of the video file: ")
    process_video(video_path)
else:
    image_path = input("Enter the path of the image file: ")
    process_image(image_path)

Choose an option:
1. Image implementation
2. Video implementation
3. Live Cam implementation
Enter the choice(1/2/3): 2
Enter the path of the video file: ./video2.mp4
Frame Prediction Time : 1.46 seconds
Frame Prediction Time : 0.56 seconds
Frame Prediction Time : 0.52 seconds
Frame Prediction Time : 0.53 seconds
Frame Prediction Time : 0.59 seconds
Frame Prediction Time : 0.59 seconds
Frame Prediction Time : 0.54 seconds
Frame Prediction Time : 0.44 seconds
Frame Prediction Time : 0.44 seconds
Frame Prediction Time : 0.44 seconds
Frame Prediction Time : 0.47 seconds
Frame Prediction Time : 0.47 seconds
Frame Prediction Time : 0.47 seconds
Frame Prediction Time : 0.45 seconds
Frame Prediction Time : 0.50 seconds
Frame Prediction Time : 0.46 seconds
Frame Prediction Time : 0.45 seconds
Frame Prediction Time : 0.47 seconds
Frame Prediction Time : 0.46 seconds
Frame Prediction Time : 0.45 seconds
Frame Prediction Time : 0.49 seconds
Frame Prediction Time : 0.47 seconds
Frame Prediction Ti