## **A YOLO V-3 BASED HUMAN DISTANCE MONITORING SYSTEM**


```
Hasan Tahsin Rafsan
232137
Section - A
PMIT Program
Admission Intake Summer 2023
Institute of Information Technology
Jahangirnagar University
```

```

```

**1. Mount Google Drive for Google Colaboratory**

---



In [None]:
# Mount Google Drive at /content/drive; later cells build their file paths
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

**2. Check Python Version with Requirements**

---



In [None]:
# IPython shell escape: print the version of the `python` executable on PATH.
!python --version

**3. Creating The People Detection Function**

---



In [None]:
import numpy as np
import cv2

# Two people whose centroids are closer than this (Euclidean distance, in
# pixels of the resized frame) are flagged as a distance violation.
MIN_DISTANCE = 50
# Minimum class score for a detection to be kept; also passed to
# cv2.dnn.NMSBoxes as its score threshold.
MIN_CONF = 0.3
# Overlap threshold passed to cv2.dnn.NMSBoxes (non-maxima suppression).
NMS_THRESH = 0.3

def detect_people(frame, net, ln, personIdx=0, min_conf=None, nms_thresh=None):
    """Detect people in a frame with a YOLO network run through OpenCV DNN.

    The frame is converted to a 416x416 blob (pixel values scaled by 1/255,
    R and B channels swapped), pushed through ``net`` and the raw detections
    are filtered by class and score before non-maxima suppression.

    Args:
        frame: image array; its first two dimensions are read as
            (height, width).
        net: network object providing ``setInput()`` and ``forward()``.
        ln: output layer names handed to ``net.forward()``.
        personIdx: class index that is treated as "person".
        min_conf: minimum class score for a detection to be kept. Defaults
            to the module-level ``MIN_CONF``.
        nms_thresh: overlap threshold for ``cv2.dnn.NMSBoxes``. Defaults to
            the module-level ``NMS_THRESH``.

    Returns:
        A list of ``(confidence, (startX, startY, endX, endY),
        (centerX, centerY))`` tuples, one per detection that survived
        non-maxima suppression. Coordinates are plain Python ints in frame
        pixels and the confidence is a Python float.
    """
    if min_conf is None:
        min_conf = MIN_CONF
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    # grab the dimensions of the frame (height, width)
    (H, W) = frame.shape[:2]
    # factors that map the network's relative box values back to frame pixels
    scale = np.array([W, H, W, H])

    # build a blob from the frame and run a forward pass of the detector
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # detected bounding boxes, centroids & confidences
    boxes = []
    centroids = []
    confidences = []

    for output in layerOutputs:
        output = np.asarray(output)

        # Each row holds the box (center x, center y, width, height) in its
        # first four values and the per-class scores from index 5 onwards.
        scores = output[:, 5:]
        class_ids = scores.argmax(axis=1)
        best_scores = scores[np.arange(len(output)), class_ids]

        # Filter all rows at once so that only confident "person" rows are
        # visited by the Python loop below.
        keep = (class_ids == personIdx) & (best_scores > min_conf)

        for detection, confidence in zip(output[keep], best_scores[keep]):
            # scale the box back to the frame size and convert to Python ints
            # so the values can be passed on to drawing code unchanged
            box = (detection[0:4] * scale).astype("int")
            centerX, centerY, width, height = (int(v) for v in box)

            # derive the top-left corner from the center coordinates
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, width, height])
            centroids.append((centerX, centerY))
            confidences.append(float(confidence))

    # nothing passed the filter, so there is nothing to suppress
    if not boxes:
        return []

    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, min_conf, nms_thresh)

    # np.asarray(...).flatten() yields a flat index sequence whether the call
    # returned an empty result or an index array of any shape.
    results = []
    for i in np.asarray(idxs).flatten():
        (x, y, w, h) = boxes[i]
        results.append((confidences[i], (x, y, x + w, y + h), centroids[i]))

    return results


**4. Main Configuration File**

---



In [None]:
from google.colab.patches import cv2_imshow
from scipy.spatial import distance as dist
import os
import cv2
import imutils
import numpy as np
from urllib.request import urlretrieve
import csv
from datetime import datetime

# Timestamp used to tag the output files written by the processing cells
run_time = datetime.now().strftime("%Y%m%d_%H%M%S")

# Main location path inside Gdrive
main_path = "/content/drive/MyDrive/human-distance-detector"
base_link = "https://gist.githubusercontent.com/htrafsan/9970463655660122974949a03369664a/raw"

# Load Required Files
cocoPath = f"{main_path}/yolo-coco/coco.names"
coco_cfg = f"{main_path}/yolo-coco/yolov3.cfg"
coco_weights = f"{main_path}/yolo-coco/yolov3.weights"

# urlretrieve() writes straight to the target path and does not create
# missing parent folders, so make sure the model directory exists first.
os.makedirs(os.path.dirname(cocoPath), exist_ok=True)

if not os.path.isfile(cocoPath):
    print("[-] coco.names not found. Downloading..")
    urlretrieve(f"{base_link}/coco.names", cocoPath)

# One class label per line; closed deterministically via the context manager
with open(cocoPath) as names_file:
    LABELS = names_file.read().strip().split("\n")

if not os.path.isfile(coco_cfg):
    print("[-] yolov3.cfg not found. Downloading..")
    urlretrieve(f"{base_link}/yolov3.cfg", coco_cfg)

# The weights are not downloaded here; the user has to place them manually.
if not os.path.isfile(coco_weights):
    print("[!] Coco Weights Not Found.")
    print(f"[!] Save inside {coco_weights}")
    # stop the cell with a non-zero status: nothing below can work without weights
    raise SystemExit(1)

# Create output if it does not exist
output_dir = f"{main_path}/output"
os.makedirs(output_dir, exist_ok=True)


**5. Grab Frames from Video & Make Prediction Measuring Distances of Detected People**

---



In [None]:
# human_distance_detector.py
#
# Runs the people detector on every frame of the input video, flags people
# whose centroids are closer than MIN_DISTANCE pixels, writes an annotated
# video and stores per-frame counts in a CSV file.

# path to input video file
input_name = f'{main_path}/my_input_cctv.mp4'

# path to output video file
output_name = f'{main_path}/output/train_{run_time}.avi'

# path to output csv file
csv_filename = f'{main_path}/output/train_{run_time}.csv'

# show each frame on display (true, false)
screen_display = True

# data for the output frames
frame_number = 0  # base frame num=0
frame_results = []

# Load Yolo Detector
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(coco_cfg, coco_weights)

# determine only the Output layer names; flatten() accepts the index array
# whatever its shape (indices are 1-based, hence the -1)
ln = net.getLayerNames()
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# the class index does not change between frames, so look it up once
person_idx = LABELS.index("person")

# initialize the video stream & pointer to output video file
print("[INFO] accessing video stream...")
vs = cv2.VideoCapture(input_name if input_name else 0)
writer = None

if not vs.isOpened():
    # the loop below ends immediately in this case; tell the user why
    print(f"[!] Could not open video source: {input_name}")

try:
    # loop over the frames from the video stream
    while True:
        # read the next frame from the file
        (grabbed, frame) = vs.read()

        # if the frame was not grabbed then we reached the end of the video
        if not grabbed:
            break

        # resize the frame & detect only person in it
        frame = imutils.resize(frame, width=700)
        results = detect_people(frame, net, ln, personIdx=person_idx)

        # initialize the set of indexes that violate the minimum safe distance
        violate = set()

        # ensure there is at least 2 people detections for computing pairwise distance maps
        if len(results) >= 2:
            # compute the Euclidean distances between all pairs of centroids
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # keep only the upper triangle (i < j) so each pair is tested once
            # and the zero diagonal is ignored
            close_i, close_j = np.nonzero(np.triu(D < MIN_DISTANCE, k=1))
            violate.update(close_i.tolist())
            violate.update(close_j.tolist())

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            # extract the bounding box & centroid coordinates as plain ints
            (startX, startY, endX, endY) = (int(v) for v in bbox)
            (cX, cY) = (int(v) for v in centroid)

            # green by default, red when the person is part of a violating pair
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            # draw a bounding box around the person & the centroid coordinates of the person
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        num_persons = len(results)
        num_red = len(violate)
        num_green = num_persons - num_red
        frame_results.append([frame_number, num_persons, num_green, num_red])
        frame_number += 1

        text_green = "Safe: {}".format(num_green)
        cv2.putText(frame, text_green, (10, frame.shape[0] - 55),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 0), 3)

        text_red = "Violations: {}".format(num_red)
        cv2.putText(frame, text_red, (10, frame.shape[0] - 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)

        # check to see if the output frame should be displayed to our screen
        if screen_display:
            cv2_imshow(frame)

        # if an output video file path has been supplied & the video writer has not been initialized, do so now
        if output_name != "" and writer is None:
            # the writer is created lazily because it needs the resized frame size
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output_name, fourcc, 25, (frame.shape[1], frame.shape[0]), True)

        # if the video writer is not None, write the frame to the output video file
        if writer is not None:
            writer.write(frame)
finally:
    # release the capture and finalize the output video even when the loop is
    # interrupted or a frame fails to process
    vs.release()
    if writer is not None:
        writer.release()

# Save results in a csv file (separate name so the video writer is not shadowed)
with open(csv_filename, 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['frame', 'total_persons', 'safe', 'violations'])
    csv_writer.writerows(frame_results)



**6. For B&W version**

---



In [None]:
# human_distance_detector.py
#
# Same pipeline as the colour run, applied to the black & white input video:
# detect people per frame, flag people whose centroids are closer than
# MIN_DISTANCE pixels, write an annotated video and a per-frame CSV report.

# path to input video file
input_name = f'{main_path}/my_input_cctv_bw.mp4'

# path to output video file
output_name = f'{main_path}/output/bw_{run_time}.avi'

# path to output csv file
csv_filename = f'{main_path}/output/bw_{run_time}.csv'

# show each frame on display (true, false)
screen_display = True

# data for the output frames
frame_number = 0  # base frame num=0
frame_results = []

# Load Yolo Detector
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(coco_cfg, coco_weights)

# determine only the Output layer names; flatten() accepts the index array
# whatever its shape (indices are 1-based, hence the -1)
ln = net.getLayerNames()
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# the class index does not change between frames, so look it up once
person_idx = LABELS.index("person")

# initialize the video stream & pointer to output video file
print("[INFO] accessing video stream...")
vs = cv2.VideoCapture(input_name if input_name else 0)
writer = None

if not vs.isOpened():
    # the loop below ends immediately in this case; tell the user why
    print(f"[!] Could not open video source: {input_name}")

try:
    # loop over the frames from the video stream
    while True:
        # read the next frame from the file
        (grabbed, frame) = vs.read()

        # if the frame was not grabbed then we reached the end of the video
        if not grabbed:
            break

        # resize the frame & detect only person in it
        frame = imutils.resize(frame, width=700)
        results = detect_people(frame, net, ln, personIdx=person_idx)

        # initialize the set of indexes that violate the minimum safe distance
        violate = set()

        # ensure there is at least 2 people detections for computing pairwise distance maps
        if len(results) >= 2:
            # compute the Euclidean distances between all pairs of centroids
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # keep only the upper triangle (i < j) so each pair is tested once
            # and the zero diagonal is ignored
            close_i, close_j = np.nonzero(np.triu(D < MIN_DISTANCE, k=1))
            violate.update(close_i.tolist())
            violate.update(close_j.tolist())

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            # extract the bounding box & centroid coordinates as plain ints
            (startX, startY, endX, endY) = (int(v) for v in bbox)
            (cX, cY) = (int(v) for v in centroid)

            # green by default, red when the person is part of a violating pair
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            # draw a bounding box around the person & the centroid coordinates of the person
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        num_persons = len(results)
        num_red = len(violate)
        num_green = num_persons - num_red
        frame_results.append([frame_number, num_persons, num_green, num_red])
        frame_number += 1

        text_green = "Safe: {}".format(num_green)
        cv2.putText(frame, text_green, (10, frame.shape[0] - 55),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 0), 3)

        text_red = "Violations: {}".format(num_red)
        cv2.putText(frame, text_red, (10, frame.shape[0] - 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)

        # check to see if the output frame should be displayed to our screen
        if screen_display:
            cv2_imshow(frame)

        # if an output video file path has been supplied & the video writer has not been initialized, do so now
        if output_name != "" and writer is None:
            # the writer is created lazily because it needs the resized frame size
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output_name, fourcc, 25, (frame.shape[1], frame.shape[0]), True)

        # if the video writer is not None, write the frame to the output video file
        if writer is not None:
            writer.write(frame)
finally:
    # release the capture and finalize the output video even when the loop is
    # interrupted or a frame fails to process
    vs.release()
    if writer is not None:
        writer.release()

# Save results in a csv file (separate name so the video writer is not shadowed)
with open(csv_filename, 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['frame', 'total_persons', 'safe', 'violations'])
    csv_writer.writerows(frame_results)



**7. Read CSV & Plot Graph**

---



In [None]:
import csv
import matplotlib.pyplot as plt

# Per-frame reports written by the colour run and the B&W run
train_csv = f'{main_path}/output/train_{run_time}.csv'
bw_csv = f'{main_path}/output/bw_{run_time}.csv'

tp_total = tn_total = fp_total = fn_total = 0
frame_count = 0

# One (tp, tn, fp, fn) tuple per frame pair. The colour run supplies the
# reference counts, the B&W run the predicted counts; zip() stops at the
# shorter of the two files.
per_frame_counts = []

with open(train_csv, 'r') as f_train, open(bw_csv, 'r') as f_bw:
    for reference_row, predicted_row in zip(csv.DictReader(f_train), csv.DictReader(f_bw)):
        ref_safe, ref_viol = int(reference_row['safe']), int(reference_row['violations'])
        out_safe, out_viol = int(predicted_row['safe']), int(predicted_row['violations'])

        per_frame_counts.append((
            min(ref_viol, out_viol),          # TP: violations present in both counts
            min(ref_safe, out_safe),          # TN: safe persons present in both counts
            max(0, out_viol - ref_viol),      # FP: surplus of predicted violations
            max(0, out_safe - ref_safe),      # FN: surplus of predicted safe persons
        ))

# Sum the per-frame tuples column by column
for tp, tn, fp, fn in per_frame_counts:
    tp_total, tn_total = tp_total + tp, tn_total + tn
    fp_total, fn_total = fp_total + fp, fn_total + fn
frame_count = len(per_frame_counts)

# Metrics calculation; every ratio falls back to 0 when its denominator is 0
predicted_pos = tp_total + fp_total
actual_pos = tp_total + fn_total
negatives = tn_total + fp_total
everything = tp_total + tn_total + fp_total + fn_total

precision = tp_total / predicted_pos if predicted_pos > 0 else 0
recall = tp_total / actual_pos if actual_pos > 0 else 0
pr_sum = precision + recall
f1_score = 2 * precision * recall / pr_sum if pr_sum > 0 else 0
tnr = tn_total / negatives if negatives > 0 else 0
accuracy = (tp_total + tn_total) / everything if everything > 0 else 0

print(f"Total frames: {frame_count}")
print(f"TP: {tp_total}")
print(f"TN: {tn_total}")
print(f"FP: {fp_total}")
print(f"FN: {fn_total}")
print(f"Precision: {precision:.4f}")
print(f"Recall (MOU): {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")
print(f"TNR (True Negative Rate): {tnr:.4f}")
print(f"Accuracy: {accuracy:.4f}")

# ----------- Bar Graph -----------

metrics = dict(zip(
    ['Precision', 'Recall', 'F1 Score', 'TNR', 'Accuracy'],
    [precision, recall, f1_score, tnr, accuracy],
))
counts = dict(zip(
    ['TP', 'TN', 'FP', 'FN'],
    [tp_total, tn_total, fp_total, fn_total],
))

plt.figure(figsize=(10,5))

# left panel: ratio metrics on a fixed 0..1 scale
plt.subplot(1,2,1)
plt.bar(metrics.keys(), metrics.values(), color='skyblue')
plt.ylim(0, 1.05)
plt.title('Detection Metrics')
plt.ylabel('Score')
plt.xticks(rotation=45)

# right panel: raw confusion counts
plt.subplot(1,2,2)
plt.bar(counts.keys(), counts.values(), color='orange')
plt.title('Counts')
plt.ylabel('Count')

plt.suptitle('Social Distancing Detection Report')
# leave the top 5% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

**8. Reverse Training**

---



In [None]:
import csv
import matplotlib.pyplot as plt

# Per-frame reports written by the colour run and the B&W run
train_csv = f'{main_path}/output/train_{run_time}.csv'
bw_csv = f'{main_path}/output/bw_{run_time}.csv'

tp_total = tn_total = fp_total = fn_total = 0
frame_count = 0

# One (tp, tn, fp, fn) tuple per frame pair. Roles are swapped here: the B&W
# run supplies the reference counts and the colour run the predicted counts;
# zip() stops at the shorter of the two files.
per_frame_counts = []

with open(bw_csv, 'r') as f_reference, open(train_csv, 'r') as f_predicted:
    for reference_row, predicted_row in zip(csv.DictReader(f_reference), csv.DictReader(f_predicted)):
        ref_safe, ref_viol = int(reference_row['safe']), int(reference_row['violations'])
        out_safe, out_viol = int(predicted_row['safe']), int(predicted_row['violations'])

        per_frame_counts.append((
            min(ref_viol, out_viol),          # TP: violations present in both counts
            min(ref_safe, out_safe),          # TN: safe persons present in both counts
            max(0, out_viol - ref_viol),      # FP: surplus of predicted violations
            max(0, out_safe - ref_safe),      # FN: surplus of predicted safe persons
        ))

# Sum the per-frame tuples column by column
for tp, tn, fp, fn in per_frame_counts:
    tp_total, tn_total = tp_total + tp, tn_total + tn
    fp_total, fn_total = fp_total + fp, fn_total + fn
frame_count = len(per_frame_counts)

# Metrics calculation; every ratio falls back to 0 when its denominator is 0
predicted_pos = tp_total + fp_total
actual_pos = tp_total + fn_total
negatives = tn_total + fp_total
everything = tp_total + tn_total + fp_total + fn_total

precision = tp_total / predicted_pos if predicted_pos > 0 else 0
recall = tp_total / actual_pos if actual_pos > 0 else 0
pr_sum = precision + recall
f1_score = 2 * precision * recall / pr_sum if pr_sum > 0 else 0
tnr = tn_total / negatives if negatives > 0 else 0
accuracy = (tp_total + tn_total) / everything if everything > 0 else 0

print(f"Total frames: {frame_count}")
print(f"TP: {tp_total}")
print(f"TN: {tn_total}")
print(f"FP: {fp_total}")
print(f"FN: {fn_total}")
print(f"Precision: {precision:.4f}")
print(f"Recall (MOU): {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")
print(f"TNR (True Negative Rate): {tnr:.4f}")
print(f"Accuracy: {accuracy:.4f}")

# ----------- Bar Graph -----------

metrics = dict(zip(
    ['Precision', 'Recall', 'F1 Score', 'TNR', 'Accuracy'],
    [precision, recall, f1_score, tnr, accuracy],
))
counts = dict(zip(
    ['TP', 'TN', 'FP', 'FN'],
    [tp_total, tn_total, fp_total, fn_total],
))

plt.figure(figsize=(10,5))

# left panel: ratio metrics on a fixed 0..1 scale
plt.subplot(1,2,1)
plt.bar(metrics.keys(), metrics.values(), color='skyblue')
plt.ylim(0, 1.05)
plt.title('Detection Metrics')
plt.ylabel('Score')
plt.xticks(rotation=45)

# right panel: raw confusion counts
plt.subplot(1,2,2)
plt.bar(counts.keys(), counts.values(), color='orange')
plt.title('Counts')
plt.ylabel('Count')

plt.suptitle('Social Distancing Detection Report (reversed)')
# leave the top 5% of the figure free for the suptitle
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()