-
Notifications
You must be signed in to change notification settings - Fork 0
/
object_detection.py
136 lines (117 loc) · 4.1 KB
/
object_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import argparse
import time
from typing import List, Any, Union
import imutils
import cv2
import numpy as np
# object detection demo
from utils.drawing import draw_crosshair, draw_dot, draw_annotations
from utils.utils import calculate_midpoint, VideoStream, FPS, get_frame_height_width
# Class labels in the order of the network's class indices: the detector
# reports an index into this list, so do not change the order.
AVAILABLE_CLASSES: List[str] = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

# Only detections whose label is listed here are drawn and reported.
DISPLAY_CLASSES: List[str] = [
    "person",
]


def parse_arguments(argv: Union[List[str], None] = None) -> dict:
    """Parse the command-line options of the detection demo.

    Args:
        argv: Argument strings to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        A dict with the keys ``prototxt``, ``model`` and ``confidence``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-p", "--prototxt", required=True, help="path to Caffe 'deploy' prototxt file"
    )
    ap.add_argument(
        "-m", "--model", required=True, help="path to Caffe pre-trained model"
    )
    ap.add_argument(
        "-c",
        "--confidence",
        type=float,
        default=0.2,
        help="minimum probability to filter weak detections",
    )
    return vars(ap.parse_args(argv))


def scale_box(relative_box, width, height):
    """Scale a relative (startX, startY, endX, endY) box to pixel coordinates.

    Args:
        relative_box: Four box coordinates expressed as fractions of the
            frame size.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A tuple ``(startX, startY, endX, endY)`` of NumPy integers; the
        fractional part is truncated by the integer cast.
    """
    scaled = np.asarray(relative_box) * np.array([width, height, width, height])
    return tuple(scaled.astype("int"))


def average_fps(frame_count: int, elapsed_seconds: float) -> float:
    """Return the mean frames per second, or 0.0 when no time has elapsed."""
    if elapsed_seconds <= 0:
        return 0.0
    return frame_count / elapsed_seconds


def main() -> None:
    """Run the detection loop on video source 0 until the `q` key is pressed."""
    arguments = parse_arguments()
    min_confidence = arguments["confidence"]

    # load our serialized model from disk
    print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(arguments["prototxt"], arguments["model"])

    # initialize the video stream, allow the camera sensor to warm up,
    # and initialize the FPS counter
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    try:
        time.sleep(2.0)
        fps = FPS().start()

        # Local bookkeeping for the on-screen average FPS, so the overlay
        # shows a measured value instead of a fixed placeholder.
        frames_shown = 0
        loop_started = time.perf_counter()

        # loop over the frames from the video stream
        while True:
            frame = vs.read()
            if frame is None:
                # Nothing to process; resizing a missing frame would raise.
                print("[INFO] no frame received from the video stream, stopping")
                break
            frame = imutils.resize(frame, width=1600)

            # grab the frame dimensions and convert it to a blob
            (height, width) = get_frame_height_width(frame)
            blob = cv2.dnn.blobFromImage(
                cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5
            )

            # pass the blob through the network and obtain the detections
            # and predictions
            net.setInput(blob)
            detections = net.forward()

            current_fps = average_fps(
                frames_shown, time.perf_counter() - loop_started
            )

            # loop over the detections; in each row, index 1 is used as the
            # class index, index 2 as the confidence and indices 3..6 as the
            # box corners relative to the frame size
            for detection in detections[0, 0]:
                confidence = detection[2]

                # filter out weak detections
                if confidence <= min_confidence:
                    continue

                label = AVAILABLE_CLASSES[int(detection[1])]
                if label not in DISPLAY_CLASSES:
                    continue

                (startX, startY, endX, endY) = scale_box(
                    detection[3:7], width, height
                )
                midpoint = calculate_midpoint(startX, startY, endX, endY)
                print(f"x: {midpoint[0]}, y:{midpoint[1]}")

                draw_crosshair(frame, midpoint)
                draw_dot(frame, midpoint)
                draw_annotations(
                    frame=frame,
                    copy=[
                        f"Detected: {label}",
                        f"Confidence: {round(confidence * 100, 2)} %",
                        f"Coords: x:{midpoint[0]}, y:{midpoint[1]}",
                        f"Average FPS: {current_fps:.1f}",
                    ],
                )

            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF

            # if the `q` key was pressed, break from the loop
            if key == ord("q"):
                break

            # update the FPS counter
            fps.update()
            frames_shown += 1

        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    finally:
        # Always release the window and the video stream, even when the
        # loop exits through an exception or Ctrl+C.
        cv2.destroyAllWindows()
        vs.stop()


if __name__ == "__main__":
    main()