-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_TF_object_detection.py
306 lines (244 loc) · 11.3 KB
/
my_TF_object_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
"""A demo for object detection using TF models from the model zoo.
The intention was to have a single script able to run a model without
the need of any other utility or dependency than cv2 on top of the regular ones.
There is a lot of good code ready to use from the TF repo.
Reference for this code:
https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
The model zoo:
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
Download one of the models and load the model with --model path/to/frozen_model.pb
Optional: Add the labels in txt format
"""
import time
import argparse
from collections import deque
# Libs used for saving images and outputs
from pathlib import Path
import uuid
# For TF
import numpy as np
import tensorflow as tf
#For webcam capture and drawing boxes
import cv2
# Parameters for visualizing the labels and boxes
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SIZE = 0.6
FONT_THICKNESS = 1
LINE_WEIGHT = 1
SHOW_CONFIDENCE_IN_LABEL = True
IMAGE_OUTPUT_FOLDER = Path('savedimages') # for saving detected images
SAVE_RESULT_IMAGES = False #This is for saving the detected images on disk
# Use case: To be able to use a model zoo for generating data for re-training other models
def main():
    """Run the live object-detection demo.

    Loads a frozen TF detection graph from --model, optionally a label map
    from --label, then loops over camera frames: run inference, draw the
    detections plus an fps overlay, and display. Press 'q' to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='Path of the detection model.', required=True)
    parser.add_argument(
        '--label', help='Path of the labels file.')
    # parser.add_argument(
    #     '--mode', help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
    #     required=True)
    parser.add_argument(
        '--camera', help='Camera source (if multiple available)', type=int, required=False)
    args = parser.parse_args()
    # Initialize the camera. `is not None` (not plain truthiness) so an
    # explicit `--camera 0` is honored rather than treated as "missing".
    camera = args.camera if args.camera is not None else 0
    cam = cv2.VideoCapture(camera)
    #cam = cv2.VideoCapture('videoplayback2.mp4') ## If you want to capture from a video
    # Path to the frozen detection graph (.pb from the model zoo).
    PATH_TO_FROZEN_GRAPH = args.model
    # Optional labels file (pbtxt/txt from the model zoo).
    PATH_TO_LABELS = args.label
    print("Starting session")
    detection_graph = tf.Graph()
    sess = tf.Session(graph=detection_graph)
    print("Loading frozen graph")
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')
        # Get handles to input and output tensors. This must run while the
        # detection graph is still the default graph.
        print("Getting both ends of the model")
        input_tensor = get_input_tensor()    # the image placeholder
        output_tensor = get_output_tensor()  # the detection outputs
    labels = read_label_file(PATH_TO_LABELS) if PATH_TO_LABELS else {}
    # Rolling window of frame timestamps for the fps overlay.
    frame_times = deque(maxlen=40)
    # Start capturing
    print("Starting the camera")
    while cam.isOpened():
        ret, cv2_im = cam.read()
        if not ret:
            # Camera unplugged or end of video stream: stop cleanly instead
            # of feeding None into the model.
            break
        start_inference_time = time.time() * 1000
        # This is where inference happens
        output_dict = identify_with_npimage(cv2_im,
                                            sess,
                                            output_tensor,
                                            input_tensor,
                                            threshold=0.05,
                                            top_k=10)
        last_inference_time = time.time() * 1000 - start_inference_time
        confidence = output_dict['detection_scores'] if SHOW_CONFIDENCE_IN_LABEL else {}
        real_num_detection = output_dict['num_detections']
        draw_rectangles(cv2_im,
                        real_num_detection,
                        output_dict['detection_boxes'],
                        output_dict['detection_classes'],
                        confidence,
                        labels=labels)
        frame_times.append(time.time())
        # +0.001 avoids division by zero on the very first frame.
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
        fps_line = "{:.1f}fps / {:.2f}ms".format(fps, last_inference_time)
        draw_text(cv2_im, fps_line + " Detections: " + str(real_num_detection))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # Release resources on the way out instead of calling exit() from
    # inside the loop (which skipped cam.release()).
    cam.release()
    cv2.destroyAllWindows()
def get_input_tensor():
    """Return the 'image_tensor:0' input placeholder of the default graph.

    Must be called while the loaded detection graph is the default graph.
    """
    return tf.get_default_graph().get_tensor_by_name('image_tensor:0')
def get_output_tensor():
    """Collect the detection output tensors from the current default graph.

    Returns a dict mapping the standard TF Object Detection API output
    names ('num_detections', 'detection_boxes', 'detection_scores',
    'detection_classes' and, if the model provides it, 'detection_masks')
    to the corresponding graph tensors. Must be called while the detection
    graph is the default graph.
    """
    ops = tf.get_default_graph().get_operations()
    # Names of every tensor produced by any op in the graph.
    all_tensor_names = {output.name for op in ops for output in op.outputs}
    tensor_dict = {}
    for key in [
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks'
    ]:
        # Only keep the outputs the loaded model actually exposes.
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
    if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        # NOTE(review): the sliced boxes/masks above are never consumed —
        # the reframing step below is commented out, so this branch only
        # adds unused ops to the graph and tensor_dict is returned unchanged.
        #detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        #    detection_masks, detection_boxes, image.shape[0], image.shape[1])
        #detection_masks_reframed = tf.cast(
        #    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        #tensor_dict['detection_masks'] = tf.expand_dims(
        #    detection_masks_reframed, 0)
    return tensor_dict
def identify_with_npimage(image_np, sess, tensor_dict, image_tensor, threshold=0.5, top_k=10):
    """Run a single inference pass on one BGR/RGB numpy image.

    Feeds the image (with a batch dimension added) to the session, then
    strips the batch dimension from every output and converts the raw
    float32 arrays to convenient native types. `threshold` and `top_k`
    are accepted for interface compatibility but not applied here.
    """
    # The model expects a batch: shape [1, height, width, 3].
    batched = image_np[np.newaxis, ...]
    results = sess.run(tensor_dict, feed_dict={image_tensor: batched})
    # Drop the batch dimension; cast counts/classes to integer types.
    results['num_detections'] = int(results['num_detections'][0])
    results['detection_classes'] = results['detection_classes'][0].astype(np.int64)
    results['detection_boxes'] = results['detection_boxes'][0]
    results['detection_scores'] = results['detection_scores'][0]
    if 'detection_masks' in results:
        results['detection_masks'] = results['detection_masks'][0]
    return results
# Not needed for this particular example
def load_pil_image_into_numpy_array(image):
    """Convert a PIL image into an (height, width, 3) uint8 numpy array."""
    width, height = image.size
    flat = np.array(image.getdata())
    return flat.reshape((height, width, 3)).astype(np.uint8)
def read_label_file(file__path):
    """Parse a label file with one '<id> <name>' pair per line.

    Args:
        file__path: path to the text label file.

    Returns:
        dict mapping int class id -> label name.

    Blank or malformed lines (fewer than two fields) are skipped instead
    of raising IndexError as the original did; the file is streamed line
    by line rather than read fully into memory.
    """
    labels = {}
    with open(file__path, 'r') as file:
        for line in file:
            pair = line.strip().split(maxsplit=1)
            if len(pair) < 2:
                continue  # skip blank/malformed lines
            labels[int(pair[0])] = pair[1].strip()
    return labels
def draw_rectangles(image_np, num_detections, boxes, classes, scores, labels=None):
    """Draw detection boxes above a confidence threshold onto image_np, in place.

    Args:
        image_np: image array (modified in place).
        num_detections: number of valid entries in boxes/classes/scores.
        boxes: normalized [ymin, xmin, ymax, xmax] rows aligned with classes.
        classes: class ids aligned with boxes.
        scores: confidences aligned with boxes; pass an empty container to
            draw every box without thresholding or score text (the original
            raised KeyError in that case).
        labels: optional {class_id: name} map; the original used a mutable
            default argument ({}), replaced here with the None sentinel.
    """
    min_confidence = 0.2
    if labels is None:
        labels = {}
    has_scores = len(scores) > 0
    im_height, im_width = image_np.shape[:2]
    # Save the full frame once if the top-ranked detection clears the threshold.
    if SAVE_RESULT_IMAGES and has_scores and scores[0] > min_confidence:
        image_filename = save_image(image_np)
    else:
        image_filename = None
    for i, box in enumerate(boxes):
        # Only the first num_detections entries are valid (fixes an
        # off-by-one where one box was drawn even when num_detections == 0).
        if i >= num_detections:
            break
        if has_scores and scores[i] <= min_confidence:
            continue
        # Scale the normalized coordinates to pixels.
        ymin = box[0] * im_height
        xmin = box[1] * im_width
        ymax = box[2] * im_height
        xmax = box[3] * im_width
        rectangle = [[xmin, ymin], [xmax, ymax]]
        # For saving the box and writing the csv-boxes for the original image
        if SAVE_RESULT_IMAGES:
            save_boxes(image_np, rectangle, image_filename)
        score = "({0:.2f})".format(scores[i]) if has_scores else ""
        if labels:
            caption = labels.get(classes[i], str(classes[i])) + score
        else:
            caption = str(classes[i]) + score
        draw_single_rectangle(rectangle, image_np, caption)
def draw_single_rectangle(rectangle, image_np, label=None):
    """Draw one bounding box (and an optional filled label banner) in place.

    `rectangle` is [[xmin, ymin], [xmax, ymax]] in pixel coordinates.
    """
    box_color = (255, 0, 0)
    top_left = (int(rectangle[0][0]), int(rectangle[0][1]))
    bottom_right = (int(rectangle[1][0]), int(rectangle[1][1]))
    cv2.rectangle(image_np, top_left, bottom_right, color=box_color, thickness=LINE_WEIGHT)
    if not label:
        return
    (text_w, text_h), _baseline = cv2.getTextSize(label, FONT, FONT_SIZE, FONT_THICKNESS)
    # Filled banner sized to the text, with the text drawn on top of it.
    text_origin = (top_left[0] + 5, top_left[1] + 5 + text_h)
    banner_corner = (top_left[0] + 10 + text_w, top_left[1] + 10 + text_h)
    cv2.rectangle(image_np, top_left, banner_corner, color=(255, 0, 0), thickness=-1)
    cv2.putText(image_np, label, text_origin, FONT, FONT_SIZE, (255, 255, 255),
                FONT_THICKNESS, cv2.LINE_AA)
def draw_text(image_np, label, pos=0):
    """Write a line of green status text in place; pos selects a 30px-high row."""
    origin = (0, 20 + 30 * pos)
    cv2.putText(image_np, label, origin, FONT, FONT_SIZE, (0, 255, 0), 1, cv2.LINE_AA)
def save_image(image_np):
    """Write the full frame as <uuid>.jpg under IMAGE_OUTPUT_FOLDER.

    Returns the generated filename (without the folder) so callers can
    reference the saved frame, e.g. in the CSV written by save_boxes.
    """
    img_filename = '{}.jpg'.format(uuid.uuid4())
    destination = str(IMAGE_OUTPUT_FOLDER / img_filename)
    cv2.imwrite(destination, image_np)
    return img_filename
def save_boxes(image_np, rectangle, img_filename):
    """Save the box crop as its own jpg and append a CSV row describing it.

    Args:
        image_np: the full frame the box was detected in.
        rectangle: [[xmin, ymin], [xmax, ymax]] in pixel coordinates.
        img_filename: filename of the already-saved full frame, recorded in
            the CSV so the box can be tied back to its source image.

    Fixes: removed the redundant file.close() inside the with-block and the
    unused channel count from the shape unpacking.
    """
    p1 = (int(rectangle[0][0]), int(rectangle[0][1]))
    p2 = (int(rectangle[1][0]), int(rectangle[1][1]))
    facebox_filename = '{}-box.jpg'.format(uuid.uuid4())
    facebox_path = str(IMAGE_OUTPUT_FOLDER / facebox_filename)
    csv_path = str(IMAGE_OUTPUT_FOLDER / "image_list.csv")
    # The box content only, for training a classification model
    face_img = image_np[p1[1]:p2[1], p1[0]:p2[0]]
    cv2.imwrite(facebox_path, face_img)
    # The box coordinates + filename for retraining an object detection model
    # (This should be a sub process anyway)
    h, w = image_np.shape[:2]
    # CSV columns: filename, width, height, class, xmin, ymin, xmax, ymax
    newline = img_filename + ",{0},{1},tbd,{2},{3},{4},{5}\n".format(w, h, p1[0], p1[1], p2[0], p2[1])
    # append mode; the with-statement closes the file
    with open(csv_path, "a+") as file:
        file.write(newline)
# Script entry point: only run the demo when executed directly, not on import.
if __name__ == '__main__':
    main()