Just for a little background: I trained a custom object detector using your train.py code. After that, I tested it with inference.py, passing the necessary terminal flags so that my model was used.
After that, I tried editing your inference.py so that, instead of going through the photos in a folder, it uses the input from the Raspberry Pi Camera that I have. I know the Raspberry Pi Camera works because I can open it via OpenCV. I made a lot of changes to the code, but essentially I added a camera flag that, when set to True, uses the camera feed instead.
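For reference, I invoke the script roughly like this (the flag letters come from the plac annotations in the code below; the weights filename is just a placeholder for my own model file): `python inference.py -T FP16 -W my_weights.h5 -C True`.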
Here is the code that I have:
```python
import numpy as np
import time
import plac
import os
import cv2
import gi

from model import MobileDetectNetModel

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
gi.require_version('Gst', '1.0')


@plac.annotations(
    inference_type=("Type of inference to test (TF, FP32, FP16, INT8)", 'option', 'T', str),
    batch_size=("Size of the TensorRT batch", 'option', 'B', int),
    weights=("Model weights", 'option', 'W', str),
    test_path=("Test images path", 'option', 'I', str),
    merge=("Test images only: Merge detected regions", 'flag', 'm', bool),
    stage=("Test images only: Augmentation training stage", 'option', 's', str),
    limit=("Test images only: Max number of images to run inference on", 'option', 'l', int),
    confidence=("Test images only: Minimum confidence in coverage to draw bbox", "option", "c", float),
    visualize=("Visualize the inference", "option", "V", bool),
    camera=("Use camera feed. Ignores test_path. Boolean.", "option", "C", bool)
)
# Set inference_type to FP16 to use TensorRT
def main(inference_type: str = "FP16",
         batch_size: int = 1,
         test_path: str = None,
         weights: str = None,
         merge: bool = False,
         stage: str = "test",
         limit: int = 20,
         confidence: float = 0.1,
         visualize: bool = True,
         camera: bool = False):
    keras_model = MobileDetectNetModel.complete_model()

    if weights is not None:
        keras_model.load_weights(weights, by_name=True)

    images_done = 0

    if test_path is not None:
        if stage != 'test':
            from generator import MobileDetectNetSequence
            seq = MobileDetectNetSequence.create_augmenter(stage)
        else:
            seq = None

        images_full = []
        images_input = []
        images_scale = []

        for r, d, f in os.walk(test_path):
            for file in f:
                image_full = cv2.imread(os.path.join(r, file))
                image_input = cv2.resize(image_full, (224, 224))

                scale_width = image_full.shape[1] / 224
                scale_height = image_full.shape[0] / 224
                images_scale.append((scale_width, scale_height))

                if stage != 'test':
                    seq_det = seq.to_deterministic()
                    image_aug = (seq_det.augment_image(image_input).astype(np.float32) / 127.5) - 1.
                else:
                    image_aug = image_input.astype(np.float32) / 127.5 - 1.

                images_full.append(image_full)
                images_input.append(image_aug)

                images_done += 1
                if images_done == limit:
                    break

            if images_done == limit:
                break

        x_test = np.array(images_input)
    else:
        # x_test = np.random.random((limit, 224, 224, 3))
        x_test = np.random.random((224, 224, 3))

    x_cold = np.random.random((batch_size, 224, 224, 3))

    print(f'Inference Type is {inference_type}')

    if inference_type == 'K':
        keras_model.predict(x_cold)
        t0 = time.time()
        model_outputs = keras_model.predict(x_test)
        t1 = time.time()
    elif inference_type == 'TF':
        tf_engine = keras_model.tf_engine()
        tf_engine.infer(x_cold)
        t0 = time.time()
        model_outputs = tf_engine.infer(x_test)
        t1 = time.time()
    elif inference_type == 'FP32':
        tftrt_engine = keras_model.tftrt_engine(precision='FP32', batch_size=batch_size)
        tftrt_engine.infer(x_cold)
        t0 = time.time()
        model_outputs = tftrt_engine.infer(x_test)
        t1 = time.time()
    # WE ARE USING THIS INFERENCE TYPE, TFTRT
    elif inference_type == 'FP16':
        tftrt_engine = keras_model.tftrt_engine(precision='FP16', batch_size=batch_size)
        tftrt_engine.infer(x_cold)
        # t0 = time.time()
        # model_outputs = tftrt_engine.infer(x_test)
        # t1 = time.time()
    elif inference_type == 'INT8':
        tftrt_engine = keras_model.tftrt_engine(precision='INT8', batch_size=batch_size)
        tftrt_engine.infer(x_cold)
        t0 = time.time()
        model_outputs = tftrt_engine.infer(x_test)
        t1 = time.time()
    else:
        raise ValueError("Invalid inference type")

    # print('Time: ', t1 - t0)
    # print('FPS: ', x_test.shape[0] / (t1 - t0))

    if not visualize:
        return

    # if len(model_outputs) == 2:
    #     classes, bboxes = model_outputs
    # TF / TensorRT models won't output regions (not useful for production)
    # elif len(model_outputs) == 3:
    #     regions, bboxes, classes = model_outputs
    # else:
    #     raise ValueError("Invalid model length output")
    if test_path is not None and camera is False:
        import matplotlib.pyplot as plt
        from matplotlib.colors import LinearSegmentedColormap

        # get colormap
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))

        # change alpha values
        color_array[:, -1] = np.linspace(0.0, 1.0, ncolors)

        # create a colormap object
        map_object = LinearSegmentedColormap.from_list(name='viridis_alpha', colors=color_array)

        # register this new colormap with matplotlib
        plt.register_cmap(cmap=map_object)

        for idx in range(0, len(images_full)):
            rectangles = []

            # Iterate over the 7x7 coverage grid of the model output
            for y in range(0, 7):
                for x in range(0, 7):
                    if classes[idx, y, x, 0] >= confidence:
                        rect = [
                            int(bboxes[idx, int(y), int(x), 0] * 224),
                            int(bboxes[idx, int(y), int(x), 1] * 224),
                            int(bboxes[idx, int(y), int(x), 2] * 224),
                            int(bboxes[idx, int(y), int(x), 3] * 224)]
                        rectangles.append(rect)

            if merge:
                rectangles, merges = cv2.groupRectangles(rectangles, 1, eps=0.75)

            scale_width, scale_height = images_scale[idx]

            for rect in rectangles:
                cv2.rectangle(images_full[idx],
                              (int(rect[0] * scale_width), int(rect[1] * scale_height)),
                              (int(rect[2] * scale_width), int(rect[3] * scale_height)),
                              (0, 255, 0), 5)

            plt.imshow(cv2.cvtColor(images_full[idx], cv2.COLOR_BGR2RGB), alpha=1.0, aspect='auto')
            plt.imshow(
                cv2.resize(classes[idx].reshape((7, 7)),
                           (images_full[idx].shape[1], images_full[idx].shape[0])),
                interpolation='nearest', alpha=0.5, cmap='viridis_alpha', aspect='auto')
            plt.show()

    font = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (10, 500)
    fontScale = 1
    fontColor = (255, 255, 255)
    lineType = 2
    if camera is True:
        print('camera flag detected!')

        # GStreamer pipeline for the Raspberry Pi camera via nvarguscamerasrc
        # cap = cv2.VideoCapture("nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720, format=(string)NV12, framerate=(fraction)21/1 ! nvvidconv flip-method=2 ! video/x-raw, format=(string)BGRx, width=(int)960, height=(int)616 ! videoconvert ! video/x-raw, format=(string)BGR ! appsink")
        cap = cv2.VideoCapture("nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720, format=(string)NV12, framerate=(fraction)60/1 ! nvvidconv flip-method=2 ! video/x-raw, format=(string)BGRx, width=(int)960, height=(int)616 ! videoconvert ! appsink")

        if cap.isOpened():
            cv2.namedWindow("demo")
            while True:
                ret_val, image_np = cap.read()
                image_raw = image_np
                # print(f'*** original shape is {image_np.shape}')

                # Expand the dimensions
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # print(f'*** image expanded shape is {image_np_expanded.shape}')

                images_full = []
                images_input = []
                images_scale = []

                dim = (224, 224)
                image_input = cv2.resize(image_raw, (224, 224))
                # image_input = image_np_expanded
                # print(f'image_raw shape is = {image_input.shape}')

                # Note: this adds a batch dimension to the raw resized frame;
                # image_aug below is computed but never passed to the engine.
                image_full = np.expand_dims(image_input, axis=0)
                # print(f'image_full shape after expanding is = {image_full.shape}')

                # scale_width = image_full.shape[1] / 224
                # scale_height = image_full.shape[0] / 224
                # images_scale.append((scale_width, scale_height))

                if stage != 'test':
                    seq_det = seq.to_deterministic()
                    image_aug = (seq_det.augment_image(image_input).astype(np.float32) / 127.5) - 1.
                else:
                    image_aug = image_input.astype(np.float32) / 127.5 - 1.

                # images_full.append(image_full)
                # images_full.append(image_aug)

                t0 = time.time()
                model_outputs = tftrt_engine.infer(image_full)
                t1 = time.time()
                rectangles = []
                # print(f'length of model_outputs is = {len(model_outputs)}')

                if len(model_outputs) == 2:
                    classes, bboxes = model_outputs
                # TF / TensorRT models won't output regions (not useful for production)
                elif len(model_outputs) == 3:
                    regions, bboxes, classes = model_outputs
                else:
                    raise ValueError("Invalid model length output")

                framerate = 1.0 / (t1 - t0)
                # print('Time: ', t1 - t0)
                # print('FPS: ', framerate)

                for y in range(0, 7):
                    for x in range(0, 7):
                        # print(f'confidence is = {classes[0, y, x, 0]}')
                        if classes[0, y, x, 0] >= confidence:
                            rect = [
                                int(bboxes[0, int(y), int(x), 0] * 224),
                                int(bboxes[0, int(y), int(x), 1] * 224),
                                int(bboxes[0, int(y), int(x), 2] * 224),
                                int(bboxes[0, int(y), int(x), 3] * 224)]
                            print(f'rectangle is = {rect}')
                            rectangles.append(rect)
                        # else:
                        #     print('confidence not high enough')

                rectangles, merges = cv2.groupRectangles(rectangles, 1, eps=0.75)
                # scale_width, scale_height = images_scale[idx]

                if len(rectangles) > 0:
                    print(f'rectangle count is = {len(rectangles)}')

                for rect in rectangles:
                    cv2.rectangle(image_raw,
                                  (int(rect[0]), int(rect[1])),
                                  (int(rect[2]), int(rect[3])),
                                  (0, 255, 0), 5)

                cv2.putText(image_raw, "FPS: {0:.2f}".format(framerate),
                            bottomLeftCornerOfText, font, fontScale, fontColor, lineType)
                cv2.imshow("demo", image_raw)

                if cv2.waitKey(1) == ord('q'):
                    break
        else:
            print('camera open failed')

        cv2.destroyAllWindows()


if __name__ == '__main__':
    plac.call(main)
```
Basically what happens is that I grab the captured frame and run it through inference. When I run the script (again, using my own model), the camera feed opens just fine, but when I point it at a photo of the object I trained on (the same photos from the folder I test with), it no longer detects my object.
In short, I'm trying to use my model with your base inference.py code to run object detection on the camera feed, but I haven't had any luck.
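To make the comparison concrete, here is a rough sketch of how a single frame would look if it went through the same preprocessing as the folder images (the helper name is made up; `cap` and `tftrt_engine` are the objects created in the script above):

```python
import cv2
import numpy as np


def preprocess_frame(frame):
    """Same steps the folder branch applies in the 'test' stage:
    resize to the 224x224 network input, scale pixels to [-1, 1],
    and add a batch dimension."""
    image_input = cv2.resize(frame, (224, 224))
    image_aug = image_input.astype(np.float32) / 127.5 - 1.0
    return np.expand_dims(image_aug, axis=0)  # shape (1, 224, 224, 3)


# Inside the capture loop (cap and tftrt_engine set up as in the script above):
# ret_val, frame = cap.read()
# model_outputs = tftrt_engine.infer(preprocess_frame(frame))
```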