In [2]:
%%capture
import mrcnn
import mrcnn.config
import mrcnn.model
import mrcnn.visualize
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

In [3]:
%%capture captured
CLASS_NAMES = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

class SimpleConfig(mrcnn.config.Config):
    NAME = "coco_inference"
    
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    NUM_CLASSES = len(CLASS_NAMES)

model = mrcnn.model.MaskRCNN(mode="inference", 
                             config=SimpleConfig(),
                             model_dir=os.getcwd())

model.load_weights("/home/kabilan/Desktop/caption/dataset/mask_rcnn_coco.h5", by_name=True)

2022-05-09 22:48:58.713912: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-05-09 22:49:04.168384: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-05-09 22:49:04.168404: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kabilan-pt4987): /proc/driver/nvidia/version does not exist
2022-05-09 22:49:04.169271: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-05-09 22:49:04.197921: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2599990000 Hz
2022-05-09 22:49:04.198843: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5588dc48a5d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-05-09

In [20]:
class Node:
    def __init__(self,start,end,class_id,class_name):
        self.start = start
        self.end = end
        self.class_id = class_id
        self.class_name = class_name
        
    def get_area(self):
        height = self.end[1] - self.start[1]
        width = self.end[0] - self.start[0]
        return width * height
    
    
    def to_json(self):
        mdict = {
            "start_x":self.start[0],
            "start_y":self.start[1],
            "end_x":self.end[0],
            "end_y":self.end[1],
            "class_name":self.class_name,
            "class_id":self.class_id
        }
        return mdict
        

In [21]:
class DirectionService():
    
    def __init__(self):
        self.model = model = mrcnn.model.MaskRCNN(mode="inference",config=SimpleConfig(),model_dir=os.getcwd())
        model.load_weights("/home/kabilan/Desktop/caption/dataset/mask_rcnn_coco.h5", by_name=True)
  
    def get_objects_and_locations(self,image_path):
        image = cv2.cvtColor(image_path, cv2.COLOR_BGR2RGB)
        rois = self.model.detect([image], verbose=0)[0]
        return rois
    
    def get_pos_node(self,image):
        im_width,im_height = image.shape[1],image.shape[0] 
        cur_pos = (im_height,int(im_width/2)) 
        pos_node = Node(cur_pos,cur_pos,-1,"current_position") 
        return pos_node

    def get_angle(self,node_a,node_b):
        x1,y1 = node_a.end
        x2,y2 = node_b.end
        tan = (y2-y1)/(x2-x1)
        return tan 
    
    def distance(self,node_a,node_b):
        return np.linalg.norm(np.array(node_a.end) - np.array(node_b.end))
    
    def get_nodes(self,res):
        objects = []
        for i in range(len(res['rois'])):
            class_name = CLASS_NAMES[res['class_ids'][i]]
            class_id = res['class_ids'][i]
            points = res['rois'][i]
            start = (points[1],points[0])
            end = (points[3],points[2])
            score = res['scores'][i]
            ob = Node(start,end,class_id,class_name)
            if score > 0.8:
                objects.append(ob)
        return objects
    
    def parse_direction(self,pos_node,old_target_node,target_node):
        dir_str = ""
        angle = self.get_angle(pos_node,target_node)
        movement = ""
        if angle < -1.0:
            movement = "right"
        elif angle > 1.0:
            movement = "left"
        else:
            movement = "front"
        dis_str = ""
        if old_target_node is not None:
            if old_target_node.get_area() < target_node.get_area():
                dis_str = "getting closer to"
            else:
                dis_str = "moving away from"
        if dis_str == "":
            return f"move {movement}"
        return f" Your are {dis_str} {CLASS_NAMES[target_node.class_id]},move {movement} "
    
    
    def process(self,image,target_node):
        pos_node = self.get_pos_node(image)
        res = self.get_objects_and_locations(image)
        objects = self.get_nodes(res)
        for each in objects:
            if each.class_id == target_class_id:
                return self.parse_direction(pos_node,target_node,each) ,  each
        
    

In [22]:
target_node = None
target_class_id = CLASS_NAMES.index('bottle') 

detection_service = DirectionService()

for each in os.listdir('../steps'):
    img_path = "/home/kabilan/Desktop/caption/steps/"+each
    image = cv2.imread(img_path,1)
    direction , target_node  = detection_service.process(image,target_node)
    print(target_node.to_json())
    print(direction)

{'start_x': 694, 'start_y': 385, 'end_x': 732, 'end_y': 499, 'class_name': 'bottle', 'class_id': 40}
move front
{'start_x': 701, 'start_y': 413, 'end_x': 742, 'end_y': 544, 'class_name': 'bottle', 'class_id': 40}
 Your are getting closer to bottle,move front 
{'start_x': 466, 'start_y': 382, 'end_x': 500, 'end_y': 480, 'class_name': 'bottle', 'class_id': 40}
 Your are moving away from bottle,move front 
{'start_x': 614, 'start_y': 255, 'end_x': 772, 'end_y': 829, 'class_name': 'bottle', 'class_id': 40}
 Your are getting closer to bottle,move right 
{'start_x': 694, 'start_y': 411, 'end_x': 742, 'end_y': 596, 'class_name': 'bottle', 'class_id': 40}
 Your are moving away from bottle,move front 
{'start_x': 848, 'start_y': 277, 'end_x': 886, 'end_y': 391, 'class_name': 'bottle', 'class_id': 40}
 Your are moving away from bottle,move right 
