In [3]:
import os
from skimage import io, transform
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms#, utils
# import torch.optim as optim

import sys
import pyrealsense2 as rs
import cv2
import numpy as np
from PIL import Image
import glob

from data_loader import RescaleT
from data_loader import ToTensor
from data_loader import ToTensorLab
from data_loader import SalObjDataset

from model import U2NET # full size version 173.6 MB
from model import U2NETP # small version u2net 4.7 MB

Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
                 It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe


OSError: [WinError 126] The specified module could not be found. Error loading "C:\ProgramData\Anaconda3\lib\site-packages\torch\lib\asmjit.dll" or one of its dependencies.

In [2]:
# normalize the predicted SOD probability map
def normPRED(d):
    """Min-max normalise a prediction tensor to the range [0, 1].

    Args:
        d: torch tensor holding the predicted saliency map.

    Returns:
        A tensor of the same shape as ``d`` computed as
        ``(d - min(d)) / (max(d) - min(d))``.  When every element of ``d`` is
        identical the range is zero; a tensor of zeros is returned in that
        case instead of the NaNs a 0/0 division would produce.
    """
    ma = torch.max(d)
    mi = torch.min(d)
    span = ma - mi
    # A constant map has no contrast to stretch; avoid dividing by zero.
    if span == 0:
        return torch.zeros_like(d)
    return (d - mi) / span

In [3]:
# Network variant to run: 'u2net' (full size) or 'u2netp' (small version).
model_name = 'u2net'
# model_name = 'u2netp'

# Checkpoint path: <current working dir>/saved_models/<model_name>/<model_name>.pth
model_dir = os.path.join(
    os.getcwd(),
    'saved_models',
    model_name,
    model_name + '.pth',
)

In [4]:
# --------- 3. model define ---------
# Build the architecture selected by `model_name`, load its pretrained weights
# from `model_dir`, move it to the GPU when one is available and switch it to
# inference mode.
if model_name == 'u2net':
    print("...load U2NET---173.6 MB")
    net = U2NET(3, 1)
elif model_name == 'u2netp':
    print("...load U2NEP---4.7 MB")
    net = U2NETP(3, 1)
else:
    # Fail here with a clear message instead of a NameError on `net` below.
    raise ValueError("Unknown model_name %r; expected 'u2net' or 'u2netp'" % (model_name,))

# Load onto the CPU first so a checkpoint saved from a CUDA device can still be
# read on a CPU-only machine; the weights are moved to the GPU afterwards.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
net.load_state_dict(torch.load(model_dir, map_location='cpu'))
if torch.cuda.is_available():
    net.cuda()
net.eval()

...load U2NET---173.6 MB


U2NET(
  (stage1): RSU7(
    (rebnconvin): REBNCONV(
      (conv_s1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn_s1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu_s1): ReLU(inplace=True)
    )
    (rebnconv1): REBNCONV(
      (conv_s1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn_s1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu_s1): ReLU(inplace=True)
    )
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (rebnconv2): REBNCONV(
      (conv_s1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn_s1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu_s1): ReLU(inplace=True)
    )
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (rebnconv3): REBNCONV(
      (conv_s1): Conv2d(32, 32, k

In [33]:
def post_process_depth_frame(depth_frame, decimation_magnitude=1.0, spatial_magnitude=2.0, spatial_smooth_alpha=0.5,
                             spatial_smooth_delta=20, temporal_smooth_alpha=0.4, temporal_smooth_delta=20):
    """Run a depth frame through the RealSense post-processing filter chain.

    The chain is decimation -> threshold -> spatial -> temporal -> hole filling.
    The filter objects are created on the first call and reused on later calls:
    the temporal filter smooths across consecutive frames, so a filter that is
    rebuilt for every frame never has any history to smooth with.  The tunable
    options are re-applied on every call, so passing different values still
    takes effect.

    Args:
        depth_frame: frame object for which ``is_depth_frame()`` is true.
        decimation_magnitude: ``filter_magnitude`` option of the decimation filter.
        spatial_magnitude: ``filter_magnitude`` option of the spatial filter.
        spatial_smooth_alpha: ``filter_smooth_alpha`` option of the spatial filter.
        spatial_smooth_delta: ``filter_smooth_delta`` option of the spatial filter.
        temporal_smooth_alpha: ``filter_smooth_alpha`` option of the temporal filter.
        temporal_smooth_delta: ``filter_smooth_delta`` option of the temporal filter.

    Returns:
        The frame produced by the last filter in the chain (hole filling).

    Raises:
        AssertionError: if ``depth_frame`` is not a depth frame.
    """
    # Post processing possible only on the depth_frame
    assert depth_frame.is_depth_frame(), "post_process_depth_frame expects a depth frame"

    # Build the filters once and keep them on the function object so their
    # internal state survives between frames.
    filters = getattr(post_process_depth_frame, "_filters", None)
    if filters is None:
        filters = {
            "decimation": rs.decimation_filter(),
            # Keeps depth between min_dist and max_dist (metres per the
            # librealsense documentation).
            "threshold": rs.threshold_filter(min_dist=0.1, max_dist=3.0),
            "spatial": rs.spatial_filter(),
            "temporal": rs.temporal_filter(),
            "hole_filling": rs.hole_filling_filter(),
        }
        post_process_depth_frame._filters = filters

    filter_magnitude = rs.option.filter_magnitude
    filter_smooth_alpha = rs.option.filter_smooth_alpha
    filter_smooth_delta = rs.option.filter_smooth_delta

    # Apply the control parameters for the filters
    filters["decimation"].set_option(filter_magnitude, decimation_magnitude)
    filters["spatial"].set_option(filter_magnitude, spatial_magnitude)
    filters["spatial"].set_option(filter_smooth_alpha, spatial_smooth_alpha)
    filters["spatial"].set_option(filter_smooth_delta, spatial_smooth_delta)
    filters["temporal"].set_option(filter_smooth_alpha, temporal_smooth_alpha)
    filters["temporal"].set_option(filter_smooth_delta, temporal_smooth_delta)

    # Apply the filters in a fixed order
    filtered_frame = depth_frame
    for stage in ("decimation",      # reduces depth frame density
                  "threshold",       # drops depth outside the configured range
                  "spatial",         # edge-preserving spatial smoothing
                  "temporal",        # reduces temporal noise
                  "hole_filling"):   # fills missing depth values
        filtered_frame = filters[stage].process(filtered_frame)

    return filtered_frame

In [34]:
# Distance to ground (units are not stated here; presumably cm -- TODO confirm)
distance_to_ground = 89.5
# Pixel scale
pixel_scale = 0.09532857437299758

# Background buffers for depth and colour; both start out as separate empty lists
depth_background, color_background = [], []
background_depth = 0

# Two coordinate pairs plus a click flag, all reset
# (presumably for a mouse-drawn rectangle -- TODO confirm against the code that uses them)
x0 = y0 = x1 = y1 = 0
click = False

# Kernel size of morphology: 5x5 block of ones
kernel = np.ones((5, 5), dtype=np.uint8)

In [50]:
# Create a pipeline streaming depth (z16) and colour (bgr8) at 1280x720, 30 fps
pipeline = rs.pipeline()

config = rs.config()
config.enable_stream(rs.stream.depth, 1280, 720, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
profile = pipeline.start(config)

# Everything after start() runs under try/finally so the camera is released and
# the preview window is closed even when processing a frame raises.
try:
    # Align the depth frame to the colour stream so both share pixel coordinates
    align_to = rs.stream.color
    align = rs.align(align_to)

    # Multiplying raw depth values by depth_scale gives the distance that is
    # converted to centimetres (x100) further down.
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()

    use_cuda = torch.cuda.is_available()
    # Per-channel (R, G, B) mean / std used to standardise the network input
    rgb_mean = np.array([0.485, 0.456, 0.406])
    rgb_std = np.array([0.229, 0.224, 0.225])

    while True:
        raw_frame = pipeline.wait_for_frames()
        aligned_frame = align.process(raw_frame)
        aligned_depth_frame = aligned_frame.get_depth_frame()
        color_frame = aligned_frame.get_color_frame()

        # Validate both frames BEFORE touching their data
        if not aligned_depth_frame or not color_frame:
            continue

        frame = np.asanyarray(color_frame.get_data())
        depth_filtered_frame = post_process_depth_frame(aligned_depth_frame)
        depth_frame = np.asanyarray(depth_filtered_frame.get_data())
        h, w = frame.shape[:2]

        # ---- build the 1x3x320x320 float32 network input ----
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = transform.resize(image, (320, 320), mode='constant')
        peak = np.max(image)
        if peak > 0:
            # An all-black frame would otherwise divide by zero and feed NaNs to the net
            image = image / peak
        tmpImg = ((image - rgb_mean) / rgb_std).astype(np.float32)
        tmpImg = tmpImg.transpose((2, 0, 1)).reshape(1, 3, 320, 320)
        inputs_test = torch.from_numpy(tmpImg)
        if use_cuda:
            inputs_test = inputs_test.cuda()

        # ---- saliency prediction; no autograd graph is needed for inference ----
        with torch.no_grad():
            d1 = net(inputs_test)[0]          # first output is the one used for the mask
            pred = normPRED(d1[:, 0, :, :])   # normalization
        predict_np = np.uint8(pred.squeeze().detach().cpu().numpy() * 255)
        # Drop the tensors right away so nothing lingers after the loop ends
        del d1, pred, inputs_test

        # ---- binary mask at camera resolution ----
        ret, th = cv2.threshold(predict_np, 127, 255, cv2.THRESH_BINARY)
        th = cv2.resize(th, (w, h))
        th_3 = cv2.cvtColor(th, cv2.COLOR_GRAY2BGR)

        depth_segm = depth_frame[th == 255]
        frame_segm = cv2.bitwise_and(frame, th_3)

        # ---- object distance ----
        object_depth = depth_segm.astype(np.float32) * depth_scale
        # Zero depth is replaced by a large sentinel so it sorts to the far end
        object_depth[object_depth == 0] = 300
        if object_depth.size > 0:
            # Value 10% of the way into the sorted depths, converted x100 for display in cm
            object_distance = np.sort(object_depth).reshape(-1)[int(object_depth.size * 0.1)] * 100
        else:
            # Empty mask: nothing was segmented, so there is no depth to index
            object_distance = None

        if object_distance is None or object_distance > 299:
            cv2.putText(frame_segm, 'No object', (20, 40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 1, cv2.LINE_AA)
        else:
            cv2.putText(frame_segm, '%f cm'%object_distance, (20, 40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 1, cv2.LINE_AA)

        # ---- side-by-side view: original | segmented ----
        concat = np.zeros((h, w * 2, 3), np.uint8)
        concat[:, :w, :] = frame
        concat[:, w:w * 2, :] = frame_segm

        cv2.imshow('concat', concat)
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # Esc quits
            break
finally:
    # Release the camera first so it is freed even if closing the window fails
    pipeline.stop()
    cv2.destroyAllWindows()