In [None]:
import cv2
import torch
import numpy as np
from depth_anything_v2.dpt import DepthAnythingV2
import matplotlib.pyplot as plt


In [None]:
# Constructor keyword arguments for each available Depth Anything V2 encoder variant.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

encoder = 'vitl' # or 'vits', 'vitb', 'vitg'

# Prefer the first CUDA device, but fall back to the CPU so that loading the
# model does not crash on a machine without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = DepthAnythingV2(**model_configs[encoder])
# Deserialize the checkpoint on the CPU; load_state_dict copies the values into
# the model's parameters, and the model as a whole is moved to `device` after.
state_dict = torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location='cpu')
model.load_state_dict(state_dict)
# Assigning the result keeps the cell from echoing the full module repr.
model = model.to(device).eval()


In [None]:
# Input frame and the matching .npy depth file for the lego_toy scene.
_scene_dir = '/home/atkesonlab/multLightWorkspace/data/lego_toy'
image_fname = _scene_dir + '/images/left_1.png'
depth_fname = _scene_dir + '/depths/left_1_depth.npy'


In [None]:
# Read the frame exactly as OpenCV delivers it and hand it to the model unchanged.
raw_img = cv2.imread(image_fname)
if raw_img is None:
    # cv2.imread reports a missing or unreadable file by returning None rather
    # than raising, so fail here with a message that names the offending path.
    raise FileNotFoundError(f'Could not read image: {image_fname}')
depth = model.infer_image(raw_img) # HxW raw depth map in numpy
depth.shape

In [None]:
import open3d as o3d
from kornia.geometry import depth_to_3d, depth_to_normals

# 3x3 camera matrix: focal lengths on the diagonal, principal point in the last column.
cam_mtx = np.array([[       1553.9663302771853,      0.0,       339.47357153102087    ],
                    [      0.0,      1556.191376668082,      288.44960314558057    ],
                    [      0.0,       0.0,       1.0    ]  ])


def build_point_cloud(depth_map, rgb_image, camera_matrix):
    """Back-project a depth map into a coloured Open3D point cloud with normals.

    Args:
        depth_map: 2-D numpy depth map (one value per pixel).
        rgb_image: numpy image with the same height/width as ``depth_map`` and
            3 colour channels in the last axis, already scaled for Open3D.
        camera_matrix: 3x3 numpy camera matrix.

    Returns:
        An ``o3d.geometry.PointCloud`` holding one point, normal and colour per pixel.
    """
    # kornia expects a leading batch axis (plus a channel axis for the depth).
    depth_t = torch.from_numpy(depth_map[None, None, ...])
    cam_t = torch.from_numpy(camera_matrix[None, ...])

    points = depth_to_3d(depth_t, camera_matrix=cam_t, normalize_points=False).squeeze()
    normals = depth_to_normals(depth=depth_t, camera_matrix=cam_t).squeeze()

    cloud = o3d.geometry.PointCloud()
    # Both tensors are 3xHxW after squeeze; flatten the pixels and transpose to Nx3.
    cloud.points = o3d.utility.Vector3dVector(points.reshape(3, -1).detach().cpu().numpy().T)
    cloud.normals = o3d.utility.Vector3dVector(normals.reshape(3, -1).detach().cpu().numpy().T)
    cloud.colors = o3d.utility.Vector3dVector(rgb_image.reshape(-1, 3))
    return cloud


# Reload the frame as RGB scaled to [0, 1]. `raw_img` is deliberately left bound
# to this RGB version because the segmentation cell further down reads it.
raw_img = cv2.imread(image_fname)
if raw_img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f'Could not read image: {image_fname}')
raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)/255.0

pcd = build_point_cloud(depth, raw_img, cam_mtx)

o3d.io.write_point_cloud('./points.ply', pcd)


In [None]:
from skimage.segmentation import felzenszwalb, quickshift 
from skimage.segmentation import mark_boundaries

# Over-segment the image; the boundaries are drawn on top of it for inspection.
# Alternative segmenter kept for reference:
# segments = felzenszwalb(raw_img, scale = 0.1, sigma= 0.1, min_size= 200)
segments = quickshift(raw_img, kernel_size=11, max_dist=6, ratio=0.5)
plt.imshow(mark_boundaries(raw_img, segments))
plt.show()
# Segment labels start at 0, so the number of segments is the largest label
# plus one. Using the bare maximum makes `range(num_segments)` skip the last one.
num_segments = int(np.max(segments)) + 1


In [None]:
# Depth map loaded from disk, a zero buffer of the same shape, and the
# per-segment accumulators that the fitting loop appends to.
true_depth = np.load(depth_fname)
scaled_depth = np.zeros_like(true_depth)
scale_all, shift_all, weights_all = [], [], []

def compute_scale_and_shift(prediction, target, mask):
    """Closed-form masked least-squares fit of ``scale * prediction + shift`` to ``target``.

    All reductions run over dims 1 and 2, so each entry along dim 0 gets its own
    2x2 normal-equation system ``A @ [scale, shift] = b`` with
    ``A = [[sum(m*p*p), sum(m*p)], [sum(m*p), sum(m)]]`` and
    ``b = [sum(m*p*t), sum(m*t)]``.

    Args:
        prediction: tensor with at least 3 dims; values to be rescaled.
        target: tensor of the same shape; values to fit against.
        mask: tensor of the same shape; per-element weight (0 excludes an element).

    Returns:
        ``(scale, shift)`` tensors, one entry per index along dim 0. Entries whose
        system is singular (zero determinant) are left at 0.
    """
    reduce_dims = (1, 2)
    masked_pred = mask * prediction

    # Entries of the symmetric system matrix A.
    a_00 = torch.sum(masked_pred * prediction, reduce_dims)
    a_01 = torch.sum(masked_pred, reduce_dims)
    a_11 = torch.sum(mask, reduce_dims)

    # Right-hand side b.
    b_0 = torch.sum(masked_pred * target, reduce_dims)
    b_1 = torch.sum(mask * target, reduce_dims)

    det = a_00 * a_11 - a_01 * a_01
    solvable = det != 0

    # Cramer's rule, applied only where the determinant is non-zero.
    scale = torch.zeros_like(b_0)
    shift = torch.zeros_like(b_1)
    scale[solvable] = (a_11 * b_0 - a_01 * b_1)[solvable] / det[solvable]
    shift[solvable] = (-a_01 * b_0 + a_00 * b_1)[solvable] / det[solvable]

    return scale, shift

true_depth_t = torch.tensor(true_depth[None,...], dtype=torch.float32, device='cuda:0', requires_grad=False)
pred_depth_t = torch.tensor(depth[None,...], dtype=torch.float32, device='cuda:0', requires_grad=False)
out_depth_t = torch.zeros_like(true_depth_t)

# Fit one (scale, shift) pair per segment. Iterating over the labels actually
# present in `segments` covers every segment, including the highest label,
# and avoids shadowing the builtin `id`.
for seg_id in np.unique(segments):
    mask = (segments == seg_id)
    mask_t = torch.tensor(mask[None,...], dtype=torch.float32, device='cuda:0', requires_grad=False)
    scale, shift = compute_scale_and_shift(pred_depth_t, true_depth_t, mask_t)
    scale_all.append(scale.cpu().numpy())
    shift_all.append(shift.cpu().numpy())
    # Pixel count of the segment, used as its weight in the averages below.
    weights_all.append(mask.sum())

from scipy.stats import mode
# Despite the "mode_" prefix these are pixel-count-weighted means of the
# per-segment fits. ravel() keeps the arrays 1-D even with a single segment,
# where squeeze() would collapse them to 0-d and break np.average.
mode_scale = np.average(np.array(scale_all).ravel(), weights = np.array(weights_all))
mode_shift = np.average(np.array(shift_all).ravel(), weights = np.array(weights_all))

print(f"mode scele {mode_scale} mode shift {mode_shift}")
print(scale_all[-1], shift_all[-1])
# Apply the aggregated scale together with the aggregated shift; pairing the
# global scale with only the last segment's shift mixed two different fits.
out_depth = depth * mode_scale + mode_shift

plt.imshow(out_depth)
plt.show()




In [None]:

# Same back-projection as for the raw prediction, now on the rescaled depth map.
depth_t = torch.from_numpy(out_depth[None, None, ...])
cam_mtx_t = torch.from_numpy(cam_mtx[None, ...])

# Per-pixel 3-D coordinates, split into flat x / y / z vectors.
points = depth_to_3d(depth_t, camera_matrix=cam_mtx_t, normalize_points=False).squeeze()
x, y, z = (points[axis, ...].flatten() for axis in range(3))

# Per-pixel surface normals, split the same way.
normals = depth_to_normals(depth=depth_t, camera_matrix=cam_mtx_t).squeeze()
nx, ny, nz = (normals[axis, ...].flatten() for axis in range(3))

# Colours: reload the frame, convert BGR -> RGB and scale to [0, 1].
raw_img = cv2.cvtColor(cv2.imread(image_fname), cv2.COLOR_BGR2RGB)/255.0
r, g, b = (raw_img[..., channel].flatten() for channel in range(3))

# Stack to 3xN numpy arrays; Open3D receives the Nx3 transposes.
pts = torch.stack([x, y, z], dim=0).detach().cpu().numpy()
normals = torch.stack([nx, ny, nz], dim=0).detach().cpu().numpy()

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pts.T)
pcd.normals = o3d.utility.Vector3dVector(normals.T)
pcd.colors = o3d.utility.Vector3dVector(np.vstack([r, g, b]).T)

o3d.io.write_point_cloud('./points_upgraded.ply', pcd)

In [None]:
# Install hint, kept for reference:
# !pip install -q git+https://github.com/huggingface/transformers.git
from transformers import pipeline

# Mask-generation pipeline backed by the SAM ViT-Huge checkpoint, placed on device 0.
generator = pipeline(
    "mask-generation",
    model="facebook/sam-vit-huge",
    device=0,
)

In [None]:
from PIL import Image
import gc

# The mask generator is fed a PIL image. Note that this rebinds `raw_img`,
# which earlier cells used for a numpy array, to a PIL.Image.
raw_img = Image.open(image_fname).convert("RGB")
outputs = generator(raw_img, points_per_batch=1)

In [None]:
def show_mask(mask, ax, random_color=False):
    """Draw one mask on ``ax`` as a semi-transparent RGBA overlay.

    Args:
        mask: array whose last two dimensions are (height, width); its values
            multiply the overlay colour, so zeros stay fully transparent.
        ax: object exposing ``imshow`` (e.g. a matplotlib Axes).
        random_color: draw the RGB part from ``np.random`` when True, otherwise
            use a fixed blue. Alpha is 0.6 in both cases.
    """
    alpha = 0.6
    if not random_color:
        rgba = np.array([30 / 255, 144 / 255, 255 / 255, alpha])
    else:
        rgba = np.concatenate([np.random.random(3), np.array([alpha])], axis=0)

    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to get an (H, W, 4) image.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)

    # Drop the local reference and trigger a collection right away.
    del mask
    gc.collect()

def show_masks_on_image(raw_image, masks):
    """Show ``raw_image`` with every mask in ``masks`` overlaid in a random colour.

    Args:
        raw_image: anything ``np.array`` can convert to an image array.
        masks: iterable of masks accepted by ``show_mask``; may be empty, in
            which case only the image is shown.
    """
    plt.imshow(np.array(raw_image))
    ax = plt.gca()
    # Freeze the axis limits set by the base image so overlays cannot rescale it.
    ax.set_autoscale_on(False)
    for mask in masks:
        show_mask(mask, ax=ax, random_color=True)
    plt.axis("off")
    plt.show()
    # No `del mask` here: the loop variable is unbound when `masks` is empty, so
    # deleting it raised UnboundLocalError; it goes out of scope on return anyway.
    gc.collect()

In [None]:
# Unpack the mask-generation result and list the scores once up front.
masks, scores = outputs["masks"], outputs["scores"]
print(scores)

# One figure per mask, titled with the score paired with it.
# (Combined overlay alternative: show_masks_on_image(raw_img, masks))
for mask, score in zip(masks, scores):
    plt.imshow(mask)
    plt.title(f"score : {score}")
    plt.show()
