In [None]:
import os
import numpy as np
from vgn.networks import load_network
import torch
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from vgn.dataset_voxel_grasp_pc import DatasetVoxelGraspPCOcc
from pathlib import Path
import random
import trimesh

from vgn.simulation import ClutterRemovalSim
from vgn.utils.transform import Rotation, Transform
from vgn.utils.implicit import get_scene_from_mesh_pose_list, as_mesh
from vgn.perception import *

# Seed every RNG this notebook draws from so that "Restart & Run All" is
# reproducible: the stdlib `random` module picks the dataset sample in
# load_data, NumPy samples the camera pose in render_n_images, and torch is
# seeded for any stochastic ops in the network.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

In [None]:
# --- Run configuration -------------------------------------------------------
device = "cpu"
# Scene to visualise (alternative id: "58c827f23ec34543a72a4e7bc6fe362d").
scene = "d7e7d6e296ec4abfaad79acd252ac9b3"
root = Path('data/pile/data_pile_train_constructed_4M_HighRes_radomized_views_GPG_only')
raw_root = Path('data/pile/data_pile_train_random_raw_4M_radomized_views/')
model_path = 'best_models/23-05-01-08-11-39_dataset=data_pile_train_constructed_4M_HighRes_radomized_views,augment=False,net=6d_neu_grasp_pn_deeper,batch_size=32,lr=5e-05,PN_deeper_DIMS_CONT/best_neural_grasp_neu_grasp_pn_deeper_val_acc=0.9097.pt'
# Architecture key handed to load_network. Kept separate from `net` so that
# `net` only ever refers to the loaded model.
net_type = "neu_grasp_pn_deeper"
# NOTE(review): not read by any cell visible here, and the dataset below is
# built with use_grasp_occ=False — confirm which of the two is intended.
net_with_grasp_occ = True

# Stored camera viewpoint, used later to place the plotly camera.
camera_eye = np.load('viewpoint_f614e39ed9df4e1094d569cddc20979b.npy')
see_table = True
# Location of the GIGA assets. Defaults to the original machine-specific path;
# set the GIGA_DATA_ROOT environment variable to run on another machine.
data_root = os.environ.get("GIGA_DATA_ROOT", "/home/hypatia/6D-DAAD/GIGA")
size = 0.3        # edge length passed to TSDFVolume and used for the workspace crop
resolution = 64   # voxels per edge of the TSDF volume

# --- Scene reconstruction in simulation --------------------------------------
mesh_list_file = os.path.join(raw_root, 'mesh_pose_list', scene + '.npz')
# allow_pickle=True can execute arbitrary code embedded in the archive; only
# load files from a trusted source. The context manager closes the .npz handle.
with np.load(mesh_list_file, allow_pickle=True) as mesh_archive:
    mesh_pose_list = mesh_archive['pc']
sim = ClutterRemovalSim('pile', 'pile/train', gui=False, data_root=data_root) # parameters scene and object_set are not used
# Whether the table is part of the simulated scene follows `see_table`.
sim.setup_sim_scene_from_mesh_pose_list(mesh_pose_list, table=see_table, data_root=data_root)
scene_mesh = get_scene_from_mesh_pose_list(mesh_pose_list, data_root=data_root)

# --- Network and dataset -----------------------------------------------------
# eval() switches the model to inference mode and returns the model itself.
net = load_network(model_path, device=device, model_type=net_type).eval()

# NOTE: Renamed "grasps_with_clouds_gt".csv to "grasps_with_clouds.csv"
data = DatasetVoxelGraspPCOcc(root, raw_root, use_grasp_occ=False, num_point_occ=8000)

## Helper Functions

In [None]:
# trimesh.Scene([scene_mesh]).show()

In [None]:
def unsq(x):
    """Convert ``x`` to a float32 tensor and prepend a batch axis of size 1."""
    return torch.as_tensor(x).unsqueeze(0).float()


def load_data(scene):
    """Load one randomly chosen sample of ``scene`` from the global dataset ``data``.

    Args:
        scene: Scene identifier compared against the ``scene_id`` column of
            ``data.df``.

    Returns:
        A tuple ``(pc, (label, width, occ_value),
        (pos, rotations, grasps_pc_local, grasps_pc), pos_occ)`` in which every
        leaf has been passed through ``unsq`` (float32, leading batch axis of 1).

    Raises:
        IndexError: If no row of ``data.df`` belongs to ``scene``.
    """
    # Materialise the matching row labels as a plain list. random.choice on a
    # pandas Index relies on how the running Python version tests a sequence
    # for emptiness, and an empty match would otherwise surface as an opaque
    # error far from its cause.
    candidates = data.df[data.df.scene_id == scene].index.tolist()
    if not candidates:
        raise IndexError(f"No dataset entries found for scene {scene!r}")
    index = random.choice(candidates)

    pc, (label, width), (pos, rotations, grasps_pc_local, grasps_pc), pos_occ, occ_value = data[index]

    # Batch every array so it can be fed to the network directly.
    pc, label, width = unsq(pc), unsq(label), unsq(width)
    pos, rotations = unsq(pos), unsq(rotations)
    grasps_pc_local, grasps_pc = unsq(grasps_pc_local), unsq(grasps_pc)
    pos_occ, occ_value = unsq(pos_occ), unsq(occ_value)

    return pc, (label, width, occ_value), (pos, rotations, grasps_pc_local, grasps_pc), pos_occ

# Load

## Load network

In [None]:
# Fetch one dataset sample for the configured scene. load_data returns
# (pc, (label, width, occ_value), (pos, rotations, grasps_pc_local, grasps_pc), pos_occ);
# the first element is bound to `tsdf` here.
# Note: `tsdf` is rebound further down in this cell to a grid rendered from the
# simulator, so the network input does not come from this sample.
tsdf, y, grasp_query, pos_occ = load_data(scene)

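# Optional: replace the dataset TSDF with one rendered from a new viewpoint of the same scene
# (check that the "see_table" variable matches whether the network was trained with or without the table).
# As written, the code below always runs and overwrites `tsdf`; skip it to keep the dataset sample.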
def render_n_images(sim, n=1, random=False, noise_type=''):
    """Render ``n`` depth images from cameras spread evenly in azimuth around the scene.

    The cameras look at the point ``(size / 2, size / 2, 0)``, where ``size`` is
    the module-level workspace size.

    Args:
        sim: Object exposing ``camera.render(extrinsic)``; element ``[1]`` of
            the value it returns is kept as the depth image.
        n: Number of viewpoints; azimuths are ``2 * pi * k / n`` for
            ``k = 0 .. n - 1``.
        random: If true, draw the angle ``theta`` uniformly from
            ``[0, 5 * pi / 12)`` and the radius uniformly from
            ``[2, 2.4) * size``. Otherwise use ``pi / 4`` and ``2 * size``.
            (The name shadows the stdlib ``random`` module inside this function.)
        noise_type: Currently unused; the noise step is disabled.

    Returns:
        ``(depth_imgs, extrinsics)``: two lists of length ``n`` in matching order.
    """
    look_at_point = np.r_[size / 2, size / 2, 0.0]
    origin = Transform(Rotation.identity(), look_at_point)

    if not random:
        # Fixed viewpoint: 45 degrees from top view.
        theta = np.pi / 4.0
        r = 2.0 * size
    else:
        # Elevation between 0 and 75 degrees. Narrower ranges (up to 60
        # degrees, or 30 to 45 degrees) were tried as more realistic options.
        theta = np.random.uniform(0.0, 5 * np.pi / 12.0)
        r = size * np.random.uniform(2, 2.4)

    # One camera pose per azimuth, circling the scene.
    azimuths = 2.0 * np.pi * np.arange(n) / n
    extrinsics = []
    for phi in azimuths:
        extrinsics.append(camera_on_sphere(origin, r, theta, phi))

    # Several views cover the other sides of the point cloud. Noise injection
    # (apply_noise(depth_img, noise_type)) is currently disabled.
    depth_imgs = [sim.camera.render(pose)[1] for pose in extrinsics]

    return depth_imgs, extrinsics

# Render a single depth image of the scene from a randomly sampled viewpoint.
depth_imgs, extrinsics = render_n_images(sim, n=1, random=True, noise_type='')
# Show the image
# plt.imshow(depth_imgs[0])

# Fuse the rendered view(s) into a TSDF volume. The volume has its own name so
# that `tsdf` only ever refers to the tensor that is fed to the network.
tsdf_volume = TSDFVolume(size, resolution=resolution)
for depth_img, extrinsic in zip(depth_imgs, extrinsics):
    tsdf_volume.integrate(depth_img, sim.camera.intrinsic, extrinsic)
seen_pc = tsdf_volume.get_cloud()
# Optional: Crop out table
# NOTE(review): `o3d` is not imported explicitly in this notebook; it is
# presumably re-exported by `from vgn.perception import *` — confirm.
lower = np.array([0.0 , 0.0 , 0.055])
upper = np.array([size, size, size])
bounding_box = o3d.geometry.AxisAlignedBoundingBox(lower, upper)
seen_pc = seen_pc.crop(bounding_box)
# Convert the fused grid to a torch tensor; this replaces the dataset sample
# that was bound to `tsdf` at the top of the cell.
tsdf = torch.tensor(tsdf_volume.get_grid(), device=device, dtype=torch.float32)

# Viz seen point cloud and camera position
# seen_pc.colors = o3d.utility.Vector3dVector(np.tile(np.array([0, 0.64, 0.93]), (np.asarray(seen_pc.points).shape[0], 1)))
# cam_pos_pc = o3d.geometry.PointCloud()
# cam_pos_pc.points = o3d.utility.Vector3dVector(np.array([extrinsics[0].inverse().translation]))
# cam_pos_pc.colors = o3d.utility.Vector3dVector(np.tile(np.array([0, 0.64, 0.93]), (np.asarray(cam_pos_pc.points).shape[0], 1)))
# visualizer.add_geometry(seen_pc)
# visualizer.add_geometry(cam_pos_pc)
# visualizer.show()


# Inference only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    out = net(tsdf, grasp_query, p_tsdf=pos_occ)
# The last element of the network output is kept as the occupancy prediction.
occ = out[-1]

## Occupancy

In [None]:
# Check if network gives correct occupancies: query the TSDF decoder on a dense
# lattice of resolution**3 points covering [-0.5, 0.5) along every axis.
axis_coords = torch.linspace(start=-0.5, end=0.5 - 1.0 / resolution, steps=resolution, device=device)
# All three axes share the same coordinates. The call relies on torch.meshgrid's
# default "ij" indexing (newer torch versions warn when `indexing` is omitted).
x, y, z = torch.meshgrid(axis_coords, axis_coords, axis_coords)
# Stack to (1, resolution, resolution, resolution, 3), then flatten the lattice
# to (1, resolution**3, 3).
pos = torch.stack((x, y, z), dim=-1).float().unsqueeze(0)
pos = pos.view(1, resolution ** 3, 3)
# Inference only: avoid building an autograd graph for resolution**3 query points.
with torch.no_grad():
    c = net.encode_inputs(tsdf)
    occupancies = net.decoder_tsdf(pos, c,)

In [None]:
import mcubes
import plotly.graph_objects as go
# Recover the side length of the cubic query lattice from the element count
# instead of hard-coding it, and fail loudly if the count is not a perfect cube.
grid_side = round(occupancies.numel() ** (1.0 / 3.0))
assert grid_side ** 3 == occupancies.numel(), "occupancies does not form a cubic grid"
# .cpu() keeps the conversion working when the network runs on a GPU; it is a
# no-op for tensors that already live on the CPU.
occupancy_grid = occupancies.detach().cpu().reshape(grid_side, grid_side, grid_side).numpy()
# Extract the 0.5 iso-surface of the predicted occupancy field.
vertices, triangles = mcubes.marching_cubes(occupancy_grid, 0.5)
# Per-axis vertex coordinates and per-corner triangle indices, in the layout
# expected by plotly's Mesh3d.
x, y, z = vertices.T
i, j, k = triangles.T

In [None]:
# Inspect the shape of the array loaded from the viewpoint .npy file. The plot
# cell indexes it as [2, 3] and inverts it, so a 4x4 matrix (possibly with
# singleton axes) is presumably expected — TODO confirm.
camera_eye.shape

In [None]:
# Show the reconstructed surface as an interactive mesh.
fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
                          i=i, j=j, k=k,
                          color='rgb(194, 30, 86)'))
# Derive the plotly camera eye from a *copy* of the stored pose. Editing
# `camera_eye` in place and rebinding it to a 3-vector would make this cell
# non-idempotent: a second run would index a 1-D array with [2, 3] and fail.
camera_pose = np.array(camera_eye).squeeze()
camera_pose[2, 3] += 3  # offset the z-translation entry of the pose before inverting
eye_xyz = np.linalg.inv(camera_pose)[:3, 3]  # translation column of the inverted pose
camera = dict(eye=dict(x=eye_xyz[0], y=eye_xyz[1], z=eye_xyz[2]))
fig.update_layout(scene_camera=camera)
# NOTE(review): update_xaxes targets 2-D cartesian axes; the axes of a 3-D
# scene live under layout.scene — confirm this call has the intended effect.
fig.update_xaxes(tickmode='linear')
fig.update_layout(autosize=False,
                    width  = 1600,
                    height = 1600)
fig.show()

In [None]:
# vertices = o3d.utility.Vector3dVector(vertices)
# trianlges = o3d.utility.Vector3dVector(triangles)
# reconstruction = trimesh.Trimesh(vertices=vertices, faces=triangles)
# reconstruction.visual.face_colors = [186, 0, 1, 50]


# reconstruction = trimesh.Scene([reconstruction])
# reconstruction.show()

In [None]:
# size = 0.3
# camera = reconstruction.camera
# cam_resolution = [1920, 1080]
# rot_by_x_degrees = 55
# # TODO: euler to rotation matrix
# pitch_rot = np.array([   [  1.0000000,  0.0000000,  0.0000000, 0.0],
#                             [  0.0000000,  0.5735765, -0.8191521, 0.0],
#                             [  0.0000000,  0.8191521,  0.5735765, 0.0],
#                             [  0.0000000,  0.0000000,  0.0000000, 1.0]   ])
# distance = 0.75
# camera_tf = camera.look_at(points=[[size/2,size/2,0.06]], rotation=pitch_rot, distance=distance)
# reconstruction.camera_transform = camera_tf
# reconstruction.camera.resolution = cam_resolution
# reconstruction.show(line_settings= {'point_size': 8})