In [None]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
# import utils

def load_pickle(filename):
    """Return the object stored in the pickle file at `filename`.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(filename, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

In [None]:
# !unzip ./testing_data_final_filtered.zip
# !ls

In [None]:
# Dataset locations -- change these to match where the test archive was extracted.
# Directory holding the per-scene files (*_color_kinect.png, *_depth_kinect.png, *_meta.pkl).
testing_data_dir = "testing_data_final_filtered/testing_data/v2.2"
# Directory holding the split list file (test.txt) that names the scenes.
split_dir_test = "testing_data_final_filtered/testing_data/"

In [None]:
def get_split_files(split_name,split_dir,data_dir):
    """Build the per-scene file paths for one data split.

    Reads `split_dir/{split_name}.txt`, which lists one scene prefix per line
    (blank lines are ignored), and joins each prefix onto `data_dir`.

    Returns:
        Three parallel lists `(rgb, depth, meta)` with the paths of the
        `_color_kinect.png`, `_depth_kinect.png` and `_meta.pkl` files, in the
        order the scenes appear in the split file.
    """
    split_path = os.path.join(split_dir, f"{split_name}.txt")
    rgb, depth, meta = [], [], []
    with open(split_path, 'r') as f:
        for raw_line in f:
            scene = raw_line.strip()
            if not scene:
                continue
            stem = os.path.join(data_dir, scene)
            rgb.append(stem + "_color_kinect.png")
            depth.append(stem + "_depth_kinect.png")
            meta.append(stem + "_meta.pkl")
    return rgb, depth, meta

In [None]:
# Resolve the RGB / depth / meta paths of every scene listed in the test split.
rgb_files, depth_files, meta_files = get_split_files(
    'test', split_dir_test, testing_data_dir
)

# Scene names: the whitespace-stripped, non-empty lines of the same split file,
# so `scenes[i]` belongs to `rgb_files[i]`, `depth_files[i]` and `meta_files[i]`.
with open(os.path.join(split_dir_test, f"{'test'}.txt"), 'r') as f:
    scenes = list(filter(None, map(str.strip, f)))
# print((scenes))

In [None]:
# Preview only the first few scene names instead of dumping the whole list into the output.
scenes[:10]

In [None]:
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9.
# The pyplot function of the same name takes the same (name, lut) arguments and
# is still available, so the rest of the cell is unchanged.
from matplotlib.pyplot import get_cmap

NUM_OBJECTS = 79
cmap = get_cmap('rainbow', NUM_OBJECTS)
# One RGB triple (alpha dropped) per index in [0, NUM_OBJECTS + 3); the last
# three rows are overwritten below with fixed colours.
COLOR_PALETTE = np.array([cmap(i)[:3] for i in range(NUM_OBJECTS + 3)])
# Rescale the 0-1 floats to 0-255 bytes.
COLOR_PALETTE = np.array(COLOR_PALETTE * 255, dtype=np.uint8)
COLOR_PALETTE[-3] = [119, 135, 150]
COLOR_PALETTE[-2] = [176, 194, 216]
COLOR_PALETTE[-1] = [255, 255, 225]

## Get familiar with the data

In [None]:
# Load the first test scene: colour scaled from 0-255 to 0-1, depth from mm to m.
rgb = np.array(Image.open(rgb_files[0])) / 255
depth = np.array(Image.open(depth_files[0])) / 1000
print(rgb.shape)

# Colour and depth side by side in a 1x3 grid; the third slot is left empty
# (the label plot that used to fill it is disabled).
preview = plt.figure(figsize=(15, 10))
preview.add_subplot(1, 3, 1).imshow(rgb)
preview.add_subplot(1, 3, 2).imshow(depth)

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pandas as pd

# Path of the objects CSV; handed to `mydataset` as `object_files` and read there with `pd.read_csv`.
objects_csv = 'testing_data_final_filtered/testing_data/objects_v1.csv'

def read_image(img_path):
    '''
    Load an image file into a tensor.

    inputs:
    img_path : path of the image file to read
    outputs:
    torch.Tensor created from the decoded pixel array; dtype and axis layout
    are whatever PIL / NumPy produce for that file (no scaling is applied)
    '''
    pixels = np.array(Image.open(img_path))
    return torch.from_numpy(pixels)

class mydataset(Dataset):
    """Dataset that pairs every image file with the name of its scene."""

    def __init__(self, img_files, img_dir, scene_names, object_files, transform=None, target_transform = None) -> None:
        """Store the file lists and load the objects table.

        img_files        : image paths, one per sample
        img_dir          : directory of the images (stored, not used for lookups)
        scene_names      : scene name for each entry of `img_files`
        object_files     : path of a CSV file, loaded with `pd.read_csv`
        transform        : optional callable applied to each image tensor
        target_transform : stored only; nothing in this class calls it
        """
        super().__init__()
        self.img_dir = img_dir
        self.img_files = img_files
        self.scenes = scene_names
        self.objects = pd.read_csv(object_files)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Number of samples, i.e. number of image files."""
        return len(self.img_files)

    def __getitem__(self,idx):
        """Return `(image, scene_name)` for sample `idx`.

        The image is read with `read_image` and divided by 255.0 before the
        optional `transform` is applied.
        """
        path, scene_name = self.img_files[idx], self.scenes[idx]
        image = read_image(path) / 255.0
        if self.transform:
            image = self.transform(image)
        return image, scene_name

# Wrap the test RGB images together with their scene names.
test_data = mydataset(
    img_files=rgb_files,
    img_dir=testing_data_dir,
    scene_names=scenes,
    object_files=objects_csv,
)

# One image per batch, in split-file order, so every prediction maps back to its scene.
test_dataloader = DataLoader(dataset=test_data, batch_size=1, shuffle=False)

In [None]:
import torch.nn as nn

class Segmentation(nn.Module):
    """U-Net style encoder/decoder producing 82 score channels per pixel.

    The encoder has four conv stages (64, 128, 256, 512 channels) separated by
    2x max-pooling; the decoder upsamples with stride-2 transposed convolutions
    and concatenates the matching encoder feature map at every level.  Because
    of the three pool / upsample pairs, the input height and width must be
    divisible by 8 for the skip concatenations to line up.
    """

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels):
        """Layers of one stage: 3x3 reflect-padded conv, batch norm, ReLU."""
        return [
            nn.Conv2d(in_channels, out_channels, 3, padding=1, padding_mode="reflect"),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def __init__(self):
        super().__init__()
        stage = self._conv_bn_relu

        # Encoder stages.
        self.c1 = nn.Sequential(*stage(3, 64))
        self.c2 = nn.Sequential(*stage(64, 128))
        self.c3 = nn.Sequential(*stage(128, 256))
        self.c4 = nn.Sequential(*stage(256, 512))

        # Downsampling between encoder stages.
        self.p1 = nn.MaxPool2d(2)
        self.p2 = nn.MaxPool2d(2)
        self.p3 = nn.MaxPool2d(2)

        # Upsampling: each transposed conv doubles the resolution and halves the channels.
        self.d1 = nn.ConvTranspose2d(128, 64, 2, 2)
        self.d2 = nn.ConvTranspose2d(256, 128, 2, 2)
        self.d3 = nn.ConvTranspose2d(512, 256, 2, 2)

        # Decoder stages; their inputs are [skip, upsampled] concatenations, hence
        # twice the channel count.  dc1 ends with a 1x1 conv giving the 82 outputs.
        self.dc1 = nn.Sequential(*stage(128, 64), nn.Conv2d(64, 82, 1))
        self.dc2 = nn.Sequential(*stage(256, 128))
        self.dc3 = nn.Sequential(*stage(512, 256))

    def forward(self, x):
        """Map a batch of 3-channel images [B, 3, H, W] to scores [B, 82, H, W]."""
        enc1 = self.c1(x)
        enc2 = self.c2(self.p1(enc1))
        enc3 = self.c3(self.p2(enc2))
        bottom = self.c4(self.p3(enc3))

        dec3 = self.dc3(torch.cat([enc3, self.d3(bottom)], dim=1))
        dec2 = self.dc2(torch.cat([enc2, self.d2(dec3)], dim=1))
        scores = self.dc1(torch.cat([enc1, self.d1(dec2)], dim=1))

        # squeeze(1) only has an effect if dim 1 has size 1; with 82 channels it is a no-op.
        return scores.squeeze(1)

In [None]:
import torch
import numpy

# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs end to end.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The checkpoint holds a whole pickled model object (it is used as a module below).
# NOTE(security): unpickling can execute arbitrary code -- only load checkpoints you trust.
# * map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
# * weights_only=False is needed on PyTorch >= 2.6, where the default flipped to True
#   and full-module pickles are rejected (the argument exists since PyTorch 1.13).
model = torch.load('./model_new_5.pth', map_location=device, weights_only=False)
model.to(device)
model.eval()

In [None]:
# Predict a label map for every test image and store it next to the scene's other
# files as "{scene}_label_kinect.png".
with torch.no_grad():
    for image, scene in test_dataloader:
        torch.cuda.empty_cache()
        # (0, 3, 1, 2) moves the last axis (channels) in front of the two spatial
        # axes, which is the layout the network's Conv2d layers expect.
        scores = model(torch.permute(image, (0, 3, 1, 2)).to(device))
        # Highest-scoring channel per pixel; batch_size is 1, so drop the batch axis.
        label_map = torch.argmax(scores, dim=1).squeeze(0).cpu().numpy()

        # Build the path once from the configured data directory so the output
        # follows `testing_data_dir` instead of a second hard-coded location.
        label_path = os.path.join(testing_data_dir, f"{scene[0]}_label_kinect.png")
        Image.fromarray(label_map.astype(np.uint8)).save(label_path)
        # Report only after the file has actually been written.
        print(f"{label_path} is saved")

In [None]:
# Number of batches; the loader was built with batch_size=1, so this is also the number of test images.
len(test_dataloader)

## Lift depth to point cloud

In [None]:
# You can use other visualization from previous homeworks, like Open3D
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


def show_points(points):
    """Scatter-plot an [N, 3] point array in a new 3D matplotlib figure.

    Columns 1 and 2 of `points` are swapped when plotting (column 2 goes on the
    plot's y axis, column 1 on its z axis).  The axis limits are fixed to
    [-2, 2] for x and y and [0, 4] for z.
    """
    ax = plt.figure().add_subplot(projection='3d')
    ax.set_xlim3d([-2, 2])
    ax.set_ylim3d([-2, 2])
    ax.set_zlim3d([0, 4])
    col0, col1, col2 = points[:, 0], points[:, 1], points[:, 2]
    ax.scatter(col0, col2, col1)
    

def compare_points(points1, points2):
    """Overlay two [N, 3] point arrays in one new 3D matplotlib figure.

    Each array is drawn with its own scatter call (so each gets its own
    colour), with columns 1 and 2 swapped as in `show_points`.  The axis limits
    are fixed to [-0.5, 0.5] for x and y and [0, 1] for z.
    """
    ax = plt.figure().add_subplot(projection='3d')
    ax.set_xlim3d([-0.5, 0.5])
    ax.set_ylim3d([-0.5, 0.5])
    ax.set_zlim3d([0, 1])
    for cloud in (points1, points2):
        ax.scatter(cloud[:, 0], cloud[:, 2], cloud[:, 1])

In [None]:
import copy
import open3d
from numpy.linalg import inv
from tqdm.notebook import trange, tqdm

def draw_registration_result(source, target, transformation):
    """Show `source` moved by `transformation` together with `target` in an Open3D window.

    Both geometries are deep-copied first, so the caller's objects are neither
    recoloured nor transformed.  The source copy is painted orange, the target
    copy blue, and the viewer is opened with a fixed camera.
    """
    moved_source = copy.deepcopy(source)
    fixed_target = copy.deepcopy(target)

    moved_source.paint_uniform_color([1, 0.706, 0])
    fixed_target.paint_uniform_color([0, 0.651, 0.929])
    moved_source.transform(transformation)

    camera = dict(
        zoom=3,
        front=[0.9288, -0.2951, -0.2242],
        lookat=[1.6784, 2.0612, 1.4451],
        up=[-0.3402, -0.9189, -0.1996],
    )
    open3d.visualization.draw_geometries([moved_source, fixed_target], **camera)

# Reusable Open3D containers; their points are overwritten before every ICP run.
src = open3d.geometry.PointCloud()
tgt = open3d.geometry.PointCloud()

ICP_NUM_POINTS = 600   # points kept from each cloud; templates with fewer points are skipped
ICP_THRESHOLD = 0.02   # maximum correspondence distance handed to registration_icp

# The template dump is the same for every scene, so read it from disk once
# instead of once per scene.
train_dump = load_pickle('train_dump')


def _backproject_depth(depth, intrinsic):
    """Lift a depth map [H, W] to camera-frame points [H, W, 3], sampling pixel centres."""
    v, u = np.indices(depth.shape)
    uv1 = np.stack([u + 0.5, v + 0.5, np.ones_like(depth)], axis=-1)
    return uv1 @ np.linalg.inv(intrinsic).T * depth[..., None]


def _best_icp_pose(tgt_pcd, templates):
    """Run ICP from every usable template onto `tgt_pcd` and return the best 4x4 transform.

    `templates` is one entry of `train_dump`: parallel sequences under 'pcd' and
    'gt_world'.  "Best" is the lowest inlier RMSE.  The identity is returned when
    the target is empty or no template yields a usable registration.
    """
    if len(tgt_pcd) == 0:
        # Nothing was segmented for this object: skip ICP instead of feeding it
        # NaN centroids computed from an empty cloud.
        return np.eye(4)

    # Only the first ICP_NUM_POINTS points of each cloud are used (no random sampling).
    tgt_pcd = tgt_pcd[:ICP_NUM_POINTS, :]
    tgt.points = open3d.utility.Vector3dVector(tgt_pcd.reshape([-1, 3]))
    q_bar = np.mean(tgt_pcd, axis=0)

    rmse = []
    t_list = []
    for k in range(len(templates['pcd'])):
        src_pcd = templates['pcd'][k].squeeze(0)
        if len(src_pcd) < ICP_NUM_POINTS:
            continue

        # Undo the template's stored pose -- presumably this brings the template
        # into the object's own frame; confirm against how 'train_dump' was built.
        inv_gt = inv(templates['gt_world'][k])
        src_pcd = src_pcd @ inv_gt[:3, :3].T + inv_gt[:3, 3]
        src_pcd = src_pcd.reshape([-1, 3])[:ICP_NUM_POINTS, :]
        src.points = open3d.utility.Vector3dVector(src_pcd.reshape([-1, 3]))

        # Initial guess: the template's stored rotation, with the translation chosen
        # so that the rotated source centroid lands on the target centroid.
        trans_init = inv(inv_gt)
        p_bar = np.mean(src_pcd, axis=0)
        trans_init[:3, 3] = q_bar - np.matmul(trans_init[:3, :3], p_bar)

        reg_p2p = open3d.pipelines.registration.registration_icp(
            src, tgt, ICP_THRESHOLD, trans_init,
            open3d.pipelines.registration.TransformationEstimationPointToPoint(),
            open3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=200)
        )
        # Ignore runs without correspondences or with an RMSE of exactly zero.
        if len(reg_p2p.correspondence_set) == 0 or reg_p2p.inlier_rmse == 0:
            continue
        rmse.append(reg_p2p.inlier_rmse)
        t_list.append(reg_p2p.transformation)

    if len(rmse) == 0:
        return np.eye(4)
    return t_list[np.argmin(rmse)]


test_dump = {}
for i in trange(len(depth_files)):
    meta_test = load_pickle(meta_files[i])
    depth = np.array(Image.open(depth_files[i])) / 1000   # mm -> m

    # Predicted segmentation of this scene, written by the inference loop earlier
    # in this notebook as "{scene}_label_kinect.png" inside the data directory.
    label_path = os.path.join(testing_data_dir, f"{scenes[i]}_label_kinect.png")
    label = np.array(Image.open(label_path))

    points_viewer = _backproject_depth(depth, meta_test['intrinsic'])  # [H, W, 3]
    c2w_test = inv(meta_test['extrinsic'])

    # 79 slots, indexed by object id; ids absent from this scene stay None.
    poses_test = [None]*79
    for od, obj_id in enumerate(meta_test['object_ids']):
        # Per-object crops have different lengths, so they are handled one at a
        # time rather than stacked into a single (ragged) NumPy array.
        tgt_pcd = points_viewer[label == obj_id]
        tgt_pcd = tgt_pcd @ c2w_test[:3, :3].T + c2w_test[:3, 3]

        T = _best_icp_pose(tgt_pcd, train_dump[meta_test['object_names'][od]])
        poses_test[obj_id] = T.tolist()

    # Collected per scene; serialised to JSON by a later cell.
    test_dump[scenes[i]] = {"poses_world":poses_test}
    

In [None]:
import json

print(type(test_dump))

# Write the collected poses to disk.  The context manager flushes and closes the
# file even if serialisation raises part-way through.
with open('test_dump_last_final.json', 'w') as file:
    json.dump(test_dump, file)

## Draw bounding boxes of poses on 2D image
If you are curious, take a look at `utils.py`. It is very simple.

In [None]:
# `draw_projected_box3d` lives in the project's utils.py; the import in the first
# cell is commented out, so bring the module into scope here.
import utils

s = 160  # index of the scene to visualise; must be smaller than len(scenes)
scene = scenes[s]
boxed_image = np.array(Image.open(rgb_files[s])) / 255
meta = load_pickle(meta_files[s])

# Estimated pose and scaled box extents of every object present in the scene.
poses_world = np.array([test_dump[scene]['poses_world'][idx] for idx in meta['object_ids']])
box_sizes = np.array([meta['extents'][idx] * meta['scales'][idx] for idx in meta['object_ids']])

for pose, size in zip(poses_world, box_sizes):
    # Arguments: image, translation, box size, rotation, then the camera matrices.
    utils.draw_projected_box3d(
        boxed_image, pose[:3, 3], size, pose[:3, :3], meta['extrinsic'], meta['intrinsic'],
        thickness=2)

Image.fromarray((boxed_image * 255).astype(np.uint8))

## Test data
Test data has everything but the poses. Testing data and training data are from the same distribution.

In [None]:
# Inspect the fields of one test meta file.  Use a path from the configured test
# split rather than a hard-coded file under a different data root.
load_pickle(meta_files[0]).keys()

In [None]:
# Show one test image.  Use a path from the configured test split rather than a
# hard-coded file under a different data root.
Image.open(rgb_files[0])