In [1]:
#Environment set-up and libraries

#Base libraries
import numpy as np
import random
import torch
import torch.nn as nn
from datetime import datetime

#Plotting libraries
%matplotlib inline
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go

#Utilities libraries
from glob import glob 
import os

import open3d as o3d

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


### Loading the point clouds

In [2]:
def load_file(file_name):
    print(file_name)

    if file_name.endswith(".las") or file_name.endswith(".laz"):
        print("[INFO] .las (.laz) file loading")
        try:
            # import lidar .las data and assign to variable
            pcd = laspy.read(file_name)
            # examine the available features for the lidar file we have read
            # list(las.point_format.dimension_names)
            #
            # set(list(las.classification))

            # Creating, Filtering, and Writing Point Cloud Data
            # To create 3D point cloud data, we can stack together with the X, Y, and Z dimensions, using Numpy like this.
            point_data = np.stack([pcd.X, pcd.Y, pcd.Z], axis=0).transpose((1, 0))
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(point_data)
            # points = point_data
            if pcd is not None:
                print("[Info] Successfully read", file_name)

                # Point cloud
                return pcd

        except Exception:
            print(".las, .laz file load failed")

    elif file_name.endswith(".e57"):
        print("[INFO] .e57 file loading")
        try:
            e57_file = pye57.E57(file_name)

            # other attributes can be read using:
            data = e57_file.read_scan(0)

            # 'data' is a dictionary with the point types as keys
            # assert isinstance(data["cartesianX"], np.ndarray)
            # assert isinstance(data["cartesianY"], np.ndarray)
            # assert isinstance(data["cartesianZ"], np.ndarray)

            point_xyz = np.stack([data["cartesianX"], data["cartesianY"], data["cartesianZ"]]).transpose((1, 0))
            # points_rgb = [data["colorRed"], data["colorGreen"], data["colorBlue"]]
            # points_intensity = data["intensity"]

            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(point_xyz)
            # points = o3d.utility.Vector3dVector(point_xyz)
            # points = point_xyz
            # pcd.colors = o3d.utility.Vector3dVector(points_rgb)
            # pcd.colors[:, 0] = points_intensity
            print("[Info] Successfully read", file_name)
            return pcd

        except Exception:
            print(".e57 file load failed")

    elif file_name.endswith(".bin"):
        print("[INFO] .bin file loading")
        try:
            size_float = 4
            list_pcd = []
            with open(file_name, "rb") as f:
                byte = f.read(size_float * 4)
                while byte:
                    x, y, z, intensity = struct.unpack("ffff", byte)
                    list_pcd.append([x, y, z])
                    byte = f.read(size_float * 4)
            np_pcd = np.asarray(list_pcd)
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(np_pcd)
            print("[Info] Successfully read", file_name)
            return pcd

        except Exception:
            print(".bin file load failed")

    elif file_name.endswith(".ply"):
        pcd = o3d.io.read_point_cloud(file_name)
        points_xyz = np.asarray(pcd.points)
        #pcd = o3d.geometry.PointCloud() # No need to do that already a PointCloud
        pcd.points = o3d.utility.Vector3dVector(points_xyz)
        # points = points_xyz
        if pcd is not None:
            print("[Info] Successfully read", file_name)
            # Point cloud
            return pcd

    elif file_name.endswith(".pts"):
        try:
            with open(file_name, "r") as f:
                # Log every 1000000 lines.
                LOG_EVERY_N = 1000000
                points_np = []
                for line in f:
                    if len(line.split()) == 4:
                        x, y, z, i = [num for num in line.split()]
                        points_np.append([float(x), float(y), float(z), float(i)])
                        if (len(points_np) % LOG_EVERY_N) == 0:
                            print('point', len(points_np))
                    elif len(line.split()) == 3:
                        x, y, z = [num for num in line.split()]
                        points_np.append([float(x), float(y), float(z)])
                        if (len(points_np) % LOG_EVERY_N) == 0:
                            print('point', len(points_np))
                    elif len(line.split()) == 5:
                        x, y, z, i, zeroes_v = [num for num in line.split()]
                        points_np.append([float(x), float(y), float(z), float(i)])
                        if (len(points_np) % LOG_EVERY_N) == 0:
                            print('point', len(points_np))
                    elif len(line.split()) == 7:
                        x, y, z, r, g, b, i = [num for num in line.split()]
                        points_np.append([float(x), float(y), float(z),
                                          float(r), float(g), float(b),
                                          float(i)])
                        if (len(points_np) % LOG_EVERY_N) == 0:
                            print('point', len(points_np))
                    else:
                        print("[Info] The file has unregistered format")
                        return
            print('loop end')
            points_arr = np.array(points_np).transpose()
            print(len(points_arr))
            point_xyz = points_arr[:3].transpose()
            print("xyz points shape", point_xyz.shape)
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(point_xyz)
            if len(points_arr) == 4:
                points_intensity = (points_arr[3])/255.0
                print("intensity points len", points_intensity.shape)
                points_intensity_rgb = np.vstack((points_intensity,
                                                  points_intensity,
                                                  points_intensity)).T
                print("intensity_rgb points shape", points_intensity_rgb.shape)
                pcd.colors = o3d.utility.Vector3dVector(points_intensity_rgb)
            elif len(points_arr) == 7:
                points_red = (points_arr[4]) / 255.0
                points_green = (points_arr[5]) / 255.0
                points_blue = (points_arr[6]) / 255.0
                points_rgb = np.vstack((points_red,
                                        points_green,
                                        points_blue)).T

                # points_intensity = ((points_arr[3]) / 255.0).T
                # print("intensity points len", points_intensity.shape)
                print("rgb points shape", points_rgb.shape)
                pcd.colors = o3d.utility.Vector3dVector(points_rgb)
                #pcd.intensities = o3d.utility.Vector3dVector(points_intensity)
            if pcd is not None:
                print("[Info] Successfully read", file_name)
                # Point cloud
                return pcd

        except Exception:
            print("[Info] Reading .pts file failed", file_name)

    # elif file_name.endswith(".kml"):
    #     try:
    #         with open(file_name, "r") as f:
    #             # Log every 1000000 lines.
    #             LOG_EVERY_N = 1000000
    #             points_np = []
    #             for line in f:
    #                 print(line)
    #                 if len(line.split(",")) == 3 and (line[0].isdigit() or line.startswith("-")):
    #                     y, x, z = [num for num in line.split(",")]
    #                     points_np.append([float(x), float(y), float(z)])
    #                     if (len(points_np) % LOG_EVERY_N) == 0:
    #                         print('point', len(points_np))
    #                 else:
    #                     print("[Info] The file has unregistered format")
    #         print('loop end')
    #         points_arr = np.array(points_np).transpose()
    #         print(len(points_arr))
    #         point_xyz = points_arr[:3].transpose()
    #         # points_intensity = points_arr[3]
    #         pcd = o3d.geometry.PointCloud()
    #         pcd.points = o3d.utility.Vector3dVector(point_xyz)
    #         if pcd is not None:
    #             print("[Info] Successfully read", file_name)
    #             # Point cloud
    #             return pcd
    #
    #     except Exception:
    #         print("[Info] Reading .kml file failed", file_name)

    else:
        pcd = None
        geometry_type = o3d.io.read_file_geometry_type(file_name)
        print(geometry_type)

        mesh = None
        if geometry_type & o3d.io.CONTAINS_TRIANGLES:
            mesh = o3d.io.read_triangle_model(file_name)
        if mesh is None:
            print("[Info]", file_name, "appears to be a point cloud")
            cloud = None
            try:
                cloud = o3d.io.read_point_cloud(file_name)
                # print(type(cloud))
            except Exception:
                print("[Info] Unknown filename", file_name)
            if cloud is not None:
                print("[Info] Successfully read", file_name)

                if not cloud.has_normals():
                    cloud.estimate_normals()
                cloud.normalize_normals()
                pcd = cloud
                #points = cloud.points
                pcd.points = o3d.utility.Vector3dVector(cloud.points)
            else:
                print("[WARNING] Failed to read points", file_name)

        if pcd is not None or mesh is not None:
            try:
                if mesh is not None:
                    # Triangle model
                    _scene.scene.add_model("__model__", mesh)
                else:
                    # Point cloud
                    return pcd

            except Exception as e:
                print(e)


In [3]:
filepath_mob1 = "/home/mekala/PycharmProjects/SabreProject_code/Sabre_proj/SABRE - Selected Static Scan Data/SABRE ADVANCED 3D - Selected MMS Data/"
filename1 = "SABRE MMS_S3 - 0002.pts"
filepath1 = filepath_mob1 + filename1
pc1 = load_file(filepath1)

filepath_static2 = "/home/mekala/PycharmProjects/SabreProject_code/Sabre_proj/SABRE - Selected Static Scan Data/SABRE - Selected Static Scan Data/"
filename2 = "SABRE Static Scan_T17_003.pts"
filepath2 = filepath_static2 + filename2
pc2 = load_file(filepath2)

/home/mekala/PycharmProjects/SabreProject_code/Sabre_proj/SABRE - Selected Static Scan Data/SABRE ADVANCED 3D - Selected MMS Data/SABRE MMS_S3 - 0002.pts
point 1000000
point 2000000
point 3000000
point 4000000
point 5000000
point 6000000
point 7000000
point 8000000
point 9000000
point 10000000
point 11000000
loop end
4
xyz points shape (11008825, 3)
intensity points len (11008825,)
intensity_rgb points shape (11008825, 3)
[Info] Successfully read /home/mekala/PycharmProjects/SabreProject_code/Sabre_proj/SABRE - Selected Static Scan Data/SABRE ADVANCED 3D - Selected MMS Data/SABRE MMS_S3 - 0002.pts
/home/mekala/PycharmProjects/SabreProject_code/Sabre_proj/SABRE - Selected Static Scan Data/SABRE - Selected Static Scan Data/SABRE Static Scan_T17_003.pts
point 1000000
point 2000000
point 3000000
point 4000000
point 5000000
point 6000000
point 7000000
point 8000000
point 9000000
point 10000000
point 11000000
point 12000000
point 13000000
point 14000000
point 15000000
point 16000000
point 17

#### pc1 and pc2 are the original size point clouds

In [8]:
'''
function that returns the down sampled point cloud 
and fpfh parameters of the down sampled point cloud
the down sampling defined by the parameter voxel_size
'''
def preprocess_point_cloud(pcd, voxel_size):
    pcd_down = pcd.voxel_down_sample(voxel_size)
    pcd_down.estimate_normals(
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 2.0,
                                             max_nn=30))
    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        pcd_down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 5.0,
                                             max_nn=100))
    return pcd_down, pcd_fpfh

In [9]:
# Downsampling
voxel_size = 0.3

pc1_down, pc1_fpfh = preprocess_point_cloud(pc1, voxel_size)
pc2_down, pc2_fpfh = preprocess_point_cloud(pc2, voxel_size)

In [10]:
# Sizes of the downsampled point clouds points sets
pc1_down_points = np.asarray(pc1_down.points)
pc2_down_points = np.asarray(pc2_down.points)
print(pc1_down_points.shape)
print(pc2_down_points.shape)

(56746, 3)
(123234, 3)


#### Defining device for this notebook

In [11]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


### Initial RANSAC alignment

In [12]:
print("voxel_size = ", voxel_size)
distance_threshold = 2.5 * voxel_size
print("Distance threshold: ", distance_threshold)
mutual_filter = True
print("mutual_filter = ", mutual_filter)
max_iterations = 1000000
print("max_iterations = ", max_iterations)
max_validation = np.min([len(pc1_down.points), len(pc2_down.points)]) // 2
print("max_validation = ", max_validation)

# getting the current date and time
start = datetime.now()
# getting the date and time from the current date and time in the given format
start_date_time = start.strftime("%m/%d/%Y, %H:%M:%S")
print('\nRANSAC Started', start_date_time, '\n')
print('Running RANSAC\n')
result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
    pc1_down, pc2_down, pc1_fpfh, pc2_fpfh,
    mutual_filter=mutual_filter,
    max_correspondence_distance=distance_threshold,
    estimation_method=o3d.pipelines.registration.
    TransformationEstimationPointToPoint(True),
    ransac_n=3,
    checkers=[
        o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
        o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
    ],
    criteria=o3d.pipelines.registration.RANSACConvergenceCriteria(
        max_iterations, max_validation))  # max_validation replaces args.confidence in mobile-static
# getting the current date and time
finish = datetime.now()
# getting the date and time from the current date and time in the given format
finish_date_time = finish.strftime("%m/%d/%Y, %H:%M:%S")
print('RANSAC Finished', finish_date_time,
      "\nGlobal registration took %.3f sec.\n" % (finish - start).total_seconds())


voxel_size =  0.3
Distance threshold:  0.75
mutual_filter =  True
max_iterations =  1000000
max_validation =  28373

RANSAC Started 10/31/2023, 13:27:46 

Running RANSAC

RANSAC Finished 10/31/2023, 13:27:51 
Global registration took 4.654 sec.



#### RANSAC transformation matrix

In [13]:
trans = result.transformation
print("The estimated transformation matrix:")
print(trans)
print("Saving the transformation matrix in ransac_transformation_matrix.txt ...")
np.savetxt('ransac_transformation_matrix.txt', trans)
print("")

The estimated transformation matrix:
[[-9.43233485e-01 -1.57761108e-01  1.40060056e-02  1.87082909e+06]
 [ 1.58158779e-01 -9.42701268e-01  3.27759482e-02  6.67496680e+06]
 [ 8.39856549e-03  3.46395016e-02  9.55773881e-01 -2.23590480e+05]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
Saving the transformation matrix in ransac_transformation_matrix.txt ...



In [14]:
# trans = [[-9.03772418e-01, -1.08568484e-01,  6.37057409e-03,  1.53870607e+06],
#  [ 1.08726265e-01, -9.03210808e-01,  3.19549352e-02,  6.45150100e+06],
#  [ 2.50982261e-03,  3.24869649e-02,  9.09709064e-01, -2.06839037e+05],
#  [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00]]


#### Applying RANSAC transformation on original and downsampled point clouds and visualising the result with original point clouds

In [15]:
pc1_down_ransac = pc1_down.transform(result.transformation)
pc1_ransac = pc1.transform(result.transformation)

In [16]:
pc1_down_ransac_points = np.asarray(pc1_down_ransac.points)
print(pc1_down_ransac_points.shape)
print(pc2_down_points.shape)

(56746, 3)
(123234, 3)


In [17]:
# coloring the point clouds
source_color=(1, 0.706, 0)
target_color=(0, 0.651, 0.929)
pc1_ransac.paint_uniform_color(source_color)
pc2.paint_uniform_color(target_color)

PointCloud with 17814760 points.

In [19]:
o3d.visualization.draw_geometries([pc1_ransac, pc2])

#### Cropping downsampled point cloud 2 (static scan) to the size of downsampled and transformed point cloud 1 (mobile scan)

In [20]:
# Crop point cloud 2 to the size of transformed point cloud 1
oriented_bounding_box = pc1_down_ransac.get_oriented_bounding_box()
oriented_bounding_box.color = (0, 1, 0)
pc2_down_croppped = pc2_down.crop(oriented_bounding_box)


In [21]:
# coloring the downsampled, traansformed point cloud 1 
# and the cropped point cloud 2
pc1_down_ransac.paint_uniform_color(source_color)
pc2_down_croppped.paint_uniform_color(target_color)

PointCloud with 88167 points.

In [22]:
# Both point clouds with visualization of the bbox
o3d.visualization.draw_geometries([pc1_down_ransac, pc2_down_croppped, oriented_bounding_box, pc1_down_ransac])

### Chamfer distance

In [23]:
def compute_chamfer_distance(pcd1, pcd2):
    """
    Compute the Chamfer distance between two point clouds.

    Parameters:
    - pcd1, pcd2: Open3D point cloud objects.

    Returns:
    - chamfer_distance: The Chamfer distance between the two point clouds.
    """
    
    # Compute distance from pcd1 to pcd2
    distances_1_to_2 = pcd1.compute_point_cloud_distance(pcd2)
    avg_distance_1_to_2 = np.mean([np.min(dist) for dist in distances_1_to_2])

    # Compute distance from pcd2 to pcd1
    distances_2_to_1 = pcd2.compute_point_cloud_distance(pcd1)
    avg_distance_2_to_1 = np.mean([np.min(dist) for dist in distances_2_to_1])

    # Compute the Chamfer distance
    chamfer_distance = (avg_distance_1_to_2 + avg_distance_2_to_1) / 2

    return chamfer_distance

In [24]:
chamfer_dist = compute_chamfer_distance(pc1_down_ransac, pc2_down_croppped)
print(f"Chamfer Distance: {chamfer_dist}")

Chamfer Distance: 3.26757629988784


#### RANSAC Evaluation

In [25]:
#RANSAC Evaluation

fitness = result.fitness
print("Fitness:")
print(fitness)
print("")

rmse = result.inlier_rmse
print("RMSE of all inlier correspondences:")
print(rmse)
print("")

# trans = result.transformation
# print("The estimated transformation matrix:")
# print(trans)
# print("Saving the transformation matrix in ransac_transformation_matrix.txt ...")
# np.savetxt('ransac_transformation_matrix.txt', trans)
# print("")

correspondences = result.correspondence_set
print("Correspondence Set:")
print(correspondences)
print("")

Fitness:
0.65437211433405

RMSE of all inlier correspondences:
0.3212799951430704

Correspondence Set:
std::vector<Eigen::Vector2i> with 37133 elements.
Use numpy.asarray() to access data.



In [26]:
def registration_error(sour, targ):
    # # Make source and target of the same size
    # minimum_len = min(len(sour), len(targ))
    # source = sour[:minimum_len, :3]
    # target = sour[:minimum_len, :3]
    # # Apply transformation to point cloud
    # source_transformed = np.dot(transformation[:3, :3], source.T).T + transformation[:3, 3]
    # # Compute the difference between the transformed source and target point clouds
    # diff = np.subtract(target, source_transformed)
    # # RMSE of the difference
    # rmse = np.sqrt(np.mean(np.sum(diff ** 2, axis=1)))
    # # Compute the rotational error using quaternions
    # r = R.from_matrix(transformation)
    # q = r.as_quat()
    # q_target = R.from_matrix(np.identity(3)).as_quat()
    # rot_error = np.arccos(np.abs(np.dot(q, q_target))) * 180 / np.pi
    # # Compute the translational error
    # trans_error = np.linalg.norm(transformation - np.array([0, 0, 0]))
    # return rmse, rot_error, trans_error
    print('Calculating errors...')
    # Calculate the centroid of the source and target points
    source_centroid = np.mean(sour, axis=0)
    target_centroid = np.mean(targ, axis=0)
    print(f'Sour centroid: {source_centroid}')
    print(f'Targ centroid: {target_centroid}')

    # Calculate the covariance matrix of the source and target points
    source_covariance = np.cov(sour.T)
    target_covariance = np.cov(targ.T)

    # Calculate the singular value decomposition of the covariance matrices
    U_source, S_source, Vt_source = np.linalg.svd(source_covariance)
    U_target, S_target, Vt_target = np.linalg.svd(target_covariance)

    # Calculate the rotation matrix
    rot = Vt_target.T @ U_source.T

    # Calculate the translation vector
    transl = target_centroid - rot @ source_centroid
    print(f'Transl vector: {transl}')

    rot_err = rot - np.eye(3)
    # Mean Absolute error for each axis (row in rot_err)
    rot_mae_xyz = np.mean(np.abs(rot_err), axis=1)

    # Calculating translational error
    transl_xyz = np.divide(np.abs(transl), (np.abs(source_centroid)+np.abs(target_centroid)+np.abs(transl))/3)
    transl_xyz_mae = np.divide(transl_xyz, 100)
    # Calculate the mean squared error
    #mse = np.mean(np.sum((targ - (sour @ rot.T + transl)) ** 2, axis=1))

    return rot_mae_xyz, transl_xyz_mae


In [27]:
# We have pc1_down_ransac_points need pc2_down_croppped_points
pc2_down_croppped_points = np.asarray(pc2_down_croppped.points)

rot_err, transl_err = registration_error(pc1_down_ransac_points, pc2_down_croppped_points)
print(f'Rotational MAE error xyz: {rot_err}, Translational MAE error xyz: {transl_err}')
print(f'Rotational MAE: {np.mean(rot_err)}, Translational MAE: {np.mean(transl_err)}')
print("")

Calculating errors...
Sour centroid: [3.71376862e+05 7.96958451e+05 6.59854706e+01]
Targ centroid: [3.71373897e+05 7.96932915e+05 7.03453306e+01]
Transl vector: [-236455.3632088   161893.22289162   18283.80985593]
Rotational MAE error xyz: [0.14232524 0.14988598 0.04428663], Translational MAE error xyz: [0.0072443  0.00276617 0.02977796]
Rotational MAE: 0.11216595112836704, Translational MAE: 0.01326281046240546



## TRANSFORMER

### Batches of Data Preparation

#### Downsampling again to fit the memory with transformer

In [28]:
# Preprocess point cloud data. One more downsampling
voxel_size = 0.8  # Adjust as needed
source_pc_down, source_fpfh = preprocess_point_cloud(pc1_down_ransac, voxel_size)
target_pc_down, target_fpfh = preprocess_point_cloud(pc2_down_croppped, voxel_size)


In [29]:
# Downsampling result
source_pc_down_points = np.asarray(source_pc_down.points)
target_pc_down_points = np.asarray(target_pc_down.points)
print(source_pc_down_points.shape)
print(target_pc_down_points.shape)

(8628, 3)
(17143, 3)


In [25]:
o3d.visualization.draw_geometries([source_pc_down])

In [26]:
o3d.visualization.draw_geometries([target_pc_down])

#### Creating batches

#### Non overlapping batches (Depreciated)

In [27]:
# # Define the desired number of points for each batch 
# # batch_size = len(batch_sizes) = 8
# batch_size = 54
# batch_sizes = [320]*batch_size  # Adjust as needed
# #batch_sizes[0] = 2048
# print(sum(batch_sizes))
# print(len(batch_sizes))
# batch_sizes

#### The function below creates for each point cloud batch non overlapping batch with fpfh parameters batch. When a batch is smaller then a batch_size the function adds the padding. At the end it makes the cuda points and fpfh tensors of floats for the PyTorch Transformer input.

In [28]:
# Non overlaping batches

# def create_batches_with_padding(pcd, batch_sizes):
#     num_batches = len(batch_sizes)
#     batches_points = []
#     batches_fpfh = []
#     batch_start = 0
#     points = np.asarray(pcd.points)

#     for i in range(num_batches):
#         batch_size = batch_sizes[i]
#         print('batch_size', batch_size)

#         # Initialize empty arrays for the current batch
#         batch_points = []
#         batch_fpfh = []
        
#         # Cut the point cloud points to the size of the batch
#         if (len(points)-batch_start)>0:
#             batch_points = points[batch_start:(batch_start+batch_size)]
        
#         # Calculate padding sizes
#         pad_points = batch_size - len(batch_points)
#         print('pad_points ', pad_points)

#         # Pad point cloud and FPFH to match the batch size
#         if len(batch_points)>0:
#             batch_points = np.pad(batch_points, [(0, pad_points), (0, 0)], mode='constant')


#             # FPFH for the points cut
#             batch_point_cloud = o3d.geometry.PointCloud()
#             batch_point_cloud.points = o3d.utility.Vector3dVector(batch_points)

#             batch_point_cloud.estimate_normals(
#                 o3d.geometry.KDTreeSearchParamHybrid(
#                     radius=voxel_size * 2.0, max_nn=30))
#             fpfh = o3d.pipelines.registration.compute_fpfh_feature(
#                 batch_point_cloud, o3d.geometry.KDTreeSearchParamHybrid(
#                     radius=voxel_size * 5.0, max_nn=100))



#         # Convert the batch to PyTorch tensors
#         batch_points = torch.FloatTensor(batch_points).cuda()#, dtype=torch.float32)
#         #batch_fpfh = torch.tensor(fpfh, dtype=torch.float32)
#         batch_fpfh = torch.FloatTensor(np.asarray(fpfh.data).copy()).T.cuda()

#         batches_points.append(batch_points)
#         batches_fpfh.append(batch_fpfh)
#         batch_start += batch_size

#     return batches_points, batches_fpfh

In [29]:
# sour_batches_points, sour_batches_fpfh = create_batches_with_padding(source_pc_down, batch_sizes)


In [30]:
# print(len(sour_batches_points))
# print(len(sour_batches_fpfh))
# print(sour_batches_points[1].shape)
# print(sour_batches_fpfh[1].shape)

In [31]:
# targ_batches_points, targ_batches_fpfh = create_batches_with_padding(target_pc_down, batch_sizes)


In [32]:
# print(len(targ_batches_points))
# print(len(targ_batches_fpfh))
# print(targ_batches_points[53].shape)
# print(targ_batches_fpfh[1].shape)

#### Overlaping batches with 20 points overlap

In [30]:
print(source_pc_down_points.shape)
print(target_pc_down_points.shape)

(8628, 3)
(17143, 3)


In [31]:
'''
Define the number of batches 
and desired number of points for each batch 
'''
num_batches = 64
batch_sizes = [320]*num_batches  # Adjust as needed

batch_sizes
overlapping_size = 40 # Adjust as needed
# The number printed below must be more than 
# the number of points in the biggest point cloud
print(sum(batch_sizes)-overlapping_size*num_batches)

17920


In [32]:
# Overlaping batches

def create_overlapping_batches_with_padding(pcd, batch_sizes, overlapping_size=20):
    num_batches = len(batch_sizes)
    batches_points = []
    batches_fpfh = []
    batch_start = 0
    points = np.asarray(pcd.points)

    for i in range(num_batches):
        batch_size = batch_sizes[i]
        print('batch_size', batch_size)

        # Initialize empty arrays for the current batch
        batch_points = []
        batch_fpfh = []
        
        # Cut the point cloud points to the size of the batch
        if (len(points)-batch_start)>0:
            batch_points = points[batch_start:(batch_start+batch_size)]
        
        # Calculate padding sizes
        pad_points = batch_size - len(batch_points)
        print('pad_points ', pad_points)

        # Pad point cloud and FPFH to match the batch size
        if len(batch_points)>0:
            batch_points = np.pad(batch_points, [(0, pad_points), (0, 0)], mode='constant')


            # FPFH for the points cut
            batch_point_cloud = o3d.geometry.PointCloud()
            batch_point_cloud.points = o3d.utility.Vector3dVector(batch_points)

            batch_point_cloud.estimate_normals(
                o3d.geometry.KDTreeSearchParamHybrid(
                    radius=voxel_size * 2.0, max_nn=30))
            fpfh = o3d.pipelines.registration.compute_fpfh_feature(
                batch_point_cloud, o3d.geometry.KDTreeSearchParamHybrid(
                    radius=voxel_size * 5.0, max_nn=100))



        # Convert the batch to PyTorch tensors
        batch_points = torch.FloatTensor(batch_points).cuda()#, dtype=torch.float32)
        #batch_fpfh = torch.tensor(fpfh, dtype=torch.float32)
        batch_fpfh = torch.FloatTensor(np.asarray(fpfh.data).copy()).T.cuda()

        batches_points.append(batch_points)
        batches_fpfh.append(batch_fpfh)
        batch_start += (batch_size - overlapping_size)

    return batches_points, batches_fpfh

In [33]:
sour_over_batches_points, sour_over_batches_fpfh = create_overlapping_batches_with_padding(source_pc_down, batch_sizes, overlapping_size)


batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  92
batch_size 320
pad_points  320
batch_size 320
pad_points  320
batch_size 320
pad_points  320
batch_s

In [34]:
print(len(sour_over_batches_points))
print(len(sour_over_batches_fpfh))
print(sour_over_batches_points[1].shape)
print(sour_over_batches_fpfh[1].shape)

64
64
torch.Size([320, 3])
torch.Size([320, 33])


In [35]:
sour_over_batches_points[1]

tensor([[3.7138e+05, 7.9697e+05, 6.4794e+01],
        [3.7138e+05, 7.9697e+05, 6.5187e+01],
        [3.7136e+05, 7.9692e+05, 7.0469e+01],
        [3.7136e+05, 7.9692e+05, 7.5442e+01],
        [3.7136e+05, 7.9696e+05, 6.5593e+01],
        [3.7136e+05, 7.9696e+05, 6.6425e+01],
        [3.7136e+05, 7.9696e+05, 6.6551e+01],
        [3.7136e+05, 7.9697e+05, 6.6254e+01],
        [3.7138e+05, 7.9695e+05, 6.5494e+01],
        [3.7137e+05, 7.9692e+05, 6.8748e+01],
        [3.7137e+05, 7.9696e+05, 6.5409e+01],
        [3.7136e+05, 7.9695e+05, 6.5680e+01],
        [3.7137e+05, 7.9704e+05, 6.5016e+01],
        [3.7136e+05, 7.9695e+05, 6.5696e+01],
        [3.7137e+05, 7.9697e+05, 6.7324e+01],
        [3.7141e+05, 7.9696e+05, 7.0612e+01],
        [3.7138e+05, 7.9695e+05, 6.5481e+01],
        [3.7137e+05, 7.9695e+05, 6.6181e+01],
        [3.7136e+05, 7.9695e+05, 6.5833e+01],
        [3.7138e+05, 7.9697e+05, 6.7178e+01],
        [3.7132e+05, 7.9696e+05, 6.6907e+01],
        [3.7140e+05, 7.9697e+05, 6

In [36]:
targ_over_batches_points, targ_over_batches_fpfh = create_overlapping_batches_with_padding(target_pc_down, batch_sizes, overlapping_size)


batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320
pad_points  0
batch_size 320

In [38]:
print(len(targ_over_batches_points))
print(len(targ_over_batches_fpfh))
print(targ_over_batches_points[1].shape)
print(targ_over_batches_fpfh[1].shape)

64
64
torch.Size([320, 3])
torch.Size([320, 33])


#### Test

In [39]:
targ_over_batches_points[31].cpu().detach().numpy()

array([[3.7141153e+05, 7.9695769e+05, 6.4800957e+01],
       [3.7140153e+05, 7.9695175e+05, 6.6731407e+01],
       [3.7139100e+05, 7.9688725e+05, 7.7354332e+01],
       [3.7151281e+05, 7.9697862e+05, 6.7378197e+01],
       [3.7132284e+05, 7.9695288e+05, 6.7583900e+01],
       [3.7143103e+05, 7.9689681e+05, 7.3761002e+01],
       [3.7137547e+05, 7.9687156e+05, 7.1440483e+01],
       [3.7138106e+05, 7.9695556e+05, 6.5614334e+01],
       [3.7139238e+05, 7.9695256e+05, 6.6138000e+01],
       [3.7137297e+05, 7.9692588e+05, 6.6276794e+01],
       [3.7144622e+05, 7.9691200e+05, 8.0552559e+01],
       [3.7136834e+05, 7.9695706e+05, 6.5512543e+01],
       [3.7137662e+05, 7.9688119e+05, 7.6200874e+01],
       [3.7142972e+05, 7.9690269e+05, 8.0300720e+01],
       [3.7131959e+05, 7.9694606e+05, 6.8556557e+01],
       [3.7137588e+05, 7.9687925e+05, 7.3492332e+01],
       [3.7137078e+05, 7.9687969e+05, 8.0400940e+01],
       [3.7134450e+05, 7.9696931e+05, 6.5573921e+01],
       [3.7135709e+05, 7.969

In [40]:
sour_over_batches_points[31].cpu().detach().numpy()

array([], dtype=float32)

In [42]:
targ_over_batches_points[28].cpu().detach().numpy()[319]

array([3.7137553e+05, 7.9696994e+05, 6.5257942e+01], dtype=float32)

In [43]:
(targ_over_batches_points[28].cpu().detach().numpy()[319] != [0,0,0]).all()

True

In [44]:
targ_over_batches_points[1].shape

torch.Size([320, 3])


#### Chamfer distance corresponding batches finding (Batches filtering)

In [45]:
# Function to create open3d point cloud from tensor 
# and point clouds set from batches
def create_pcd_batches(num_batches,batches_tensor):
    pcd_points = []
    pcd_batches = []
    pcd = o3d.geometry.PointCloud()
    for i in range(num_batches):
        # Convert the aligned data to a NumPy array of shape (N, 3)
        points_batch = batches_tensor[i].cpu().detach().numpy()  # Assuming 'aligned_source' is a PyTorch tensor
        batch_points = []
        # Remove padding
        if len(points_batch)>0:
            for point in points_batch:
                if (point != [0,0,0]).all():
                    batch_points.append(point)

        if len(batch_points)>0:
            # Store aligned data
            pcd_points.append(batch_points)
            # Create an Open3D point cloud and assign the aligned data
            batch_pcd = o3d.geometry.PointCloud()
            batch_pcd.points = o3d.utility.Vector3dVector(batch_points)
            pcd += batch_pcd
            pcd_batches.append(batch_pcd)
    return pcd, pcd_batches

In [46]:
target_pcd, target_pcd_batches = create_pcd_batches(num_batches, targ_over_batches_points)
print(len(target_pcd_batches))
print(target_pcd_batches[1])
o3d.visualization.draw_geometries([target_pcd])

62
PointCloud with 320 points.


In [47]:
source_pcd, source_pcd_batches = create_pcd_batches(num_batches, sour_over_batches_points)
print(len(source_pcd_batches))
print(source_pcd_batches[1])
o3d.visualization.draw_geometries([source_pcd])

31
PointCloud with 320 points.


In [48]:
# Batches Chamfer distance
chamf_dist_tbl = []
for targ_idx, targ_batch in enumerate(target_pcd_batches):
    targ_dist_raw = []
    for sour_idx, sour_batch in enumerate(source_pcd_batches):
        chamfer_dist = compute_chamfer_distance(targ_batch,sour_batch)
#         print(f'Target batch {targ_idx} Source batch{sour_idx}')
#         print(f'Chamfer distance: {chamfer_dist} \n' )
        targ_dist_raw.append(chamfer_dist)
    chamf_dist_tbl.append(targ_dist_raw)

In [49]:
print("The minimum chamf dist for each target batch is:")
print(len(np.min(chamf_dist_tbl, axis=1)))
print(np.min(chamf_dist_tbl, axis=1))
print(np.min(np.min(chamf_dist_tbl, axis=1)))
print(len(np.argmin(chamf_dist_tbl, axis=1)))
print(np.argmin(chamf_dist_tbl, axis=1),"\n")
print("The minimum chamf dist for each source batch is:")
print(len(np.min(chamf_dist_tbl, axis=0)))
print(np.min(chamf_dist_tbl, axis=0))
print(np.min(np.min(chamf_dist_tbl, axis=0)))
print(len(np.argmin(chamf_dist_tbl, axis=0)))
print(np.argmin(chamf_dist_tbl, axis=0),"\n")

The minimum chamf dist for each target batch is:
62
[10.35945196 10.02688295  8.72263804  8.3408535   7.88822844  7.47442485
  7.76909076  7.85900938  7.35044494  6.59928027  7.06092438  7.6082645
  7.58901041  8.12999349  7.01003745  7.40667473  7.15237093  7.00012658
  6.9952712   7.09664277  6.85177535  7.38318483  7.71614957  8.04928466
  7.96954947  6.77226726  7.47360115  7.0939153   8.13811188  7.39961951
  7.92928238  7.59486764  7.89209628  7.29195597  7.61679497  7.75247579
  7.68677376  7.02594734  7.02438941  7.23301628  7.28880828  7.67122337
  7.67567203  7.15123749  6.32698105  6.74908958  6.79363415  7.12997193
  7.61606703  8.08747284  7.90464277  7.72725412  7.40083469  7.68603158
  7.81390978  6.85734696  7.17914331  6.65622007  6.7262974   6.86914969
  7.12788793  9.06433968]
6.326981045849367
62
[28 16  0 15 15 15 15 15 17 15 15 15 15 15 15 21 21 21 21 15 21 21 28 21
 21 21 21 21 21 15 15 21 15 21 16 16 21 21 21 15 15 21 21 15 15 21 21 21
 15 21 21 21 21 21 21 15 2

In [50]:
chamf_dist_tbl

[[10.58065423469814,
  13.353223069692291,
  13.637181172240142,
  12.18442722247598,
  14.11006659152247,
  10.719267184405815,
  12.916974865648717,
  12.972502715194764,
  14.443119883416342,
  14.168782542709124,
  16.046612268271588,
  14.111185631655346,
  12.27604019849625,
  10.929019424536474,
  15.886831493489249,
  13.222609849801682,
  10.833443392057182,
  11.217651977572348,
  13.555117609285379,
  13.929894496775228,
  15.499829418817246,
  11.308244883890065,
  15.893171926044515,
  16.090747323009722,
  11.565022955763624,
  13.306548552549836,
  13.879018823715281,
  13.12981090170436,
  10.359451955471984,
  12.681689543695555,
  16.38116901352727],
 [10.16864059413035,
  15.103050415623187,
  18.27031842638388,
  13.671460470837964,
  16.152491182174558,
  11.461096480291895,
  15.117593055011158,
  14.244824361494107,
  13.20557038946313,
  14.204558168509553,
  15.33463557576387,
  16.017923028821077,
  13.536642157120404,
  11.774129963418453,
  17.25735291901274

In [51]:
o3d.visualization.draw_geometries([source_pcd_batches[12]])

In [52]:
o3d.visualization.draw_geometries([target_pcd_batches[0]])

#### Transformer result loop

Chamfer distance loss function

In [53]:
def chamfer_distance_loss(source_points, registered_source_points):
    # Calculate Chamfer distance loss
    source_points = source_points.unsqueeze(1)  # Add a batch dimension
    dist_source_to_registered = torch.norm(
        source_points - registered_source_points, dim=3
    )
    min_dist_source_to_registered = torch.min(dist_source_to_registered, dim=2)[0]
    
    dist_registered_to_source = torch.norm(
        registered_source_points - source_points, dim=3
    )
    min_dist_registered_to_source = torch.min(dist_registered_to_source, dim=2)[0]
    
    chamfer_loss = torch.mean(min_dist_source_to_registered) + torch.mean(min_dist_registered_to_source)
    
    return chamfer_loss


Positional encoding from tutorial

In [116]:
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_len=512):
        super(PositionalEncoding, self).__init__()
        
        # Calculate the positional encodings
        position = torch.arange(0, max_seq_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))
        pos_enc = torch.zeros((max_seq_len, d_model))
        pos_enc[:, 0::2] = torch.sin(position * div_term)
        pos_enc[:, 1::2] = torch.cos(position * div_term)
        pos_enc = pos_enc.unsqueeze(0)
        
        self.register_buffer('pos_enc', pos_enc)

    def forward(self, x):
        # Add positional encodings to the input tensor
        x = x + self.pos_enc[:, :x.size(0)]
        return x

Transformer module

In [127]:
import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import torch.nn.functional as F
import torch.optim as optim

# Transformer model for point cloud registration
class PointCloudTransformer(nn.Module):
    def __init__(self, d_model, nhead, num_layers, hidden_dim, num_points):
        super(PointCloudTransformer, self).__init__()
        
        # embedding layer to convert point clouds to embeddings
        self.embedding = nn.Linear(3, d_model)  # Input dimension is 3 for (x, y, z)

#         # positional encoding
#         self.positional_encoding = PositionalEncoding(d_model, num_points)
        
#         encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
#         self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        
        # multi-head self-attention layer
        self.self_attention = nn.MultiheadAttention(d_model, nhead)

        # multi-head cross-attention layer
        self.cross_attention = nn.MultiheadAttention(d_model, nhead)

        # Feed-Forward layers
        self.ffn = nn.Sequential(
            nn.Linear(d_model, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, d_model)
        )
        
        # the point cloud registration decoder
        self.decoder = nn.Linear(d_model, 3)  # Output dimension is 3 for (x, y, z)
        
#         self.init_weights()

#     def init_weights(self):
#         initrange = 0.1
#         self.embedding.weight.data.uniform_(-initrange, initrange)
#         self.linear.bias.data.zero_()
#         self.linear.weight.data.uniform_(-initrange, initrange)
        
    def forward(self, source_cloud, target_cloud):
        # Embed the source and target point clouds
        source_embedded = self.embedding(source_cloud)
        target_embedded = self.embedding(target_cloud)
        print(source_embedded.size())
        
        # encoding
        source_encoded = source_embedded
        target_encoded = target_embedded
#         source_encoded = self.positional_encoding(source_embedded)
#         target_encoded = self.positional_encoding(target_embedded)
        
#         source_encoded = self.transformer_encoder(source_encoded, src_mask)
#         target_encoded = self.transformer_encoder(target_encoded, tgt_mask)
#         print(source_encoded.size())
        
        # Self-attention for the source cloud
        source_attention_output, _ = self.self_attention(source_encoded, source_encoded, source_encoded)
        print(source_attention_output.size())
        
        # Cross-attention from source to target cloud
        cross_attention_output, _ = self.cross_attention(source_attention_output, target_encoded, target_encoded)
        print(cross_attention_output.size())
        output_ffn = self.ffn(cross_attention_output)
        print(output_ffn.size())
        
        # Decode the registration result
        registration_result = self.decoder(output_ffn)
        print(registration_result.size())
        
        return registration_result

In [92]:
# import torch
# import torch.nn as nn

# class PointCloudTransformer(nn.Module):
#     def __init__(self, input_dim, num_heads, num_layers, hidden_dim, d_model):
#         super(PointCloudTransformer, self).__init__()

#         # Embedding layers for point cloud points
# #         self.fpfh_embedding = nn.Embedding(input_dim, d_model)
#         #self.flattened_tensor = nn.Flatten()
#         self.point_embedding = nn.Linear(d_model, input_dim)
#         #self.point_embedding.weight.data = nn.init.xavier_uniform_(self.point_embedding.weight.data)  # Initialize weights
        
#         # Define positional encoding
#         self.positional_encoding = PositionalEncoding(input_dim)
        
#         # Multi-Head Self-Attention and Cross-Attention layers
#         self.self_attention = nn.MultiheadAttention(input_dim, num_heads)
#         self.cross_attention = nn.MultiheadAttention(input_dim, num_heads)
        
#         # Feed-Forward layers
#         self.ffn = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.ReLU(),
#             nn.Linear(hidden_dim, input_dim)
#         )
        
#         # Layer normalization
#         self.layer_norm = nn.LayerNorm(input_dim)
        
#         # Stacking multiple transformer layers
#         self.transformer_layers = nn.ModuleList([self.build_transformer_layer(input_dim) for _ in range(num_layers)])
    
#     def build_transformer_layer(self, input_dim):
#         return nn.Sequential(
#             nn.LayerNorm(input_dim),            
#             self.self_attention,
#             nn.LayerNorm(input_dim),
#             self.cross_attention,
#             nn.LayerNorm(input_dim),
#             self.ffn
#         )
    
#     def forward(self, source_points, target_points):
#         # Embedding source and target FPFH and points
#         source_points_embedded = self.point_embedding(source_points)
#         target_points_embedded = self.point_embedding(target_points)
        
#         # Apply positional encoding
#         source_points_embedded = self.positional_encoding(source_points_embedded)
#         target_points_embedded = self.positional_encoding(target_points_embedded)

        
#         # Pass through transformer layers
#         for layer in self.transformer_layers:
#             # Self-attention for source and target
#             source_points_embedded, _ = layer[1](source_points_embedded, source_points_embedded, source_points_embedded)
#             target_points_embedded, _ = layer[1](target_points_embedded, target_points_embedded, target_points_embedded)
            
#             # Cross-attention between source and target
#             source_points_embedded, _ = layer[3](source_points_embedded, target_points_embedded, target_points_embedded)
#             #target_points_embedded, _ = layer[3](target_points_embedded, source_points_embedded, source_points_embedded)
            
#             # Feed-forward layer
#             source_points_embedded = layer[5](source_points_embedded)
#             #target_points_embedded = layer[5](target_points_embedded)
        
#         return source_points_embedded

In [93]:
# with torch.no_grad():
#     torch.cuda.empty_cache()

In [94]:
# !pip install GPUtil
# !pip install numba

In [95]:
# import torch
# from GPUtil import showUtilization as gpu_usage
# from numba import cuda

# def free_gpu_cache():
#     print("Initial GPU Usage")
#     gpu_usage()                             

#     torch.cuda.empty_cache()

#     cuda.select_device(0)
#     cuda.close()
#     cuda.select_device(0)

#     print("GPU Usage after emptying the cache")
#     gpu_usage()

In [96]:
# free_gpu_cache()  


In [97]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [98]:
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [99]:
# # Instantiate the PointCloudTransformer
# #d_model = 64  # Choose an appropriate value
# nhead = 8  # Choose an appropriate value
# num_layers = 4  # Choose an appropriate value
# num_points = 640  # Choose the number of points in your point cloud

# model = PointCloudTransformer(num_points, nhead, num_layers)

# # Define an optimizer
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Training loop (you need to prepare your own data and iterate over it)
# for epoch in range(num_epochs):
#     optimizer.zero_grad()
#     predicted_registration = model(source_cloud, target_cloud)
#     loss = chamfer_distance_loss(predicted_registration, target_cloud)
#     loss.backward()
#     optimizer.step()

# After training, you can use the model to register point clouds
# registered_cloud = model(source_cloud, target_cloud)


In [128]:
'''
Fix the open3d point cloud rebuilding from the correct batches
Correct the loss function
'''

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Define the PointCloudTransformer class (use the previously provided code)

# Example usage
num_points = 320  # Replace with the actual input dimension of the batch size *2
num_heads = 8 # input_dim should be divisible by num_heads
num_layers = 4 #increase the number of layers if the model needs more capacity.
d_model = 64 # Model Complexity (d_model):
            #The d_model parameter determines the dimension of the model,
            #which is also the dimension of word embeddings 
            #in the Transformer. It should be large enough 
            #to capture complex patterns in your data. 
            #For point cloud registration, 
            #you may need a higher-dimensional embedding because 
            #the data is 3D. 
            #A reasonable starting point could be in the range of 64 to 256.
            #Experiment with different values 
            #to see what works best for your specific dataset.
hidden_dim = d_model*2 # common choice is d_model/2 or d_model*2

# d_model_fpfh = 33 # fpfh dimensions
# batch_size = 32
# sequence_length = 10


# Create the transformer model
transformer = PointCloudTransformer(d_model, num_heads, num_layers, hidden_dim, num_points).cuda()

# Define a loss function and optimizer for registration
#criterion = nn.MSELoss()
optimizer = optim.Adam(transformer.parameters(), lr=0.001)

# Number of iterations (epochs) for registration
num_iterations = 3
losses=[]
aligned_source_pcd_points = []
#aligned_target_pcd_points = []
#results = []
aligned_source_point_cloud = o3d.geometry.PointCloud()
#aligned_target_point_cloud = o3d.geometry.PointCloud()

for iteration in range(num_iterations):
    for i in range(len(sour_over_batches_points)):
        source_points = sour_over_batches_points[i].cuda()
        print(f'Source {i} with {source_points.shape}')
        # print(source_points)
        # source_fpfh = sour_batches_fpfh[i]
        if len(source_points)>0:
            for j in range(len(targ_over_batches_points)):
                target_points = targ_over_batches_points[j].cuda()
                #print(target_points)
                #target_fpfh = targ_batches_fpfh[i]
                print(f'   Processing target batch {j} with {target_points.shape}')
                if len(target_points)>0:
                    # Pass data through the transformer
                    registration_result = transformer(source_points, target_points)

                    # Find nearest neighbors in the source point cloud for each point in the target point cloud
                    source_points_numpy = registration_result.cpu().detach().numpy()
                    #target_points_numpy = target_embedding.cpu().detach().numpy()
                    #nn_model = NearestNeighbors(n_neighbors=1).fit(source_points_numpy)
                    #distances, source_indices = nn_model.kneighbors(target_points_numpy)

                    aligned_source_batch_points = []
                    if len(source_points_numpy)>0:
                        for point in source_points_numpy:
                            if (point != [0,0,0]).all():
                                aligned_source_batch_points.append(point)

#                     aligned_target_batch_points = []
#                     if len(target_points_numpy)>0:
#                         for point in target_points_numpy:
#                             if (point != [0,0,0]).all():
#                                 aligned_target_batch_points.append(point)

                    if len(aligned_source_batch_points)>0:
                        print(f'aligned_source_batch_points.shape=\n{np.asarray(aligned_source_batch_points).shape}')
                        # Store aligned data
                        aligned_source_pcd_points.extend(np.asarray(aligned_source_batch_points))
                        # Create an Open3D point cloud and assign the aligned data
                        aligned_source_batch_pcd = o3d.geometry.PointCloud()
                        aligned_source_batch_pcd.points = o3d.utility.Vector3dVector(aligned_source_batch_points)
                        aligned_source_point_cloud += aligned_source_batch_pcd


#                     if len(aligned_target_batch_points)>0:
#                         print(f'aligned_target_batch_points.shape=\n{np.asarray(aligned_target_batch_points).shape}')
#                         # Store aligned data
#                         aligned_target_pcd_points.extend(np.asarray(aligned_target_batch_points))
#                         # Create an Open3D point cloud and assign the aligned data
#                         aligned_target_batch_pcd = o3d.geometry.PointCloud()
#                         aligned_target_batch_pcd.points = o3d.utility.Vector3dVector(aligned_target_batch_points)
#                         aligned_target_point_cloud += aligned_target_batch_pcd

                    # Calculate the loss as the mean squared error between aligned source and target points
                    loss = chamfer_distance_loss(registration_result, target_points)

                    # Zero the gradients, perform backpropagation, and update the model
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

    print(f"Iteration {iteration}/{num_iterations}: Loss = {loss.item()}")
    losses.append(loss.item())
    #print(aligned_source_point_cloud)
    #print(aligned_target_point_cloud)
    print(np.asarray(aligned_source_point_cloud.points).shape)
    #print(np.asarray(aligned_target_point_cloud.points).shape)

    rot_err, transl_err = registration_error(np.asarray(aligned_source_point_cloud.points), target_pc_down_points)
    print(f'Rotational MAE error xyz: {rot_err}, \nTranslational MAE error xyz: {transl_err}')
    print(f'Rotational MAE: {np.mean(rot_err)}, \nTranslational MAE: {np.mean(transl_err)}')
    print("")

    # The 'source_embedding' now represents the aligned source point cloud
    # You can convert it to a NumPy array for further processing

    o3d.visualization.draw_geometries([aligned_source_point_cloud, target_pc_down])


Source 0 with torch.Size([320, 3])
   Processing target batch 0 with torch.Size([320, 3])
torch.Size([320, 64])
torch.Size([320, 64])
torch.Size([320, 64])
torch.Size([320, 64])
torch.Size([320, 3])
aligned_source_batch_points.shape=
(320, 3)


IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)

Input Dimension (input_dim): Since you have FPFH features and point cloud points for both source and target inputs, the input_dim for your embedding layers should account for the total number of unique indices across all features. The input_dim should be large enough to cover the maximum index value across both FPFH and point cloud points. If the maximum index value for FPFH is 320 and for point cloud points is 320 as well, you might set input_dim to 640 to ensure coverage.

Model Dimension (d_model): The d_model parameter determines the dimension of the embedding vectors. You can experiment with different values, but common choices include 128, 256, or 512. For example, setting d_model to 128 would result in 128-dimensional embeddings.

Number of Layers (num_layers): The number of layers in the transformer can impact its capacity to learn from the data. Starting with a moderate number of layers (e.g., 6) is a reasonable choice, and you can adjust it based on the complexity of your registration task. You can increase the number of layers if the model needs more capacity.

Number of Attention Heads (num_heads): The number of attention heads in the multi-head attention mechanism can be a hyperparameter to tune. A common choice is 4, but you can experiment with different values to see what works best for your data and task.

Hidden Dimension (hidden_dim): The hidden dimension in the feed-forward layers can be set to a fraction of d_model. A common choice is d_model/2 or d_model*2. For example, if d_model is 128, you might set hidden_dim to 64 or 256.

In [107]:
m = nn.Linear(3, 30).cuda()
input = source_points
output = m(input)
print(output.size())

torch.Size([320, 30])
