In [None]:
import numpy as np
import os
import sys
import cv2
import matplotlib.pyplot as plt
import open3d as o3d
%matplotlib inline 
src_path = os.path.abspath("../..")
if src_path not in sys.path:
    sys.path.append(src_path)
%load_ext autoreload
from dataset_utils import create_nuscenes_odometry_dataset
from dataset.filters.filter_list import FilterList
from dataset.filters.range_filter import RangeFilter
from dataset.filters.apply_pose import ApplyPose
import scipy
from scipy.spatial.distance import cdist
from normalized_cut import normalized_cut
from ncuts_utils import ncuts_chunk,kDTree_1NN_feature_reprojection_colors, get_merge_pcds
from dataset_utils import * 
from point_cloud_utils import get_pcd, transform_pcd, kDTree_1NN_feature_reprojection, remove_isolated_points, get_subpcd, get_statistical_inlier_indices, merge_chunks_unite_instances, merge_unite_gt, remove_semantics, merge_chunks_unite_instances2
from aggregate_pointcloud import aggregate_pointcloud
from visualization_utils import generate_random_colors, color_pcd_by_labels,generate_random_colors_map
from sam_label_distace import sam_label_distance
from chunk_generation import subsample_positions, chunks_from_pointcloud, indices_per_patch, tarl_features_per_patch, image_based_features_per_patch, dinov2_mean, get_indices_feature_reprojection
from metrics.metrics_class import Metrics
import shutil
from tqdm import tqdm
# Put the locally built clustering extension on the import path
# (presumably this is where `pycluster` comes from — TODO confirm).
lib_path = os.path.expanduser('~') + '/unsup_3d_instances/pipeline/segmentation/utils/voxel_clustering_dependencies/build/'
sys.path.insert(0, lib_path+ "clustering")
# NOTE(review): this path is only assigned; nothing visible in this notebook adds it to
# sys.path, so `felzenszwalb_cpp` has to be importable some other way. It is also an
# absolute, user-specific path — consider making it configurable.
felsenzwalb_path = '/home/cedric/UnScene3D_collaboration_fork/lib/utils/cpp_utils/build/lib.linux-x86_64-cpython-39/'
import felzenszwalb_cpp
import pycluster
from scipy.spatial import KDTree

In [None]:
import sklearn
from sklearn.cluster import DBSCAN, HDBSCAN
import hdbscan

# CVC clustering setup: `params` is the argument list for pycluster.CVC_cluster
# (see the disabled constructor call below). The meaning of the three values is not
# visible in this notebook — TODO document them.
#params = [2,0.4,1.5]
params = [0.1,0.2,0.5]
# The fallback (neither 'hdbscan' nor 'felzenswalb') branch of clustering_logic relies on a
# CVC_cluster instance configured with these params.
#cvc = pycluster.CVC_cluster(params)

def uniform_down_sample_with_indices(points, every_k_points):
    """Return the row indices kept by a uniform every-k-th subsampling.

    :param points: Array-like exposing ``shape``; only ``shape[0]`` (the row count) is read.
    :param every_k_points: Stride between two kept rows.
    :return: Python list ``[0, k, 2k, ...]`` of all indices below ``points.shape[0]``.
    """
    total_rows = points.shape[0]
    return list(range(0, total_rows, every_k_points))

def downsample_chunk(points, num_points_to_sample=30000):
    """Uniformly thin out ``points`` by keeping every k-th row.

    The stride is ``floor(len(points) / num_points_to_sample)`` when the input holds more
    rows than the target and 1 otherwise. Because the stride is floored, the result can
    contain up to just under twice ``num_points_to_sample`` rows; it is a cheap thinning,
    not an exact resampling.

    :param points: NumPy array whose first axis enumerates the points.
    :param num_points_to_sample: Approximate number of rows to keep (default 30000, the
        value that used to be hard-coded).
    :return: A new array (copy) holding rows ``0, k, 2k, ...`` of ``points``.
    :raises ValueError: If ``num_points_to_sample`` is smaller than 1.
    """
    if num_points_to_sample < 1:
        raise ValueError("num_points_to_sample must be >= 1")

    n_points = points.shape[0]
    if n_points > num_points_to_sample:
        every_k_points = n_points // num_points_to_sample
    else:
        every_k_points = 1

    # Index-array selection (rather than a strided slice) so the caller gets a copy,
    # exactly like the previous list-of-indices implementation.
    keep = np.arange(0, n_points, every_k_points)
    return points[keep]

def clustering_logic(pcd_nonground_chunk, pcd_ground_chunk,
                        eps=0.3, min_samples=10,method='hdbscan'):
    """
    Cluster the non-ground points of one chunk and colour them by cluster.

    The non-ground cloud is uniformly subsampled with ``downsample_chunk``, clustered with
    the selected ``method``, and the labels of the subsampled points are carried back to the
    full-resolution cloud with ``kDTree_1NN_feature_reprojection``. Each distinct label
    (including the -1 "unassigned" label) is then mapped to one colour.

    :param pcd_nonground_chunk: Open3D point cloud with the non-ground points of the chunk.
        Its colours are overwritten in place and the same object is returned.
    :param pcd_ground_chunk: Open3D point cloud with the ground points of the chunk.
    :param eps: Unused. Left over from a DBSCAN variant and kept so existing calls still work.
    :param min_samples: Unused, for the same reason as ``eps``.
    :param method: ``'hdbscan'`` (default) or ``'felzenswalb'``; any other value selects the
        CVC clustering from ``pycluster``.
    :return: Tuple ``(coloured non-ground cloud, ground cloud, None)``. The ground cloud is
        restricted to statistical inliers lying less than 0.2 above the mean ground height
        and is painted black. The third element is always ``None``.
    """

    # Ground: keep statistical inliers, then drop everything well above the mean height.
    inliers = get_statistical_inlier_indices(pcd_ground_chunk)
    ground_inliers = get_subpcd(pcd_ground_chunk, inliers)
    ground_z = np.asarray(ground_inliers.points)[:, 2]
    mean_hight = np.mean(ground_z)
    in_idcs = np.where(ground_z < (mean_hight + 0.2))[0]
    cut_hight = get_subpcd(ground_inliers, in_idcs)
    cut_hight.paint_uniform_color([0, 0, 0])

    # The height-based filtering of the non-ground points is disabled, so there are no
    # indices to report back to the caller.
    in_idcs = None

    #in_idcs = np.where(np.asarray(pcd_nonground_chunk.points)[:,2] > (mean_hight + 0.05))[0]
    #pcd_nonground_corrected = get_subpcd(pcd_nonground_chunk, in_idcs)
    pcd_nonground_corrected = pcd_nonground_chunk

    pcd_nonground_downsampled = o3d.geometry.PointCloud()
    pts_downsampled = downsample_chunk(np.asarray(pcd_nonground_corrected.points))
    pcd_nonground_downsampled.points = o3d.utility.Vector3dVector(pts_downsampled)

    if method == 'hdbscan':
        clustering = hdbscan.HDBSCAN(algorithm='best', alpha=1., approx_min_span_tree=True,
                                    gen_min_span_tree=True, leaf_size=100,
                                    metric='euclidean', min_cluster_size=10, min_samples=None
                                )
        clustering.fit(pts_downsampled)
        labels_not_road = clustering.labels_

    elif method == 'felzenswalb':
        # Build a surface mesh from the subsampled points, segment the mesh, then map the
        # vertex segments back to the points.
        pcd_nonground_downsampled.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
        mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd_nonground_downsampled, depth=9)
        # Drop the 5% least supported vertices of the Poisson reconstruction.
        vertices_to_remove = densities < np.quantile(densities, 0.05)
        mesh.remove_vertices_by_mask(vertices_to_remove)

        # Normals, min-max normalised per axis, serve as the vertex "colour" feature.
        normals = np.asarray(mesh.vertex_normals)
        norm_colors = (normals - normals.min(axis=0)) / (normals.max(axis=0) - normals.min(axis=0))
        mesh.vertex_colors = o3d.utility.Vector3dVector(norm_colors)

        vertices = np.array(mesh.vertices).astype(np.single)
        colors = np.array(mesh.vertex_colors).astype(np.single)
        faces = np.array(mesh.triangles).astype(np.intc)

        # NOTE(review): opens an interactive viewer for every chunk; consider removing it
        # for unattended runs.
        o3d.visualization.draw_geometries([mesh])

        min_vert_num = 2000

        comps, connectivity = felzenszwalb_cpp.segment_mesh(vertices, faces, colors, 0.5, min_vert_num)  # orig was min_vert_num=50

        # Filter out small segments and floaters by merging each of them into the segment
        # of the nearest vertex that belongs to a different segment.
        mesh_points = np.array(mesh.vertices)
        vertices_tree = KDTree(mesh_points)
        segment_ids, segment_counts = np.unique(comps, return_counts=True)

        filtered_comps = comps.copy()
        for segment_id, segment_count in zip(segment_ids, segment_counts):
            if (segment_id not in connectivity) or (segment_count < min_vert_num):
                # Querying segment_count + 1 neighbours guarantees at least one vertex
                # from another segment among the results.
                _, closest_point_ids = vertices_tree.query(mesh_points[comps == segment_id][0], k=segment_count+1)
                neighbour_segments = comps[closest_point_ids]
                target_segment_id = neighbour_segments[np.nonzero(neighbour_segments - segment_id)[0][0]]

                # update at location
                filtered_comps[comps == segment_id] = target_segment_id

        # Associate each point to the segment of its nearest mesh vertex
        kdtree = KDTree(vertices)
        _, idx = kdtree.query(pts_downsampled)
        labels_not_road = filtered_comps[idx]
        print("pts shape",pts_downsampled.shape)
        print('labels shape',labels_not_road.shape)

    else :
        # CVC clustering. The clusterer is built here from the module-level `params`
        # because no global `cvc` instance exists (its construction is commented out).
        cvc = pycluster.CVC_cluster(params)
        capr = cvc.calculateAPR(pts_downsampled)
        hash_table = cvc.build_hash_table(capr)
        cluster_indices = cvc.cluster(hash_table,capr)
        cluster_id = cvc.most_frequent_value(cluster_indices)
        labels_not_road = np.ones((pts_downsampled.shape[0], 1)) * -1

        # Keep the cluster id of every point whose cluster is listed in `cluster_id`; all
        # other points stay at -1. Assumes `cluster_indices` holds one id per point.
        point_cluster = np.asarray(cluster_indices).reshape(-1)
        kept = np.isin(point_cluster, np.asarray(cluster_id))
        labels_not_road[np.flatnonzero(kept), 0] = point_cluster[kept]

    # Reproject cluster labels to the original point cloud size
    cluster_labels = np.ones((len(pcd_nonground_corrected.points), 1)) * -1
    labels_non_ground = kDTree_1NN_feature_reprojection(cluster_labels, pcd_nonground_corrected, labels_not_road.reshape(-1,1), pcd_nonground_downsampled )
    colors = np.zeros((labels_non_ground.shape[0],3))
    unique_labels = list(np.unique(labels_non_ground))

    # One colour per label. The palette grows beyond 5000 entries only when needed, so a
    # chunk with more labels no longer fails with an IndexError (assumes
    # generate_random_colors(n) returns n colours — TODO confirm).
    colors_gen = generate_random_colors(max(5000, len(unique_labels)))
    for color_idx, label in enumerate(unique_labels):
        cur_idcs = np.where(labels_non_ground == label)[0]
        colors[cur_idcs] = np.array(colors_gen[color_idx])

    # The palette is divided by 255 before being handed to Open3D.
    pcd_nonground_corrected.colors = o3d.utility.Vector3dVector(colors / 255.)

    #o3d.visualization.draw_geometries([pcd_nonground_corrected])

    return pcd_nonground_corrected, cut_hight, in_idcs

Here we define the dataset for each nuScenes sequence.

In [None]:
# ---- Dataset selection ----
DATASET_PATH = '/media/cedric/Datasets1/nuScenes_mini_v2/nuScenes'

dist_threshold = 5  # moving object filter threshold
dataset_type = 'v1.0-mini'
if dataset_type == 'v1.0-trainval':
    # this script can be used for full eval with path change
    DATASET_PATH = '/media/cedric/Datasets1/nuScenes_train'

# ---- Map / chunk parameters (consumed by the per-sequence loop below) ----
minor_voxel_size = 0.05
major_voxel_size = 0.35
chunk_size = np.array([25, 25, 25])  # meters
overlap = 3  # meters
ground_segmentation_method = 'patchwork'
NCUT_ground = False

# ---- Output folders ----
# The scratch folder is wiped and recreated on every run of this cell.
out_folder_ncuts = 'test_data/'
if os.path.exists(out_folder_ncuts):
    shutil.rmtree(out_folder_ncuts)
os.makedirs(out_folder_ncuts)

out_folder = 'pcd_preprocessed_nuscenes/' + dataset_type + '/'
if not os.path.exists(out_folder):
    os.makedirs(out_folder)

# Clustered chunks and merged prediction maps.
out_dbscan = f'{out_folder}out_nuscenes_dbscan/'
map_out_pred = f'{out_folder}out_nuscenes_dbscan/maps/'
if not os.path.exists(map_out_pred):
    os.makedirs(map_out_pred)

# Instance maps; these folders are only read from in the cells below, never created here.
out_nuscenes_instances = f'{out_folder}out_nuscenes_instance/'
map_out_instances = f'{out_folder}out_nuscenes_instance/maps/'

seqs = list(range(0, 10))  # currently downloaded up to 95 range

all_instances = []

colors = generate_random_colors_map(600, 0)
# Per-sequence pipeline: build the dataset, preprocess and aggregate the scans, cut the map
# into chunks, cluster every chunk with clustering_logic and write the coloured chunks to
# <out_dbscan>/<SEQUENCE_NUM>/.
for SEQUENCE_NUM in tqdm(seqs) : 
        print('current sequence',SEQUENCE_NUM)
        dataset = create_nuscenes_odometry_dataset(DATASET_PATH,SEQUENCE_NUM,ncuts_mode=True, sam_folder_name="SAM", 
                        dinov2_folder_name="Dinov2",dist_threshold=dist_threshold,dataset_type=dataset_type)
        
        # Process the whole sequence.
        ind_start = 0
        ind_end = len(dataset)  
                        
        # NOTE(review): the caching guard below is disabled, so preprocessing is redone on every run.
        #if os.path.exists(f'{out_folder}all_poses_' + str(SEQUENCE_NUM) + '_' + str(0) + '.npz') == False:
        process_and_save_point_clouds(dataset,ind_start,ind_end,minor_voxel_size=minor_voxel_size,
                                major_voxel_size=major_voxel_size,icp=False,
                                out_folder=out_folder,sequence_num=SEQUENCE_NUM,
                                ground_segmentation_method=ground_segmentation_method)
        
        
        #if os.path.exists(f'{out_folder}pcd_ground_minor' + str(SEQUENCE_NUM) + '.pcd') == False:
        pcd_ground_minor, pcd_nonground_minor,\
                all_poses, T_pcd, first_position,labels = load_and_downsample_point_clouds(out_folder,SEQUENCE_NUM,minor_voxel_size,\
                                                                        ground_mode=ground_segmentation_method)

        # Round-trip through disk. The file names carry no sequence number, so each sequence
        # overwrites the files written for the previous one.
        o3d.io.write_point_cloud(f'{out_folder}pcd_ground_minor.pcd', pcd_ground_minor, write_ascii=False, compressed=False, print_progress=False)
        o3d.io.write_point_cloud(f'{out_folder}pcd_nonground_minor.pcd', pcd_nonground_minor, write_ascii=False, compressed=False, print_progress=False)
        np.savez(f'{out_folder}nuscenes_labels_preprocessed.npz',
                                                instance_nonground= labels['instance_nonground'],
                                                instance_ground= labels['instance_ground'],
                                                seg_ground = labels['seg_ground'],
                                                seg_nonground= labels['seg_nonground']
                                                )
        
        
        # Read back what was just written; from here on the on-disk versions are used.
        pcd_ground_minor = o3d.io.read_point_cloud(f'{out_folder}pcd_ground_minor.pcd')
        pcd_nonground_minor = o3d.io.read_point_cloud(f'{out_folder}pcd_nonground_minor.pcd')
        
        nuscenes_labels_orig = {}
        with np.load(f'{out_folder}nuscenes_labels_preprocessed.npz') as data :
                nuscenes_labels_orig['instance_ground'] = data['instance_ground']
                nuscenes_labels_orig['instance_nonground'] = data['instance_nonground']
                nuscenes_labels_orig['seg_nonground'] = data['seg_nonground']
                nuscenes_labels_orig['seg_ground'] = data['seg_ground']
        
                
        
        # Poses are re-read from the per-sequence file (presumably written by
        # process_and_save_point_clouds — confirm); this replaces the all_poses / T_pcd /
        # first_position values returned by load_and_downsample_point_clouds above.
        with np.load(f'{out_folder}all_poses_{SEQUENCE_NUM}_0.npz') as data:
                all_poses = data['all_poses']
                T_pcd = data['T_pcd']
                first_position = T_pcd[:3, 3]
        
        
        # Copy of the non-ground points coloured by instance label; only consumed by the
        # commented-out visualization below.
        pcd_new = o3d.geometry.PointCloud()
        pcd_new.points = o3d.utility.Vector3dVector(np.asarray(pcd_nonground_minor.points))
        
        map_labelled = color_pcd_by_labels(pcd_new,\
                        nuscenes_labels_orig['instance_nonground'].reshape(-1,1))
        
        #o3d.visualization.draw_geometries([map_labelled])
        
        poses, positions, \
        sampled_indices_local, sampled_indices_global = subsample_and_extract_positions(all_poses,ind_start=ind_start)
        
        # Cut the map into chunks. The keyword is named kitti_labels, but the nuScenes
        # labels loaded above are what gets passed in.
        pcd_nonground_chunks, pcd_ground_chunks,\
        pcd_nonground_chunks_major_downsampling, pcd_ground_chunks_major_downsampling, \
        indices,indices_ground, center_positions, \
        center_ids, chunk_bounds, nuscenes_labels = chunk_and_downsample_point_clouds(pcd_nonground_minor, pcd_ground_minor, T_pcd, positions, 
                                                                    first_position, sampled_indices_global, chunk_size=chunk_size, 
                                                                    overlap=overlap, major_voxel_size=major_voxel_size,kitti_labels=nuscenes_labels_orig)
                                                                    
                                                                    
        
        
        
        # NOTE(review): cams and cam_ids are not used anywhere else in this cell.
        cams = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_FRONT_RIGHT"]
        cam_ids = [0]
        
        #out_dbscan = 'out_dbscan/'
        #if os.path.exists(out_dbscan) == True : 
        #        shutil.rmtree(out_dbscan)
        # Start from an empty per-sequence output folder.
        out_dbscan_cur = out_dbscan + str(SEQUENCE_NUM) + '/'
        if os.path.exists(out_dbscan_cur) == True : 
                shutil.rmtree(out_dbscan_cur)
        os.makedirs(out_dbscan_cur)
        
        
        
        # NOTE(review): instances, patchwise_indices and out_data are computed but not used
        # further in this cell.
        instances = np.hstack((nuscenes_labels_orig['instance_nonground'].reshape(-1,),nuscenes_labels_orig['instance_ground'].reshape(-1,)))
        
        patchwise_indices = indices_per_patch(T_pcd, center_positions, positions, first_position, sampled_indices_global, chunk_size)
        out_data = []
        print(len(center_ids))
        # `sequence` is the chunk index here, not a nuScenes sequence number.
        for sequence in range(len(center_ids)):
                        # eps and min_samples are accepted by clustering_logic but not read by it.
                        obstacle_chunk, ground_chunk, in_idcs = clustering_logic(pcd_nonground_chunks[sequence],pcd_ground_chunks[sequence],
                        eps=0.4, min_samples=10,method='hdbscan')
                
                        #kitti_chunk_instance = color_pcd_by_labels(obstacle_chunk,kitti_labels['nonground']['instance'][sequence][in_idcs].reshape(-1,),
                        
                        #o3d.visualization.draw_geometries([obstacle_chunk + ground_chunk])
                        #print(kitti_chunk_instance,obstacle_chunk)
                        
                        # One file per chunk, named after the zero-padded chunk centre id.
                        name =  str(center_ids[sequence]).zfill(6) + '.pcd'
                        
                        o3d.io.write_point_cloud(out_dbscan_cur + name, obstacle_chunk + ground_chunk, write_ascii=False, compressed=False, print_progress=False)


In [None]:
# Rebinds the module-level `colors` (built with 600 instead of 400 in the configuration cell).
# NOTE(review): `colors` is not referenced by the cells visible below.
colors = generate_random_colors_map(400,0)

def get_merge_pcds(out_folder_ncuts):
    """Load every ``.pcd`` file of a folder, in lexicographic file-name order.

    Note that this definition shadows the ``get_merge_pcds`` imported from ``ncuts_utils``
    at the top of the notebook.

    :param out_folder_ncuts: Folder that holds the chunk point clouds.
    :return: List of Open3D point clouds, one per ``.pcd`` file. The list is empty when the
        folder does not exist or holds no ``.pcd`` files, so callers can skip such
        sequences instead of failing with ``FileNotFoundError``.
    """
    if not os.path.isdir(out_folder_ncuts):
        return []

    # Sorting keeps the chunk order stable across file systems.
    pcd_files = sorted(
        file_name for file_name in os.listdir(out_folder_ncuts) if file_name.endswith(".pcd")
    )
    print(pcd_files)

    return [
        o3d.io.read_point_cloud(os.path.join(out_folder_ncuts, pcd_file))
        for pcd_file in pcd_files
    ]

Now we can split the point cloud into chunks based on a to-be-determined (TBD) `chunk_size`.

In [None]:
#out_dbscan = 'out_dbscan/'

# Merge the clustered chunks of every sequence into one map and write two files per sequence:
# the merged clustering result and a version recoloured by remove_semantics(...).
for i in seqs:
    print('cur seq',i)
    out_dbscan_cur = out_dbscan + str(i) + '/'

    # The ground-truth instance map is not produced by this notebook. Open3D returns an
    # empty cloud for a missing file instead of raising, so check explicitly (the evaluation
    # cell applies the same existence check) before doing the expensive merge.
    gt_map_path = map_out_instances + "merge_part_nuscenes_instance" + str(i)  + ".pcd"
    if not os.path.exists(gt_map_path):
        print('missing ground-truth map, skipping seq', i)
        continue

    point_clouds = get_merge_pcds(out_dbscan_cur)
    if len(point_clouds) == 0:
        continue

    merge = merge_chunks_unite_instances(point_clouds)

    merge_nuscenes_instance = o3d.io.read_point_cloud(gt_map_path)
    #o3d.visualization.draw_geometries([merge_nuscenes_instance])

    # Instances are encoded as colours: every distinct RGB triple becomes one integer label.
    _, labels_ncuts = np.unique(np.asarray(merge.colors), axis=0, return_inverse=True)
    _, labels_nuscenes = np.unique(np.asarray(merge_nuscenes_instance.colors), axis=0, return_inverse=True)

    pred_instance = remove_semantics(labels_nuscenes,labels_ncuts)

    # Write the raw merge first; color_pcd_by_labels is only applied for the second file.
    o3d.io.write_point_cloud(map_out_pred + "merge_part_nuscenes_hdbscan" + str(i)  + ".pcd", merge, write_ascii=False, compressed=False, print_progress=False)
    o3d.io.write_point_cloud(map_out_pred + "merge_part_nuscenes_hdbscan_instances_" + str(i)  + ".pcd", color_pcd_by_labels(merge,pred_instance), write_ascii=False, compressed=False, print_progress=False)
    




In [None]:

# Accumulators for the concatenated maps of all evaluated sequences.
merge_pcd_pred = o3d.geometry.PointCloud()
merge_pcd_instance = o3d.geometry.PointCloud()
merge_instances_only = o3d.geometry.PointCloud()

metrics_clustering = Metrics(name='hdbscan',min_points=50)

for i in seqs:
    gt_path = map_out_instances + "merge_part_nuscenes_instance" + str(i)  + ".pcd"
    pred_path = map_out_pred + "merge_part_nuscenes_hdbscan" + str(i)  + ".pcd"
    pred_instances_path = map_out_pred + "merge_part_nuscenes_hdbscan_instances_" + str(i)  + ".pcd"

    # Evaluate only sequences for which the ground truth AND both prediction maps exist.
    # The merge cell skips sequences without chunks, and Open3D returns an empty cloud for
    # a missing file, which would feed mismatched label arrays into the metrics.
    if not all(os.path.exists(path) for path in (gt_path, pred_path, pred_instances_path)):
        continue

    all_pred = o3d.io.read_point_cloud(pred_path)
    cur_gt = o3d.io.read_point_cloud(gt_path)
    instances_pred = o3d.io.read_point_cloud(pred_instances_path)

    # Instances are encoded as colours: every distinct RGB triple becomes one integer label.
    _, labels_ncuts = np.unique(np.asarray(instances_pred.colors), axis=0, return_inverse=True)
    _, labels_ncuts_all = np.unique(np.asarray(all_pred.colors), axis=0, return_inverse=True)
    _, labels_nuscenes = np.unique(np.asarray(cur_gt.colors), axis=0, return_inverse=True)
    metrics_clustering.add_stats(labels_ncuts_all,labels_ncuts,labels_nuscenes)

    merge_pcd_instance += cur_gt
    merge_instances_only += instances_pred
    merge_pcd_pred += all_pred

metrics_clustering.compute_stats_final()