# Semantic SfM: single scene on synthetic data generated from Kubric 

### 1a. Generate synthetic data
We developed a tool that generates synthetic data with Kubric. Please follow the instructions in the GitHub repo: https://github.com/ZhiangChen/data_generator_3d

### 1b. Organize data
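Create a scene folder (e.g. `kubric_0`) under `semantic_SfM/data` and organize your data following the structure below. The code cells in this notebook use a scene named `kubric_1`; set `scene_dir` to match your own folder name.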

```
semantic_SfM/data
    ├── kubric_0
        ├── photos
        │       ├── 0.png
        │       ├── 1.png
        │       ├── ...
        │       └── 323.png
        ├── reconstructions
        │       ├── camera_poses.npy
        │       └── combined_point_cloud.las   
        ├── segmentations_gt
        │       ├── 0.npy
        │       ├── 0.png
        │       └── ...
        └── associations
                └── depth
                        ├── 0.npy
                        ├── 0.png
                        └── ...
```

In [5]:
import os

# Root folder of the scene being processed; every other path is derived from it.
scene_dir = '../../data/kubric_1'

# Scene sub-folders: input photos, SAM segmentation output, and projection associations.
photos_folder_path, segmentations_folder_path, associations_folder_path = (
    os.path.join(scene_dir, subfolder)
    for subfolder in ('photos', 'segmentations', 'associations')
)

# Point cloud of the scene, stored under the reconstructions sub-folder.
pointcloud_path = os.path.join(scene_dir, 'reconstructions', 'combined_point_cloud.las')

### 2. Segment images using SAM 

#### 2a. SAM segmentation


In [2]:
from ssfm.image_segmentation import ImageSegmentation

In [11]:
# Settings handed to ImageSegmentation. Keys are kept in the original order.
# NOTE(review): 'cuda:1' selects the second GPU and will presumably fail on a single-GPU machine.
sam_params = {
    'model_name': 'sam2',
    'model_path': '../../semantic_SfM/sam2/sam2.1_hiera_large.pt',
    'device': 'cuda:1',
    'points_per_side': 32,
    'points_per_batch': 128,
    'pred_iou_thresh': 0.6,
    'stability_score_offset': 0.5,
    'box_nms_thresh': 0.6,
    'use_m2m': True,
}


# Collect the scene's photos. Only .png files are kept so that stray files in the
# folder (thumbnails, hidden files, ...) cannot break the numeric sort below.
photos_dir = os.path.join(scene_dir, 'photos')
image_paths = [
    os.path.join(photos_dir, file_name)
    for file_name in os.listdir(photos_dir)
    if file_name.endswith('.png')
]

# Sort numerically by the integer file stem (0.png, 1.png, ..., 10.png) rather than
# lexicographically; os.path.basename keeps this independent of the path separator.
image_paths = sorted(image_paths, key=lambda path: int(os.path.basename(path).split('.')[0]))


In [12]:
# Switch for the SAM pass over all photos. With False the cell does nothing, so the rest of
# the notebook presumably relies on segmentation files already present on disk.
# NOTE(review): the log shown below was produced by an earlier run with the flag set to True.
run_segmentation = False

if run_segmentation:
    # Build the segmentor from sam_params and run it over image_paths, saving into segmentations_folder_path.
    image_segmentor = ImageSegmentation(sam_params)   
    image_segmentor.batch_predict(image_paths, segmentations_folder_path, save_overlap=True)

Processing image 1/285.
Processing image 2/285.
Processing image 3/285.
Processing image 4/285.
Processing image 5/285.
Processing image 6/285.
Processing image 7/285.
Processing image 8/285.
Processing image 9/285.
Processing image 10/285.
Processing image 11/285.
Processing image 12/285.
Processing image 13/285.
Processing image 14/285.
Processing image 15/285.
Processing image 16/285.
Processing image 17/285.
Processing image 18/285.
Processing image 19/285.
Processing image 20/285.
Processing image 21/285.
Processing image 22/285.
Processing image 23/285.
Processing image 24/285.
Processing image 25/285.
Processing image 26/285.
Processing image 27/285.
Processing image 28/285.
Processing image 29/285.
Processing image 30/285.
Processing image 31/285.
Processing image 32/285.
Processing image 33/285.
Processing image 34/285.
Processing image 35/285.
Processing image 36/285.
Processing image 37/285.
Processing image 38/285.
Processing image 39/285.
Processing image 40/285.
Processin

#### 2b. Mask filtering

In [1]:
from ssfm.simple_mask_filter import AreaFilter
# Callable filter object; it is invoked with a config dict in the next cell.
area_filter = AreaFilter()

In [12]:
# Input: the segmentation folder of the current scene, derived from scene_dir so that
# switching scenes does not silently keep filtering another scene's masks.
raw_segmentations_folder_path = os.path.join(scene_dir, 'segmentations')

# Output: filtered masks go to a sibling folder. From here on segmentations_folder_path
# refers to the FILTERED masks, which is what the later cells consume.
segmentations_folder_path = os.path.join(scene_dir, 'segmentations_filtered')

config = {
    'area_lower_threshold': 100,  # presumably a minimum mask area in pixels - TODO confirm
    'segmentation_folder_path': raw_segmentations_folder_path,
    'output_folder_path': segmentations_folder_path,
    'num_processes': 8,
}

area_filter(config)

100%|██████████| 285/285 [00:06<00:00, 45.04it/s]


### 3. Create projection associations

In [3]:
from ssfm.probabilistic_projection import *

In [6]:
# Projector that associates point-cloud points with image pixels.
# NOTE(review): the unit/meaning of depth_filtering_threshold=1.8 is not visible here - confirm in ssfm.probabilistic_projection.
pointcloud_projector = PointcloudProjection(depth_filtering_threshold=1.8)
# Load the camera parameters of the Kubric scene from the scene folder.
pointcloud_projector.read_kubric_camera_parameters(scene_dir)
# Load the point cloud that will be projected into every image.
pointcloud_projector.read_pointcloud(pointcloud_path)

In [7]:
# File names (not full paths) of the scene's PNG photos, ordered by the integer
# in the file name so that 10.png comes after 9.png.
photo_folder_path = os.path.join(scene_dir, 'photos')
image_list = sorted(
    (file_name for file_name in os.listdir(photo_folder_path) if file_name.endswith('.png')),
    key=lambda file_name: int(file_name.split('.')[0]),
)

In [8]:
# Older entry point, kept for reference only (save_folder_path is not defined in this notebook):
#pointcloud_projector.parallel_batch_project(image_list, save_folder_path)
# Project the point cloud into every image with 16 joblib workers and store the results in
# associations_folder_path; depth maps are not written (save_depth=False).
pointcloud_projector.parallel_batch_project_joblib(image_list, associations_folder_path, num_workers=16, save_depth=False)

Processing frames: 100%|██████████| 285/285 [00:16<00:00, 17.45it/s]


In [10]:
# build keyimage associations
from ssfm.keyimage_associations_builder import *

In [13]:
# Echo the two folders first: segmentations_folder_path must already point at the
# filtered masks (it is rebound in the mask-filtering cell), see the output below.
print(associations_folder_path)
print(segmentations_folder_path)
# Build the keyimage associations from the projection associations and the masks.
smc_solver = KeyimageAssociationsBuilder(associations_folder_path, segmentations_folder_path)
smc_solver.build_associations()

../../data/kubric_1/associations
../../data/kubric_1/segmentations_filtered


100%|██████████| 285/285 [00:01<00:00, 199.67it/s]


In [14]:
# Runs the cover search on the builder; the call prints the point-coverage table shown below.
smc_solver.find_min_cover()

| Metric                                                       | Count      | Percentage           |
----------------------------------------------------------------------------------------------------
| Number of points not covered by any image                    | 689260     | 33.00                |
| Number of points covered by less than or equal to 1 image    | 804538     | 38.52                |
| Number of points covered by less than or equal to 3 images   | 1093814    | 52.37                |
| Number of points covered by less than or equal to 5 images   | 1391624    | 66.63                |


### 5. Estimate memory usage

In [13]:
from ssfm.memory_calculator import memory_calculator

In [14]:
# Inputs for the memory estimate (the call prints the table shown below).
# pointcloud file
las_file = pointcloud_path
# image file sample (first image of the sorted list; presumably only its size matters - TODO confirm)
image_file = os.path.join(photos_folder_path, image_list[0])
# number of images
num_images = len(image_list)
# number of segmentation ids for each point in the point cloud
# NOTE(review): 5 is hard-coded here; confirm it matches the value used by ObjectRegistration.
num_segmentation_ids = 5

memory_calculator(las_file, image_file, num_images, num_segmentation_ids)

+----------------------------------------+-----------------------+
|              Memory Type               |  Memory Required (GB) |
+----------------------------------------+-----------------------+
|      Segmentation for each image       |     0.00048828125     |
| Pixel2point association for each image |      0.0009765625     |
| Point2pixel association for each image | 0.0077807605266571045 |
|                                        |                       |
|      Segmentation for all images       |     0.13916015625     |
| Pixel2point association for all images |      0.2783203125     |
| Point2pixel association for all images |   2.217516750097275   |
|          pc_segmentation_ids           |  0.03890380263328552  |
|         pc_segmentation_probs          |  0.03890380263328552  |
|          keyimage_association          |   0.5543791875243187  |
|                 Total                  |   3.2671840116381645  |
+----------------------------------------+--------------------

### 6. Run object registration

In [15]:
from ssfm.object_registration import *
from ssfm.post_processing import *

# File name of the keyimage associations, passed to ObjectRegistration below.
keyimage_associations_file_name = 'associations_keyimage.npy'
# NOTE(review): not referenced by any cell visible in this notebook - confirm whether it is still needed.
keyimage_yaml_name= 'keyimages.yaml'

In [16]:
# Sanity check of the three inputs handed to ObjectRegistration in the next cell;
# segmentations_folder_path is expected to be the *_filtered folder at this point (see output).
print(pointcloud_path)
print(associations_folder_path)
print(segmentations_folder_path)

../../data/kubric_1/reconstructions/combined_point_cloud.las
../../data/kubric_1/associations
../../data/kubric_1/segmentations_filtered


In [17]:
# Create object registration from the point cloud, the (filtered) masks and the associations
obr = ObjectRegistration(pointcloud_path, segmentations_folder_path, associations_folder_path, keyimage_associations_file_name=keyimage_associations_file_name)

# Run object registration (about 5.5 minutes for 285 images in the recorded run below).
# NOTE(review): save_semantics=True presumably writes the semantics_<image_id>.npy files that
# the later cells load from <associations>/semantics - confirm in ssfm.object_registration.
obr.object_registration(iou_threshold=0.50, save_semantics=True)

Processing images: 100%|██████████| 285/285 [05:23<00:00,  1.13s/it]


In [29]:
def add_semantics_to_pointcloud(pointcloud_path, semantics_path, save_las_path, remove_small_N, nearest_interpolation=False):
    """
    Add semantics to the point cloud and save it as a .las file.

    The semantic id of every point is stored in the ``intensity`` field of the
    written file. Semantics that cover fewer than ``remove_small_N`` points are
    reset to -1 (unlabeled); optionally, unlabeled points then receive the
    majority label of their nearest labeled neighbors.

    Parameters
    ----------
    pointcloud_path : str, the path to the point cloud; must end with '.las' or '.npy'
    semantics_path : str, the path to the .npy semantics file (one id per point, negative = unlabeled)
    save_las_path : str, the path to save the .las file
    remove_small_N : int, remove the semantics with numbers smaller than N
    nearest_interpolation : False, not to use nearest interpolation to assign semantics to the unlabeled points; positive integer, the number of nearest neighbors to use for nearest interpolation

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the point cloud extension is unsupported, if the number of semantics
        does not match the number of points, if ``nearest_interpolation`` is not
        a positive integer, or if interpolation is requested but no labeled
        point is left to interpolate from.
    """
    if pointcloud_path.endswith('.las'):
        points, colors = read_las_file(pointcloud_path)
    elif pointcloud_path.endswith('.npy'):
        points, colors = read_mesh_file(pointcloud_path)
        colors = colors * 255
    else:
        # Previously this fell through and failed later with an unbound `points`.
        raise ValueError("Unsupported point cloud file (expected .las or .npy): {}".format(pointcloud_path))

    semantics = np.load(semantics_path)
    if semantics.shape[0] != points.shape[0]:
        raise ValueError(
            "Number of semantics ({}) does not match number of points ({})".format(
                semantics.shape[0], points.shape[0]))

    # One pass gives both the ids and how many points carry each id.
    semantics_ids, points_per_id = np.unique(semantics, return_counts=True)
    N_semantics = len(semantics_ids)

    print("Before removing small semantics: ")
    print('maximum of semantics: ', semantics.max())
    print('number of unique semantics: ', N_semantics)

    # remove the semantics with numbers smaller than a threshold
    small_ids = semantics_ids[points_per_id < remove_small_N]
    semantics[np.isin(semantics, small_ids)] = -1

    print("After removing small semantics: ")
    print('number of unique semantics: ', len(np.unique(semantics)))

    # construct a .las file
    hdr = laspy.LasHeader(version="1.2", point_format=3)
    # NOTE(review): the laspy 2.x documentation names the header attributes `scales` and
    # `offsets`; confirm that `scale` is honoured by the installed laspy version.
    hdr.scale = [0.0001, 0.0001, 0.0001]  # Example scale factor, adjust as needed

    # Create a LasData object
    las = laspy.LasData(hdr)

    # Add points
    las.x = points[:, 0]
    las.y = points[:, 1]
    las.z = points[:, 2]

    # Add colors
    las.red = colors[:, 0]
    las.green = colors[:, 1]
    las.blue = colors[:, 2]

    # Add semantics
    # NOTE(review): LAS intensity is a 16-bit unsigned field, so -1 (unlabeled) and ids above
    # 65535 cannot be stored faithfully - confirm how downstream readers treat such values.
    if nearest_interpolation is False:
        las.intensity = semantics
    else:
        requested_k = int(nearest_interpolation)
        if requested_k < 1:
            raise ValueError("nearest_interpolation must be False or a positive integer, got {!r}".format(nearest_interpolation))

        # labeled points are the points with semantics >=0; unlabeled points are the points with semantics < 0
        labeled_mask = semantics >= 0
        unlabeled_mask = semantics < 0
        labeled_points = points[labeled_mask]
        # bincount needs non-negative integers; labels are >= 0 here by construction.
        labeled_semantics = semantics[labeled_mask].astype(np.int64)
        unlabeled_points = points[unlabeled_mask]

        # Start from the current labels (integer dtype) and only overwrite the unlabeled entries.
        combined_semantics = semantics.copy()

        if unlabeled_points.shape[0] > 0:
            if labeled_points.shape[0] == 0:
                raise ValueError("No labeled points left to interpolate from; lower remove_small_N")

            # construct a KDTree
            tree = cKDTree(labeled_points)

            # Never ask for more neighbors than exist: the tree would otherwise return
            # out-of-range indices for the missing ones.
            k = min(requested_k, labeled_points.shape[0])
            _, indices = tree.query(unlabeled_points, k=k)
            # With k == 1 the query result is 1-D; normalise to (n_unlabeled, k).
            indices = np.asarray(indices).reshape(-1, k)
            nearest_semantics = labeled_semantics[indices]

            # find the most frequent semantics (ties resolve to the smallest id)
            unlabeled_semantics = np.array([np.bincount(votes).argmax() for votes in nearest_semantics])
            combined_semantics[unlabeled_mask] = unlabeled_semantics

        las.intensity = combined_semantics

    # Write the LAS file
    las.write(save_las_path)

In [59]:
# Label the point cloud with the semantics saved for image 284 (the last of the 285 images).
image_id = 284
remove_small_N = 500

# Both the input semantics (.npy) and the labelled output (.las) live in <associations>/semantics.
semantics_dir = os.path.join(associations_folder_path, 'semantics')
# Note: despite its name, semantics_folder_path is the path of a single .npy file.
semantics_folder_path = os.path.join(semantics_dir, f'semantics_{image_id}.npy')
save_las_path = os.path.join(semantics_dir, f'semantics_{image_id}.las')

add_semantics_to_pointcloud(
    pointcloud_path,
    semantics_folder_path,
    save_las_path,
    remove_small_N,
    nearest_interpolation=500,
)

Before removing small semantics: 
maximum of semantics:  5256
number of unique semantics:  814
After removing small semantics: 
number of unique semantics:  227


In [60]:
print(save_las_path)

# read las
pc = laspy.read(save_las_path)
# get the semantics from the intensity
semantics = pc.intensity
# ids and their point counts in a single pass instead of one full scan per id
semantics_ids, points_per_id = np.unique(semantics, return_counts=True)
print('number of unique semantics: ', len(semantics_ids))

# list every semantics id that covers fewer than 100 points and count them
K = 0
for i, n in zip(semantics_ids, points_per_id):
    if n < 100:
        print('semantics id: ', i, ' number of points: ', n)
        K += 1

print('number of semantics with less than 100 points: ', K)

../../data/kubric_1/associations/semantics/semantics_284.las
number of unique semantics:  226
number of semantics with less than 100 points:  0


In [61]:
# Input: the labelled (un-shuffled) .las written by add_semantics_to_pointcloud. The path is
# rebuilt from image_id instead of being read from save_las_path, because save_las_path is
# rebound below; otherwise re-running this cell would shuffle the already shuffled file.
semantic_pc_file_path = os.path.join(associations_folder_path, 'semantics', 'semantics_{}.las'.format(image_id))
post_processing = PostProcessing(semantic_pc_file_path)
post_processing.shuffle_semantic_ids()

# Output: from here on save_las_path refers to the shuffled file, which the validation cells read.
save_las_path = os.path.join(associations_folder_path, 'semantics', 'semantics_{}_shuffled.las'.format(image_id))
post_processing.save_semantic_pointcloud(save_las_path)

Number of unique semantics:  226


In [62]:
# Recorded run "500, 500": matches remove_small_N=500 and nearest_interpolation=500 used above.
# The cell validates whatever save_las_path currently points to against pointcloud_path.
from validation import Validator

thresholds = np.arange(0.5, 1.0, 0.05)
validator = Validator(save_las_path, pointcloud_path)
results = validator.validate(thresholds)

# Per-threshold scores first, then their plain averages.
ap_per_threshold, ar_per_threshold = results['AP'], results['AR']
print('AP: ', ap_per_threshold, ' AR: ', ar_per_threshold)

mAP = np.sum(ap_per_threshold) / len(ap_per_threshold)
mAR = np.sum(ar_per_threshold) / len(ar_per_threshold)
print('mAP: ', mAP, ' mAR: ', mAR)

Unique semantics in prediction:  226
Unique semantics in ground truth:  201
AP:  [0.8539823008849557, 0.8451327433628318, 0.8185840707964602, 0.8008849557522124, 0.7876106194690266, 0.7389380530973452, 0.6858407079646017, 0.6061946902654868, 0.3805309734513274, 0.084070796460177]  AR:  [0.9601990049751243, 0.9502487562189055, 0.9203980099502488, 0.900497512437811, 0.8855721393034826, 0.8308457711442786, 0.7711442786069652, 0.681592039800995, 0.42786069651741293, 0.0945273631840796]
mAP:  0.6601769911504425  mAR:  0.7422885572139303


In [58]:
# Recorded run "400, 500": presumably (remove_small_N, nearest_interpolation) - TODO confirm.
# NOTE(review): the cell validates whatever save_las_path currently points to, so the output
# below is only reproduced after re-running the upstream cells with those settings.
from validation import Validator

thresholds = np.arange(0.5, 1.0, 0.05)
validator = Validator(save_las_path, pointcloud_path)
results = validator.validate(thresholds)

# Per-threshold scores first, then their plain averages.
ap_per_threshold, ar_per_threshold = results['AP'], results['AR']
print('AP: ', ap_per_threshold, ' AR: ', ar_per_threshold)

mAP = np.sum(ap_per_threshold) / len(ap_per_threshold)
mAR = np.sum(ar_per_threshold) / len(ar_per_threshold)
print('mAP: ', mAP, ' mAR: ', mAR)

Unique semantics in prediction:  241
Unique semantics in ground truth:  201
AP:  [0.8091286307053942, 0.8008298755186722, 0.7759336099585062, 0.7510373443983402, 0.7261410788381742, 0.6804979253112033, 0.6265560165975104, 0.5518672199170125, 0.37344398340248963, 0.08298755186721991]  AR:  [0.9701492537313433, 0.9601990049751243, 0.9303482587064676, 0.900497512437811, 0.8706467661691543, 0.8159203980099502, 0.7512437810945274, 0.6616915422885572, 0.44776119402985076, 0.09950248756218906]
mAP:  0.6178423236514522  mAR:  0.7407960199004975


In [54]:
# Recorded run "400, 350": presumably (remove_small_N, nearest_interpolation) - TODO confirm.
# NOTE(review): the cell validates whatever save_las_path currently points to, so the output
# below is only reproduced after re-running the upstream cells with those settings.
from validation import Validator

thresholds = np.arange(0.5, 1.0, 0.05)
validator = Validator(save_las_path, pointcloud_path)
results = validator.validate(thresholds)

# Per-threshold scores first, then their plain averages.
ap_per_threshold, ar_per_threshold = results['AP'], results['AR']
print('AP: ', ap_per_threshold, ' AR: ', ar_per_threshold)

mAP = np.sum(ap_per_threshold) / len(ap_per_threshold)
mAR = np.sum(ar_per_threshold) / len(ar_per_threshold)
print('mAP: ', mAP, ' mAR: ', mAR)

Unique semantics in prediction:  241
Unique semantics in ground truth:  201
AP:  [0.8091286307053942, 0.7966804979253111, 0.7676348547717843, 0.7510373443983402, 0.7261410788381742, 0.6763485477178424, 0.6265560165975104, 0.5518672199170125, 0.3817427385892116, 0.08298755186721991]  AR:  [0.9701492537313433, 0.9552238805970149, 0.9203980099502488, 0.900497512437811, 0.8706467661691543, 0.8109452736318408, 0.7512437810945274, 0.6616915422885572, 0.4577114427860697, 0.09950248756218906]
mAP:  0.61701244813278  mAR:  0.7398009950248756


In [50]:
# Recorded run "500, 350": presumably (remove_small_N, nearest_interpolation) - TODO confirm.
# NOTE(review): the cell validates whatever save_las_path currently points to, so the output
# below is only reproduced after re-running the upstream cells with those settings.
from validation import Validator

thresholds = np.arange(0.5, 1.0, 0.05)
validator = Validator(save_las_path, pointcloud_path)
results = validator.validate(thresholds)

# Per-threshold scores first, then their plain averages.
ap_per_threshold, ar_per_threshold = results['AP'], results['AR']
print('AP: ', ap_per_threshold, ' AR: ', ar_per_threshold)

mAP = np.sum(ap_per_threshold) / len(ap_per_threshold)
mAR = np.sum(ar_per_threshold) / len(ar_per_threshold)
print('mAP: ', mAP, ' mAR: ', mAR)

Unique semantics in prediction:  226
Unique semantics in ground truth:  201
AP:  [0.8539823008849557, 0.8407079646017699, 0.8141592920353983, 0.8008849557522124, 0.7876106194690266, 0.7389380530973452, 0.6858407079646017, 0.6061946902654868, 0.3938053097345133, 0.084070796460177]  AR:  [0.9601990049751243, 0.945273631840796, 0.9154228855721394, 0.900497512437811, 0.8855721393034826, 0.8308457711442786, 0.7711442786069652, 0.681592039800995, 0.4427860696517413, 0.0945273631840796]
mAP:  0.6606194690265486  mAR:  0.7427860696517412


In [46]:
# Recorded run "600, 350": presumably (remove_small_N, nearest_interpolation) - TODO confirm.
# NOTE(review): the cell validates whatever save_las_path currently points to, so the output
# below is only reproduced after re-running the upstream cells with those settings.
from validation import Validator

thresholds = np.arange(0.5, 1.0, 0.05)
validator = Validator(save_las_path, pointcloud_path)
results = validator.validate(thresholds)

# Per-threshold scores first, then their plain averages.
ap_per_threshold, ar_per_threshold = results['AP'], results['AR']
print('AP: ', ap_per_threshold, ' AR: ', ar_per_threshold)

mAP = np.sum(ap_per_threshold) / len(ap_per_threshold)
mAR = np.sum(ar_per_threshold) / len(ar_per_threshold)
print('mAP: ', mAP, ' mAR: ', mAR)

Unique semantics in prediction:  201
Unique semantics in ground truth:  201
AP:  [0.8706467661691543, 0.8557213930348259, 0.8208955223880597, 0.8059701492537313, 0.8009950248756219, 0.746268656716418, 0.6915422885572139, 0.6119402985074627, 0.40298507462686567, 0.0945273631840796]  AR:  [0.8706467661691543, 0.8557213930348259, 0.8208955223880597, 0.8059701492537313, 0.8009950248756219, 0.746268656716418, 0.6915422885572139, 0.6119402985074627, 0.40298507462686567, 0.0945273631840796]
mAP:  0.6701492537313433  mAR:  0.6701492537313433
