# Semantic SfM of a single scene in ScanNet

### 1a. Download the entire ScanNet dataset

Find information on ScanNet website: http://www.scan-net.org/ScanNet/

After downloading the data, extract the data from each `.sens` file using [SensReader/reader.py](https://github.com/ScanNet/ScanNet/tree/master/SensReader/python). To extract all scans in the test data, I wrote a script that automates running `reader.py`:

```python
import os
import subprocess
import numpy as np

# Root directory that is searched recursively for .sens files
base_dir = "/home/zchen256/semantic_SfM/data/scannet/scans"

# Fraction of the .sens files to extract; the rest are skipped at random
sample_ratio = 0.05

# Iterate over all files below the base directory
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if not file.endswith(".sens"):
            continue
        # Random sub-sampling: keep roughly `sample_ratio` of the .sens files
        if np.random.rand() > sample_ratio:
            continue
        # Full path to the .sens file
        sens_file = os.path.join(root, file)
        # Extracted data goes to an "output" folder next to the .sens file
        output_path = os.path.join(root, "output")
        os.makedirs(output_path, exist_ok=True)
        # reader.py is given as a relative path, so it is resolved against
        # the current working directory
        command = [
            "python3", "reader.py",
            "--filename", sens_file,
            "--output_path", output_path,
            "--export_depth_images",
            "--export_color_images",
            "--export_poses",
            "--export_intrinsics"
        ]
        print(command)
        result = subprocess.run(command)
        # Report a failed extraction instead of silently ignoring the exit
        # code; keep going so one bad scan does not stop the batch
        if result.returncode != 0:
            print(f"Error: reader.py exited with code {result.returncode} for {sens_file}")
```

### 1b. Extract ScanNet data to prepare SSfM data in batch

In [1]:
from scene_extractor import *

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
def batch_extract_scenes(scene_dir, save_dir, training=False):
    """
    Run SceneExtractor on every scene folder found in scene_dir.

    Scenes without an 'output' sub-folder are reported and skipped. A failure
    while processing one scene is printed and does not stop the batch.

    Parameters
    ----------
    scene_dir : str, the directory that contains one sub-folder per scene
    save_dir : str, the directory in which a folder named after each processed scene is created
    training : bool, if True, segmentations and ground truth are extracted for each scene

    Returns
    -------
    None
    """
    # iterate over folders in scene_dir
    for scene_folder in os.listdir(scene_dir):
        scene_folder_path = os.path.join(scene_dir, scene_folder)
        save_folder_path = os.path.join(save_dir, scene_folder)
        output_folder_path = os.path.join(scene_folder_path, 'output')
        # the check is on the 'output' sub-folder, so report that path
        if not os.path.exists(output_folder_path):
            print(f'Error: {output_folder_path} does not exist')
            continue
        os.makedirs(save_folder_path, exist_ok=True)
        try:
            scene_extractor = SceneExtractor(scene_folder_path, save_folder_path)
            print(f'Extracting scene {scene_folder}')
            if training:
                scene_extractor.extract_segmentations()
                scene_extractor.extract_ground_truth()
        except Exception as e:
            # best-effort batch: report the scene and move on to the next one
            print(f'Error in extracting scene {scene_folder}: {e}')
            continue

In [4]:
# Source folder with the scans and destination folder for the extracted data
scan_dir = '../../data/scannet/scans'
save_dir = '../../data/scannet/ssfm_train'

# training=True also extracts segmentations and ground truth for each scene
batch_extract_scenes(
    scene_dir=scan_dir,
    save_dir=save_dir,
    training=True,
)

Error in extracting scene scene0586_01: Extract scene folder does not exist!
Error in extracting scene scene0705_00: Extract scene folder does not exist!
Error in extracting scene scene0667_00: Extract scene folder does not exist!
Error in extracting scene scene0620_01: Extract scene folder does not exist!
Error in extracting scene scene0638_00: Extract scene folder does not exist!
Error in extracting scene scene0454_00: Extract scene folder does not exist!
Error in extracting scene scene0435_03: Extract scene folder does not exist!
Error in extracting scene scene0536_00: Extract scene folder does not exist!
Error in extracting scene scene0472_02: Extract scene folder does not exist!
Error in extracting scene scene0571_01: Extract scene folder does not exist!
Error in extracting scene scene0690_00: Extract scene folder does not exist!
Error in extracting scene scene0569_00: Extract scene folder does not exist!
Error in extracting scene scene0510_02: Extract scene folder does not exist!

In [None]:
import os

scene_dir = '../../data/scene0000_00'

# Sub-folders of the scene directory used by the following steps
photos_folder_path = os.path.join(scene_dir, 'photos')
associations_folder_path = os.path.join(scene_dir, 'associations')
segmentations_folder_path = os.path.join(scene_dir, 'segmentations')

### 2. Select keyimages based on blur

In [18]:
from select_keyimages import select_scannet_keyimages

In [19]:
# Select the keyimages of the scene (the printed log below reports where
# the resulting keyimages.yaml is saved)
select_scannet_keyimages(
    scene_dir,
    ratio=0.2,
    threshold=180,
    file_cluster_size=30,
    file_select_window=10,
    n_jobs=8,
)

Processing images: 100%|██████████| 5578/5578 [00:27<00:00, 203.72it/s]


Number of keyimages_threshold:  2963
Number of keyimages_ratio:  1115
Number of keyimages:  2963
Number of selected keyimages from window:  558
Number of selected keyimages:  614
Total images:  5578
Keyimages saved to:  ../../data/scene0000_00/associations/keyimages.yaml


### 3a. Segment keyimages

In [None]:
from ssfm.image_segmentation import ImageSegmentation

In [8]:
# Settings passed to ImageSegmentation
sam_params = {
    'model_name': 'sam',
    'model_path': '../../semantic_SfM/sam/sam_vit_h_4b8939.pth',
    'model_type': 'vit_h',
    'device': 'cuda:6',
    'points_per_side': 32,
    'pred_iou_thresh': 0.96,
    'stability_score_thresh': 0.96,
    'crop_n_layers': 1,
}

In [7]:
using_keyimages = True


def _frame_index(image_path):
    """Return the integer frame number encoded in an image file name, e.g. 'photos/19.jpg' -> 19."""
    # Parse the bare file name only: directory parts may contain '_' or '.'
    # (e.g. '../../data/scene0000_00/photos'), which would corrupt the split.
    file_name = os.path.basename(image_path)
    return int(file_name.split('_')[-1].split('.')[0])


if using_keyimages:
    keyimages_path = os.path.join(scene_dir, 'associations', 'keyimages.yaml')
    # read keyimages (safe_load, as in the projection step that reads the same file)
    with open(keyimages_path, 'r') as f:
        keyimages = yaml.safe_load(f)

    # replace .npy with .jpg
    images = [keyimage.replace('.npy', '.jpg') for keyimage in keyimages]
    # sort images by the frame number in their file names
    images = sorted(images, key=_frame_index)
    image_paths = [os.path.join(scene_dir, 'photos', image) for image in images]

else:
    # use every image in the photos folder
    image_paths = [os.path.join(scene_dir, 'photos', image) for image in os.listdir(os.path.join(scene_dir, 'photos'))]
    # sort images by the frame number in their file names
    image_paths = sorted(image_paths, key=_frame_index)

print(image_paths)

['../../data/scene0000_00/photos/0.jpg', '../../data/scene0000_00/photos/19.jpg', '../../data/scene0000_00/photos/28.jpg', '../../data/scene0000_00/photos/36.jpg', '../../data/scene0000_00/photos/49.jpg', '../../data/scene0000_00/photos/55.jpg', '../../data/scene0000_00/photos/60.jpg', '../../data/scene0000_00/photos/70.jpg', '../../data/scene0000_00/photos/75.jpg', '../../data/scene0000_00/photos/80.jpg', '../../data/scene0000_00/photos/90.jpg', '../../data/scene0000_00/photos/100.jpg', '../../data/scene0000_00/photos/115.jpg', '../../data/scene0000_00/photos/129.jpg', '../../data/scene0000_00/photos/138.jpg', '../../data/scene0000_00/photos/141.jpg', '../../data/scene0000_00/photos/150.jpg', '../../data/scene0000_00/photos/167.jpg', '../../data/scene0000_00/photos/179.jpg', '../../data/scene0000_00/photos/183.jpg', '../../data/scene0000_00/photos/193.jpg', '../../data/scene0000_00/photos/200.jpg', '../../data/scene0000_00/photos/206.jpg', '../../data/scene0000_00/photos/212.jpg', '..

In [10]:
# Folder that receives the segmentation results
segmentations_folder_path = os.path.join(scene_dir, 'segmentations')

image_segmentor = ImageSegmentation(sam_params)
#image_segmentor.set_distortion_correction('../data/courtright/SfM_products/agisoft_cameras.xml')
image_segmentor.batch_predict(
    image_paths,
    segmentations_folder_path,
    maximum_size=10000,
    save_overlap=True,
)

Processing image 1/614.
Processing image 2/614.
Processing image 3/614.
Processing image 4/614.
Processing image 5/614.
Processing image 6/614.
Processing image 7/614.
Processing image 8/614.
Processing image 9/614.
Processing image 10/614.
Processing image 11/614.
Processing image 12/614.
Processing image 13/614.
Processing image 14/614.
Processing image 15/614.
Processing image 16/614.
Processing image 17/614.
Processing image 18/614.
Processing image 19/614.
Processing image 20/614.
Processing image 21/614.
Processing image 22/614.
Processing image 23/614.
Processing image 24/614.
Processing image 25/614.
Processing image 26/614.
Processing image 27/614.
Processing image 28/614.
Processing image 29/614.
Processing image 30/614.
Processing image 31/614.
Processing image 32/614.
Processing image 33/614.
Processing image 34/614.
Processing image 35/614.
Processing image 36/614.
Processing image 37/614.
Processing image 38/614.
Processing image 39/614.
Processing image 40/614.
Processin

### 3b. GroundingDINO filter

wintermute 

In [2]:
from ssfm.groundingDINO_mask_filter import *

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Settings passed to GroundingDINOMaskFilter.predict_bounding_boxes
grounding_dino_config = {
    'weights_path': "../../semantic_SfM/grounding_DINO/groundingdino_swint_ogc.pth",
    'config_path': "../../semantic_SfM/grounding_DINO/GroundingDINO_SwinT_OGC.py",
    'prediction_save_folder_path': "../../data/scene0000_00/grounding_dino_predictions",
    'text_prompt': [
        "cabinet", "bed", "chair", "sofa", "table", "door", "window",
        "bookshelf", "picture", "counter", "desk", "curtain", "refrigerator",
        "shower curtain", "toilet", "sink", "bathtub", "furniture",
    ],
    'box_treshold': 0.6,
    'text_treshold': 0.6,
    'device': 'cuda:5',
    'remove_background': True,
}

# Input and output locations for the mask filtering step
mask_folder_path = "../../data/scene0000_00/segmentations"
save_folder_path = "../../data/scene0000_00/segmentations_filtered"
image_folder_path = "../../data/scene0000_00/photos"
keyimages_path = "../../data/scene0000_00/associations/keyimages.yaml"


In [11]:
# Build the filter from the mask folder and the folder for its results
mask_filter = GroundingDINOMaskFilter(
    mask_folder_path,
    save_folder_path,
)

# Predict bounding boxes using the GroundingDINO settings and the keyimage list
mask_filter.predict_bounding_boxes(
    image_folder_path,
    grounding_dino_config,
    keyimages_path,
)

# how to get other furniture classes?

The number of images: 5578
final text_encoder_type: bert-base-uncased


100%|██████████| 614/614 [32:28<00:00,  3.17s/it]


### 3c. Filtering

### 4. Create projection associations

In [60]:
from ssfm.probabilistic_projection import *

In [61]:
# Mesh vertices (with color) of the scene
mesh_file_path = os.path.join(scene_dir, 'reconstructions', 'mesh_vertices_color.npy')

# Load the camera parameters first, then the mesh
pointcloud_projector = PointcloudProjection(depth_filtering_threshold=0.005)
pointcloud_projector.read_scannet_camera_parameters(scene_dir)
pointcloud_projector.read_scannet_mesh(mesh_file_path)

In [62]:
assert os.path.exists(keyimages_path), 'Keyimages not found'

with open(keyimages_path, 'r') as f:
    keyimages = yaml.safe_load(f)

# keyimages is a list of strings; swap each file extension for .jpg
image_list = [
    os.path.splitext(keyimage_name)[0] + '.jpg'
    for keyimage_name in keyimages
]

In [63]:
#pointcloud_projector.parallel_batch_project(image_list, save_folder_path)
# Project the keyimages in parallel; results go to the associations folder
pointcloud_projector.parallel_batch_project_joblib(
    image_list,
    associations_folder_path,
    num_workers=16,
    save_depth=False,
)

Processing frames: 100%|██████████| 614/614 [02:30<00:00,  4.09it/s]


In [64]:
# build keyimage associations
from ssfm.keyimage_associations_builder import *

In [65]:
# The builder takes the associations folder and the segmentations folder
smc_solver = KeyimageAssociationsBuilder(
    associations_folder_path,
    segmentations_folder_path,
)
smc_solver.build_associations()

100%|██████████| 614/614 [00:03<00:00, 182.05it/s]


In [66]:
# NOTE(review): presumably produces the point-coverage table printed below — confirm against KeyimageAssociationsBuilder
smc_solver.find_min_cover()

| Metric                                                       | Count      | Percentage           |
----------------------------------------------------------------------------------------------------
| Number of points not covered by any image                    | 8252       | 10.14                |
| Number of points covered by less than or equal to 1 image    | 16822      | 20.67                |
| Number of points covered by less than or equal to 3 images   | 28589      | 35.14                |
| Number of points covered by less than or equal to 5 images   | 36521      | 44.88                |


### 5. Estimate memory usage

In [67]:
from ssfm.memory_calculator import memory_calculator

In [68]:
# number of segmentation ids for each point in the point cloud
num_segmentation_ids = 5
# number of images
num_images = len(image_list)
# image file sample (the first image in the list)
image_file = os.path.join(photos_folder_path, image_list[0])
# pointcloud file
npy_file = mesh_file_path

memory_calculator(
    npy_file,
    image_file,
    num_images,
    num_segmentation_ids,
)

+----------------------------------------+-----------------------+
|              Memory Type               |  Memory Required (GB) |
+----------------------------------------+-----------------------+
|      Segmentation for each image       |  0.002336740493774414 |
| Pixel2point association for each image |  0.004673480987548828 |
| Point2pixel association for each image | 0.0003031231462955475 |
|                                        |                       |
|      Segmentation for all images       |   1.4347586631774902  |
| Pixel2point association for all images |   2.8695173263549805  |
| Point2pixel association for all images |  0.18611761182546616  |
|          pc_segmentation_ids           | 0.0015156157314777374 |
|         pc_segmentation_probs          | 0.0015156157314777374 |
|          keyimage_association          |  0.04652940295636654  |
|                 Total                  |   4.539954235777259   |
+----------------------------------------+--------------------

### 6. Run object registration

In [69]:
from ssfm.object_registration import *
from ssfm.post_processing import *

In [70]:
# File names passed to ObjectRegistration
keyimage_yaml_name = 'keyimages.yaml'
keyimage_associations_file_name = 'associations_keyimage.npy'

# Point cloud of the scene (mesh vertices with color)
pointcloud_path = os.path.join(scene_dir, 'reconstructions', 'mesh_vertices_color.npy')

In [77]:
# Set up the object registration
obr = ObjectRegistration(
    pointcloud_path,
    segmentations_folder_path,
    associations_folder_path,
    keyimage_associations_file_name=keyimage_associations_file_name,
    keyimage_yaml_name=keyimage_yaml_name,
)

# Run it
obr.object_registration(iou_threshold=0.5, save_semantics=True)

Processing image 1/614: 0.npy
Processing image 2/614: 100.npy
Processing image 3/614: 1000.npy
Processing image 4/614: 1017.npy
Processing image 5/614: 1020.npy
Processing image 6/614: 1031.npy
Processing image 7/614: 1043.npy
Processing image 8/614: 1055.npy
Processing image 9/614: 1069.npy
Processing image 10/614: 1078.npy
Processing image 11/614: 1085.npy
Processing image 12/614: 1093.npy
Processing image 13/614: 1100.npy
Processing image 14/614: 1111.npy
Processing image 15/614: 1129.npy
Processing image 16/614: 1139.npy
Processing image 17/614: 1147.npy
Processing image 18/614: 115.npy
Processing image 19/614: 1150.npy
Processing image 20/614: 1162.npy
Processing image 21/614: 1167.npy
Processing image 22/614: 1178.npy
Processing image 23/614: 1180.npy
Processing image 24/614: 1193.npy
Processing image 25/614: 1200.npy
Processing image 26/614: 1217.npy
Processing image 27/614: 1229.npy
Processing image 28/614: 1230.npy
Processing image 29/614: 1241.npy
Processing image 30/614: 125

In [86]:
def add_semantics_to_pointcloud(pointcloud_path, semantics_path, save_las_path, nearest_interpolation=False):
    """
    Add semantics to the point cloud and save it as a .las file.

    The semantics are written to the intensity field of the output file.

    Parameters
    ----------
    pointcloud_path : str, the path to the point cloud, either a .las file or a .npy mesh file
    semantics_path : str, the path to the .npy semantics file (one value per point)
    save_las_path : str, the path to save the .las file
    nearest_interpolation : False, not to use nearest interpolation to assign semantics to the unlabeled points; positive integer, the number of nearest neighbors to use for nearest interpolation

    Returns
    -------
    None

    Raises
    ------
    ValueError : if pointcloud_path is neither a .las nor a .npy file, or if interpolation is requested but no point is labeled
    """
    if pointcloud_path.endswith('.las'):
        points, colors = read_las_file(pointcloud_path)
    elif pointcloud_path.endswith('.npy'):
        points, colors = read_mesh_file(pointcloud_path)
        # NOTE(review): presumably read_mesh_file returns colors in [0, 1] -- confirm
        colors = colors * 255
    else:
        # fail early instead of hitting an unbound `points` further down
        raise ValueError(f'Unsupported point cloud file (expected .las or .npy): {pointcloud_path}')

    semantics = np.load(semantics_path)

    print('maximum of semantics: ', semantics.max())
    print('number of unique semantics: ', len(np.unique(semantics)))

    # construct a .las file
    hdr = laspy.LasHeader(version="1.2", point_format=3)
    hdr.scale = [0.0001, 0.0001, 0.0001]  # Example scale factor, adjust as needed
    hdr.offset = np.min(points, axis=0)

    # Create a LasData object
    las = laspy.LasData(hdr)

    # Add points
    las.x = points[:, 0]
    las.y = points[:, 1]
    las.z = points[:, 2]

    # Add colors
    las.red = colors[:, 0]
    las.green = colors[:, 1]
    las.blue = colors[:, 2]

    # Add semantics
    if not nearest_interpolation:
        # NOTE(review): unlabeled points (semantics < 0) are written unchanged here -- confirm the intensity field can hold them
        las.intensity = semantics
    else:
        las.intensity = _interpolate_unlabeled_semantics(points, semantics, nearest_interpolation)

    # Write the LAS file
    las.write(save_las_path)


def _interpolate_unlabeled_semantics(points, semantics, num_neighbors):
    """Return a copy of semantics in which each unlabeled point (< 0) gets the most frequent label among its nearest labeled points."""
    # labeled points are the points with semantics >= 0; unlabeled points are the points with semantics < 0
    labeled_mask = semantics >= 0
    unlabeled_mask = semantics < 0

    combined_semantics = semantics.copy()
    if not unlabeled_mask.any():
        # nothing to fill in
        return combined_semantics
    if not labeled_mask.any():
        raise ValueError('Cannot interpolate semantics: no labeled points (semantics >= 0) found')

    labeled_points = points[labeled_mask]
    # np.bincount needs non-negative integers
    labeled_semantics = semantics[labeled_mask].astype(np.int64)
    unlabeled_points = points[unlabeled_mask]

    # construct a KDTree
    tree = cKDTree(labeled_points)

    # never ask for more neighbors than there are labeled points; the query
    # would otherwise return out-of-range indices for the missing neighbors
    k = min(int(num_neighbors), labeled_points.shape[0])
    _, indices = tree.query(unlabeled_points, k=k)
    # the query returns a 1-D index array for k == 1; force (n_unlabeled, k)
    indices = indices.reshape(unlabeled_points.shape[0], -1)
    nearest_semantics = labeled_semantics[indices]

    # most frequent label among the neighbors of each unlabeled point
    unlabeled_semantics = np.array([np.bincount(row).argmax() for row in nearest_semantics])

    combined_semantics[unlabeled_mask] = unlabeled_semantics
    return combined_semantics

In [87]:
image_id = 613

# Both the input semantics file and the output .las file live in <associations>/semantics
semantics_dir = os.path.join(associations_folder_path, 'semantics')
semantics_folder_path = os.path.join(semantics_dir, 'semantics_{}.npy'.format(image_id))
save_las_path = os.path.join(semantics_dir, 'semantics_{}.las'.format(image_id))

add_semantics_to_pointcloud(
    pointcloud_path,
    semantics_folder_path,
    save_las_path,
    nearest_interpolation=5,
)

maximum of semantics:  36093
number of unique semantics:  692


In [6]:
# Input: the .las file written in the previous step
semantic_pc_file_path = save_las_path

# Output: same folder, with a "_shuffled" suffix (save_las_path is rebound to it)
shuffled_file_name = 'semantics_{}_shuffled.las'.format(image_id)
save_las_path = os.path.join(associations_folder_path, 'semantics', shuffled_file_name)

post_processing = PostProcessing(semantic_pc_file_path)
post_processing.shuffle_semantic_ids()
post_processing.save_semantic_pointcloud(save_las_path)

Number of unique semantics:  691


In [85]:
from validation import *

# Read the scene, then evaluate (the metrics are printed below)
validation = ScannetValidation()
validation.read_scene(
    '../../data/scene0000_00',
    remove_background_in_prediction=False,
)
validation.evaluate()

Unique labels in the ground truth:  [ -1   9  20  26  66  67  71  87  95  98 141 166 193 247 369 435 448 465
 468]
Counts of unique labels in the ground truth:  [62613  1482   269   429  1903   249   201   454   192   695    43   258
  2733  2509   418   220  2049    66   229]
Number of unique labels in the ground truth:  19
Accuracy:  0.761439775619384
Precision:  0.761439775619384
F1-score:  0.7614397756193841
