In [1]:
# Autoreload is enabled: edited project modules are re-imported automatically (comment out the two lines below to disable)
%load_ext autoreload
%autoreload 2

import os
import os.path as osp
import sys
import torch
import numpy as np
from time import time
from omegaconf import OmegaConf
start = time()
import warnings
warnings.filterwarnings('ignore')

# torch.cuda.set_device(I_GPU)
# Make the project importable from this notebook: DIR is the parent of the
# current working directory and ROOT is the parent of DIR. Both are pushed to
# the front of sys.path so they take precedence over installed packages.
# NOTE(review): this depends on the directory the kernel was started in.
DIR = os.path.dirname(os.getcwd())
ROOT = os.path.join(DIR, "..")
sys.path.insert(0, ROOT)
sys.path.insert(0, DIR)

from torch_points3d.utils.config import hydra_read
from torch_geometric.data import Data
from torch_points3d.core.multimodal.data import MMData, MMBatch
from torch_points3d.visualization.multimodal_data import visualize_mm_data
from torch_points3d.core.multimodal.image import SameSettingImageData, ImageData
from torch_points3d.datasets.segmentation.multimodal.scannet import ScannetDatasetMM
from torch_points3d.datasets.segmentation.scannet import CLASS_COLORS, CLASS_NAMES, CLASS_LABELS
from torch_points3d.metrics.segmentation_tracker import SegmentationTracker
from torch_points3d.datasets.segmentation import IGNORE_LABEL
from torch_points3d.metrics.scannet_segmentation_tracker import ScannetSegmentationTracker
from torch_points3d.metrics.colored_tqdm import Coloredtqdm as Ctq

from PIL import Image

import matplotlib.pyplot as plt 

%matplotlib inline

# Override two palette entries in place (this mutates the list imported from
# the scannet dataset module). The last entry is set to black; the plotting
# code further down indexes the palette with labels that were shifted by -1,
# so a shifted label of -1 picks this last entry.
CLASS_COLORS[0] = (174.0, 199.0, 232.0)
CLASS_COLORS[-1] = (0, 0, 0)
import plotly.io as pio

# Select the plotly renderer used for figures in this notebook.
#pio.renderers.default = 'jupyterlab'        # for local notebook
pio.renderers.default = 'iframe_connected'  # for remote notebook. Other working (but seemingly slower) options are: 'sphinx_gallery' and 'iframe'

MMData debug() function changed, please uncomment the 3rd assert line when doing inference without M2F features!


In [2]:
def get_seen_points(mm_data):
    """Return the subset of `mm_data` made of points mapped to at least one view.

    The per-point view counts are derived from consecutive differences of the
    first image modality's `view_csr_indexing` pointers. Points whose count is
    zero are dropped; every remaining point is selected exactly once.
    """
    image_data = mm_data.modalities['image'][0]
    csr_pointers = image_data.view_csr_indexing
    views_per_point = csr_pointers[1:] - csr_pointers[:-1]

    # Repeat every point index once per view; indices with zero views vanish.
    all_point_ids = torch.arange(image_data.num_points)
    repeated_ids = torch.repeat_interleave(all_point_ids, views_per_point)

    # Collapse the repeats so the same point is never indexed twice.
    seen_ids = torch.unique(repeated_ids)
    return mm_data[seen_ids]

def get_mode_pred(data):
    """Per-point majority vote over the labels stored in `mvfusion_input`.

    Channel 0 of `data.data.mvfusion_input` is cast to bool and used as a
    per-view validity mask; the last channel is cast to long and used as the
    per-view label. For each point, the most frequent label among its valid
    views is returned. The results are stacked into a single tensor with one
    entry per point.
    """
    fusion_input = data.data.mvfusion_input
    validity_mask = fusion_input[:, :, 0].bool()
    view_labels = fusion_input[:, :, -1].long()

    # Iterate points row by row, keep only labels from valid views, then vote.
    per_point_modes = [
        torch.mode(labels[mask].squeeze(), dim=0)[0]
        for labels, mask in zip(view_labels, validity_mask)
    ]
    return torch.stack(per_point_modes, dim=0)

def get_random_view_pred(data):
    """Pick, for every point, the label of one uniformly drawn valid view.

    Channel 0 of `data.data.mvfusion_input` is cast to bool and used as a
    per-view validity mask; the last channel is cast to long and used as the
    per-view label. One index is drawn with `torch.randint` per point, in
    point order, so results depend on the global torch RNG state.
    """
    fusion_input = data.data.mvfusion_input
    validity_mask = fusion_input[:, :, 0].bool()
    view_labels = fusion_input[:, :, -1].long()

    # Gather the valid labels of every point first, then draw in point order.
    candidates_per_point = [
        labels[mask] for labels, mask in zip(view_labels, validity_mask)
    ]

    picks = []
    for candidates in candidates_per_point:
        choice = torch.randint(low=0, high=candidates.shape[0], size=(1,))
        # Indexing with a 1-element tensor keeps a leading dim; drop it.
        picks.append(candidates[choice].squeeze(0))
    return torch.stack(picks, dim=0)


In [6]:
# Set your dataset root directory, where the data was/will be downloaded
# NOTE(review): other cells in this notebook use '/scratch-shared/fsun/data/...';
# 'dvata' may be a typo for 'data' — confirm before running.
DATA_ROOT = '/scratch-shared/fsun/dvata'

dataset_config = 'segmentation/multimodal/Feng/scannet-neucon-smallres-m2f.yaml' 

# Alternative model selections, kept for reference. Exactly one
# (models_config, model_name) pair must be active.
# models_config = 'segmentation/multimodal/Feng/mvfusion'    # model family
# model_name = 'MVFusion_3D_small_6views'                       # specific model

# models_config = 'segmentation/multimodal/Feng/view_selection_experiment'    # model family
# model_name = 'Deepset_3D'                       # specific model


models_config = 'segmentation/multimodal/Feng/view_selection_experiment'    # model family
model_name = 'Average_Fusion'                       # specific model


# Hydra-style overrides assembled from the selections above.
overrides = [
    'task=segmentation',
    f'data={dataset_config}',
    f'models={models_config}',
    f'model_name={model_name}',
    f'data.dataroot={DATA_ROOT}',
]

cfg = hydra_read(overrides)
OmegaConf.set_struct(cfg, False)  # This allows getattr and hasattr methods to function correctly
cfg.data.load_m2f_masks = True   # load Mask2Former predicted masks
cfg.data.m2f_preds_dirname = 'm2f_masks'
# Use the same number of views for the dataset as the selected model's
# transformer backbone is configured with.
cfg.data.n_views = cfg.models[model_name].backbone.transformer.n_views
print(cfg.data.n_views)

# Dataset instantiation
# `start` is reused here as a timer for the dataset construction only.
start = time()
dataset = ScannetDatasetMM(cfg.data)
# print(dataset)
print(f"Time = {time() - start:0.1f} sec.")

6
Load predicted 2D semantic segmentation labels from directory  m2f_masks
initialize train dataset
initialize val dataset
Time = 7.6 sec.


In [7]:
from torch_points3d.models.model_factory import instantiate_model

# Checkpoint directories of earlier experiments, kept for reference. Exactly one
# `checkpoint_dir` must be active, and it must contain '<model_name>.pt' for the
# `model_name` selected in the dataset/config cell.
# NOTE(review): the output recorded for this cell is a FileNotFoundError for the
# active path below, so later outputs in this notebook presumably come from a
# different kernel state — confirm the path before re-running.

# # ViT_masks 3rd run
# checkpoint_dir = '/home/fsun/DeepViewAgg/outputs/ViT_masks_3rd_run' # 3rd run

# # ViT_masks 9 views
# checkpoint_dir = '/home/fsun/DeepViewAgg/outputs/2023-01-25/16-02-53'


# # MVFusion_orig
# checkpoint_dir = '/home/fsun/DeepViewAgg/outputs/MVFusion_orig'


# # M2F masks 6 views small
# checkpoint_dir = "/home/fsun/DeepViewAgg/outputs/MVFusion_3D_6_views_m2f_masks"


# # DeepSet_3D
# checkpoint_dir = "/home/fsun/DeepViewAgg/outputs/2023-01-23/12-57-16"

# # DeepSet_3D m2f_masks
# checkpoint_dir = "/home/fsun/DeepViewAgg/outputs/2023-02-05/23-15-04"



# ViT Average Fusion (3D)
checkpoint_dir = "/home/fsun/DeepViewAgg/outputs/2023-01-15/19-22-41"


# Create the model
print(f"Creating model: {cfg.model_name}")
model = instantiate_model(cfg, dataset)
# print(model)

# Load the checkpoint and recover the 'best_miou' model weights
# (loaded onto the CPU first; strict=False is forwarded to the project's loader).
checkpoint = torch.load(f'{checkpoint_dir}/{model_name}.pt', map_location='cpu')
model.load_state_dict_with_same_shape(checkpoint['models']['best_miou'], strict=False)

# Move the model to the GPU (this notebook only runs evaluation, not training)
model = model.cuda()
print('Model loaded')

Creating model: Average_Fusion
task:  segmentation.multimodal
tested_model_name:  Average_Fusion
class_name:  MVAttentionAPIModel
model_module:  torch_points3d.models.segmentation.multimodal.Feng.mvattention_attention_weighted_m2f_pred
name, cls of chosen model_cls:  MVAttentionAPIModel <class 'torch_points3d.models.segmentation.multimodal.Feng.mvattention_attention_weighted_m2f_pred.MVAttentionAPIModel'>
x feature dim:  {'FEAT': 3}
nc_in:  23
nc_in:  64
nc_in:  32
nc_in:  64
nc_in:  128
nc_in:  256
nc_in:  128
nc_in:  128
nc_in:  96
nc_in:  96


FileNotFoundError: [Errno 2] No such file or directory: '/home/fsun/DeepViewAgg/outputs/2023-01-15/19-22-41/Average_Fusion.pt'

In [5]:
import pandas as pd
pd.set_option('display.max_rows', 50)

# Create validation loader
# NOTE(review): the four positional arguments after `model` are passed without
# names; presumably batch_size=1, shuffle=False, num_workers=17 and
# precompute_multi_scale=False — confirm against `create_dataloaders`' signature.
dataset.create_dataloaders(
    model,
    1,
    False,
    17,
    False,
    train_only=False,
    val_only=True,
    test_batch_size=1
)

# Look up the attribute named MAPPING_IDX_TO_SCAN_<SPLIT>_NAMES on the validation
# dataset, where <SPLIT> is the upper-cased `split` of that dataset.
mapping_idx_to_scan_names = getattr(dataset.val_dataset, "MAPPING_IDX_TO_SCAN_{}_NAMES".format(dataset.val_dataset.split.upper()))

# Evaluate 2D and save refined masks

In [6]:
# Feng: extra imports for 2D evaluation
from torch_points3d.utils.multimodal import lexargsort
from torch_points3d.core.multimodal.csr import CSRData
import scipy.ndimage
from PIL import Image
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

class Evaluator():
    """Validation-set evaluator for a multi-view fusion segmentation model.

    Two entry points are offered:

    * `eval` runs the model on every validation batch and tracks 3D metrics on
      all points.
    * `eval_3d_seen_points` restricts every batch to the points mapped to at
      least one view, and tracks (a) a per-point majority vote over the input
      2D labels, (b) the model's 3D predictions, and (c) 2D scores of the input
      masks and of masks rebuilt from the 3D predictions.

    Args:
        eval_dataset: dataset object providing `get_tracker`, `has_val_loader`
            and `val_dataloader`. When omitted, the notebook-level `dataset`
            variable is used, which keeps `Evaluator()` working as before.
        model: model to evaluate. May also be assigned later via `_model`.
        device: device handed to `model.set_input`. When omitted and `model`
            is given, `model.device` is used if present. May also be assigned
            later via `_device`.
    """

    # Number of leading batches for which debug information is printed by
    # `_test_epoch_3d_seen_points`.
    _N_DEBUG_BATCHES = 5

    def __init__(self, eval_dataset=None, model=None, device=None):
        # Fall back to the notebook-level `dataset` only when nothing is injected.
        self._dataset = dataset if eval_dataset is None else eval_dataset
        if device is None and model is not None:
            device = getattr(model, "device", None)
        self._model = model
        self._device = device
        self.wandb_log = False
        self.tensorboard_log = False

    # ------------------------------------------------------------------ helpers

    def _new_tracker(self):
        """Create a fresh tracker from the dataset using the logging flags."""
        return self._dataset.get_tracker(self.wandb_log, self.tensorboard_log)

    @staticmethod
    def _get_seen_points(mm_data):
        """Return the subset of `mm_data` made of points mapped to >= 1 view."""
        image_data = mm_data.modalities['image'][0]
        csr_idx = image_data.view_csr_indexing
        dense_idx_list = torch.arange(image_data.num_points).repeat_interleave(
            csr_idx[1:] - csr_idx[:-1])
        # unique() selects each seen point once, without re-indexing duplicates.
        return mm_data[dense_idx_list.unique()]

    @staticmethod
    def _get_mode_pred(data):
        """Per-point majority vote over the valid per-view labels.

        Channel 0 of `data.data.mvfusion_input` is used as validity mask and
        the last channel as per-view label.
        """
        pixel_validity = data.data.mvfusion_input[:, :, 0].bool()
        mv_preds = data.data.mvfusion_input[:, :, -1].long()
        mode_preds = [
            torch.mode(labels[valid].squeeze(), dim=0)[0]
            for labels, valid in zip(mv_preds, pixel_validity)
        ]
        return torch.stack(mode_preds, dim=0)

    # ------------------------------------------------------------ entry points

    def eval_3d_seen_points(self, stage_name=""):
        """Evaluate 3D and 2D scores on the points seen in at least one view.

        Only the validation loader is evaluated, and only when `stage_name` is
        empty or equal to "val".
        """
        self._is_training = False

        self._tracker_baseline = self._new_tracker()
        self._tracker_mvfusion = self._new_tracker()

        print("trainer.py: Tracking 2D mask and 2D refined mask scores!")
        self._tracker_2d_model_pred_masks = self._new_tracker()
        self._tracker_2d_mvfusion_pred_masks = self._new_tracker()

        epoch = 1

        if self._dataset.has_val_loader:
            if not stage_name or stage_name == "val":
                self._test_epoch_3d_seen_points(epoch, "val")

    def eval(self, stage_name=""):
        """Evaluate 3D scores on all points of the validation set.

        Only the validation loader is evaluated, and only when `stage_name` is
        empty or equal to "val".
        """
        self._is_training = False

        # The baseline tracker is created (but not fed) here so that the
        # attribute exists after either entry point has been called.
        self._tracker_baseline = self._new_tracker()
        self._tracker_mvfusion = self._new_tracker()

        epoch = 1

        if self._dataset.has_val_loader:
            if not stage_name or stage_name == "val":
                self._test_epoch(epoch, "val")

    # --------------------------------------------------------------- 2D scores

    def _track_2d_results(self, model, mm_data, contains_pred=False, save_output=False):
        """ Track 2D scores for input semantic segmentation masks and output Multi-View Fusion refined 2D masks using simple nearest-neighbor interpolation and projected 3D point predictions.

        Args:
            model: model whose `output` is turned into `mm_data.data.pred`
                when `contains_pred` is false; unused otherwise.
            mm_data: multimodal sample; its first image modality provides the
                point-pixel mappings and the mask file paths.
            contains_pred: whether `mm_data.data.pred` is already populated.
            save_output: when true, each refined mask is written to a
                '<input mask dir name>_refined' directory next to the scan's
                other image directories.
        """
        if not contains_pred:
            mm_data.data.pred = model.output.detach().cpu().argmax(1)

        image_data = mm_data.modalities['image'][0]
        mappings = image_data.mappings
        point_ids = torch.arange(
                        mappings.num_groups, device=mappings.device).repeat_interleave(
                        mappings.pointers[1:] - mappings.pointers[:-1])
        image_ids = mappings.images.repeat_interleave(
                        mappings.values[1].pointers[1:] - mappings.values[1].pointers[:-1])
        pixels_full = mappings.pixels

        # Sort point and image ids based on image_id
        idx_sort = lexargsort(image_ids, point_ids)
        image_ids = image_ids[idx_sort]
        point_ids = point_ids[idx_sort]
        pixels_full = pixels_full[idx_sort].long()

        # Get pointers for easy indexing: view i owns entries pointers[i]:pointers[i+1]
        pointers = CSRData._sorted_indices_to_pointers(image_ids)

        # Directory for the refined masks: sibling of the input mask directory.
        scan_dir = os.sep.join(image_data.gt_mask_path[0].split(os.sep)[:-2])
        input_mask_name = image_data.m2f_pred_mask_path[0].split(os.sep)[-2]
        refined_mask_im_dir = osp.join(scan_dir, input_mask_name + '_refined')

        if save_output:
            print("Creating refined mask dir at ", refined_mask_im_dir)
            os.makedirs(refined_mask_im_dir, exist_ok=True)

        # Loop over all N views
        for i, x in enumerate(image_data):

            # Grab the 3D points corresponding to ith view
            lo, hi = pointers[i], pointers[i + 1]
            points = point_ids[lo:hi]
            pixels = pixels_full[lo:hi]
            # Image (x, y) pixel index
            w, h = pixels[:, 0], pixels[:, 1]

            # Grab set of points visible in current view
            mm_data_of_view = mm_data[points]

            im_ref_w, im_ref_h = x.ref_size

            # Projection image filled with 3D labels; -1 marks pixels without a point
            pred_mask_2d = -1 * torch.ones(
                (im_ref_h, im_ref_w), dtype=torch.long, device=mm_data_of_view.device)
            pred_mask_2d[h, w] = mm_data_of_view.data.pred.squeeze()

            # Nearest-neighbor fill: for every empty pixel, look up the indices of
            # the closest filled pixel. `scipy.ndimage.distance_transform_edt` is
            # used directly because the `scipy.ndimage.morphology` namespace is
            # deprecated.
            nearest_neighbor = scipy.ndimage.distance_transform_edt(
                (pred_mask_2d == -1).numpy(), return_distances=False, return_indices=True)
            # Explicit (row, col) index tensors instead of relying on implicit
            # sequence-as-tuple indexing with a (2, H, W) array.
            rows, cols = torch.as_tensor(nearest_neighbor, dtype=torch.long)
            pred_mask_2d = pred_mask_2d[rows, cols].numpy().astype(np.uint8)
            pred_mask_2d = Image.fromarray(pred_mask_2d, 'L')

            # SAVE REFINED MASK IN GIVEN DIR
            im_name = x.m2f_pred_mask_path[0].split("/")[-1]

            # resample=0 is nearest-neighbor, which keeps label values intact
            pred_mask_2d = pred_mask_2d.resize((640, 480), resample=0)

            if save_output:
                pred_mask_2d.save(osp.join(refined_mask_im_dir, im_name))

            refined_2d_pred = np.asarray(pred_mask_2d)

            # 2D mIoU calculation for M2F labels per view
            # Ground-truth path is derived from the input mask path.
            gt_img_path = x.m2f_pred_mask_path[0].split("/")
            # Adjust filepath after Snellius migration
            gt_img_path[1] = 'scratch-shared'
            gt_img_path[-2] = 'label-filt-scannet20'
            gt_img_path = "/".join(gt_img_path)
            # Context managers close the files; this loop opens two images per view.
            with Image.open(gt_img_path) as gt_file:
                gt_img = np.asarray(
                    gt_file.resize((640, 480), resample=0)).astype(int) - 1   # -1 label offset

            # Input 2D mask of the current view, with the same -1 label offset.
            # NOTE(review): unlike the ground truth this mask is not resized;
            # presumably it is already stored at 640x480 — confirm.
            with Image.open(x.m2f_pred_mask_path[0]) as pred_file:
                orig_2d_pred = np.asarray(pred_file).astype(int) - 1

            # 2D segmentation network mIoU
            self._tracker_2d_model_pred_masks.track(
                pred_labels=orig_2d_pred, gt_labels=gt_img, model=None)

            # 2D MVFusion mIoU
            self._tracker_2d_mvfusion_pred_masks.track(
                pred_labels=refined_2d_pred, gt_labels=gt_img, model=None)

        return

    # --------------------------------------------------------------- 3D epochs

    def _test_epoch_3d_seen_points(self, epoch, stage_name: str):
        """Run the validation loader and track scores on seen points only.

        `stage_name` is overwritten with each loader's dataset name before the
        trackers are reset.
        """
        loaders = [self._dataset.val_dataloader]

        self._model.eval()

        batch_idx = 0

        for loader in loaders:
            print("Input mask type: ", loader.dataset.m2f_preds_dirname)

            stage_name = loader.dataset.name
            self._tracker_baseline.reset(stage_name)
            self._tracker_mvfusion.reset(stage_name)

            self._tracker_2d_mvfusion_pred_masks.reset(stage_name)
            self._tracker_2d_model_pred_masks.reset(stage_name)

            with Ctq(loader) as tq_loader:
                for data in tq_loader:
                    with torch.no_grad():

                        self._model.set_input(data, self._device)
                        with torch.cuda.amp.autocast(enabled=self._model.is_mixed_precision()):
                            self._model.forward(epoch=epoch)

                        data.data.pred = self._model.output.detach().cpu().argmax(1)

                        # Decide once per batch so that the "before" and "after"
                        # dumps are always printed for the same batches.
                        show_debug = batch_idx < self._N_DEBUG_BATCHES
                        batch_idx += 1

                        if show_debug:
                            print("Before subsampling")
                            print(data, flush=True)
                            print(data.data.pred, data.data.pred.shape, flush=True)
                            print(data.data.y, data.data.y.shape, flush=True)

                        data = self._get_seen_points(data)
                        mode_pred = self._get_mode_pred(data)

                        if show_debug:
                            print("After subsampling")
                            print(data, flush=True)
                            print(data.data.pred, data.data.pred.shape, flush=True)
                            print(data.data.y, data.data.y.shape, flush=True)

                        # 3D mIoU
                        self._tracker_baseline.track(pred_labels=mode_pred, gt_labels=data.data.y, model=None)
                        self._tracker_mvfusion.track(pred_labels=data.data.pred, gt_labels=data.data.y, model=None)

                        # 2D mIoU
                        self._track_2d_results(self._model, data, contains_pred=True, save_output=False)

                    tq_loader.set_postfix(**self._tracker_mvfusion.get_metrics())

            print("Evaluated scores for 3D semantic segmentation on subset of seen points: ")
            print("--- Baseline 3D ---")
            self._tracker_baseline.print_summary()
            print("--- Baseline 2D ---")
            self._tracker_2d_model_pred_masks.print_summary()

            print("--- MVFusion_3D 3D ---")
            print(self._tracker_mvfusion.get_metrics())
            print("--- MVFusion_3D 2D ---")
            self._tracker_2d_mvfusion_pred_masks.print_summary()

    def _test_epoch(self, epoch, stage_name: str):
        """Run the validation loader and track 3D scores on all points.

        `stage_name` is overwritten with each loader's dataset name before the
        tracker is reset.
        """
        loaders = [self._dataset.val_dataloader]

        self._model.eval()

        for loader in loaders:
            print("Input mask type: ", loader.dataset.m2f_preds_dirname)

            stage_name = loader.dataset.name
            self._tracker_mvfusion.reset(stage_name)

            with Ctq(loader) as tq_loader:
                for data in tq_loader:
                    with torch.no_grad():

                        self._model.set_input(data, self._device)
                        with torch.cuda.amp.autocast(enabled=self._model.is_mixed_precision()):
                            self._model.forward(epoch=epoch)

                        data.data.pred = self._model.output.detach().cpu().argmax(1)

                        # 3D mIoU. Track the evaluator's own model rather than a
                        # notebook-level `model` variable that may be missing or stale.
                        self._tracker_mvfusion.track(self._model, full_res=True, data=data)

                    tq_loader.set_postfix(**self._tracker_mvfusion.get_metrics())

            print("Evaluated scores for 3D semantic segmentation on all points: ")

            print("--- MVFusion_3D 3D ---")
            print(self._tracker_mvfusion.get_metrics())

In [7]:
# Build the evaluator and attach the model and its device by hand: the methods
# of Evaluator read `self._model` and `self._device`.
evaluator = Evaluator()
evaluator._model = model
evaluator._device = model.device
# 3D evaluation on all points of the validation set.
evaluator.eval(stage_name='val')

Input mask type:  m2f_masks


  0%|          | 0/312 [00:01<?, ?it/s]

Evaluated scores for 3D semantic segmentation on all points: 
--- MVFusion_3D 3D ---
{'val_loss_seg': 0.3198231861416181, 'val_loss_cross_entropy': 0.3198231861416181, 'val_acc': 91.71695212611478, 'val_macc': 85.31477998411393, 'val_miou': 76.0211958779641}


In [8]:
# Metrics of the most recent evaluator run.
evaluator._tracker_mvfusion.get_metrics()

# Reference values recorded from an earlier run, kept for comparison:
# ViT_masks DeepSet_3D
# {'val_loss_seg': 0.28341159257942283,
#  'val_loss_cross_entropy': 0.28341159257942283,
#  'val_acc': 92.44521870741914,
#  'val_macc': 85.81729391336346,
#  'val_miou': 77.33113003915363}
# m2f_masks DeepSet_3D (no values recorded here)



{'val_loss_seg': 0.3198231861416181,
 'val_loss_cross_entropy': 0.3198231861416181,
 'val_acc': 91.71695212611478,
 'val_macc': 85.31477998411393,
 'val_miou': 76.0211958779641}

In [19]:
# Report mIoU values from the evaluator's trackers.
# NOTE(review): the baseline tracker is only fed by
# `Evaluator.eval_3d_seen_points()`; no visible cell calls it, so run it before
# this cell on a fresh kernel.
print("Evaluated scores for 3D semantic segmentation on subset of seen points: ")
print("--- Baseline 3D ---")
print(evaluator._tracker_baseline._miou)
# print("--- Baseline 2D ---")
# # evaluator._tracker_2d_model_pred_masks.print_summary()
# print(evaluator._tracker_2d_model_pred_masks._miou)

print("--- MVFusion_3D 3D ---")
# evaluator._tracker_mvfusion.print_summary()
print(evaluator._tracker_mvfusion._miou)

# print("--- MVFusion_3D 2D ---")
# # evaluator._tracker_2d_mvfusion_pred_masks.print_summary()
# print(evaluator._tracker_2d_mvfusion_pred_masks._miou)


""" ViT_masks
Evaluated scores for 3D semantic segmentation on subset of seen points: 
--- Baseline 3D ---
73.0155679447881
--- Baseline 2D ---
73.08250254630084
--- MVFusion_3D 3D ---
79.55840757106
--- MVFusion_3D 2D ---
76.10503204883486
"""

""" m2f_masks
Evaluated scores for 3D semantic segmentation on subset of seen points: 
--- Baseline 3D ---
71.14105785606102
--- Baseline 2D ---
70.91280415196708
--- MVFusion_3D 3D ---
77.69794634598392
--- MVFusion_3D 2D ---
74.66308931687603
"""

Evaluated scores for 3D semantic segmentation on subset of seen points: 
--- Baseline 3D ---
71.14105785606102
--- Baseline 2D ---
70.91280415196708
--- MVFusion_3D 3D ---
77.69794634598392
--- MVFusion_3D 2D ---
74.66308931687603


' ViT_masks\nEvaluated scores for 3D semantic segmentation on subset of seen points: \n--- Baseline 3D ---\n73.0155679447881\n--- Baseline 2D ---\n73.08250254630084\n--- MVFusion_3D 3D ---\n79.55840757106\n--- MVFusion_3D 2D ---\n76.10503204883486\n'

# Find toilets and visualize 2D-3D predictions

In [80]:
def visualize_2d(scene_id, mask_names,
                 input_mask_dir="/home/fsun/data/scannet/scans",
                 input_mask_name='ViT_masks',
                 refined_mask_dir="/scratch-shared/fsun/data/scannet/scans",
                 refined_mask_name='ViT_masks_refined'):
    """Plot image, ground truth, input mask and refined mask side by side.

    One 1x4 figure is shown per entry of `mask_names`.

    Args:
        scene_id: index into the validation dataset's
            `MAPPING_IDX_TO_SCAN_VAL_NAMES`, used to look up the scan name.
        mask_names: iterable of image file names present in all four
            per-scan directories.
        input_mask_dir: root directory holding '<scan>/<input_mask_name>/'.
        input_mask_name: sub-directory with the input 2D masks.
        refined_mask_dir: root directory holding the refined masks, the
            'label-filt-scannet20' ground truth and the 'color_resized' images.
        refined_mask_name: sub-directory with the refined masks.

    The default directories reproduce the previously hard-coded locations.
    """
    mapping_idx_to_names = dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES
    scene_id = mapping_idx_to_names[scene_id]

    palette = np.array(CLASS_COLORS)

    def _load_label_image(path, offset):
        """Read a label image as a signed int array and subtract `offset`."""
        with Image.open(path) as label_file:
            # Cast BEFORE subtracting: on uint8 data `0 - 1` would wrap to 255
            # and index past the end of the palette instead of selecting the
            # last (black) entry.
            return np.array(label_file).astype(int) - offset

    for m in mask_names:
        print(m)

        f, axarr = plt.subplots(1, 4, figsize=(12, 8))
        f.subplots_adjust(hspace=0.01, wspace=0.03)

        # Input masks and ground truth carry a label offset of 1; the refined
        # masks are used as stored.
        input_im = _load_label_image(
            osp.join(input_mask_dir, scene_id, input_mask_name, m), offset=1)
        input_im = palette[input_im].astype(np.uint8)

        refined_im = _load_label_image(
            osp.join(refined_mask_dir, scene_id, refined_mask_name, m), offset=0)
        refined_im = palette[refined_im].astype(np.uint8)

        gt_im = _load_label_image(
            osp.join(refined_mask_dir, scene_id, 'label-filt-scannet20', m), offset=1)
        gt_im = palette[gt_im].astype(np.uint8)

        with Image.open(osp.join(refined_mask_dir, scene_id, 'color_resized', m)) as color_file:
            im = np.array(color_file)

        axarr[0].set_title("image")
        axarr[1].set_title("ground truth")
        axarr[2].set_title("segmentation mask")
        axarr[3].set_title("refined mask")

        axarr[0].imshow(im)
        axarr[1].imshow(gt_im)
        axarr[2].imshow(input_im)
        axarr[3].imshow(refined_im)

        for ax in axarr:
            ax.axis('off')
        plt.show()
        # Release the figure so that long lists of masks do not accumulate memory.
        plt.close(f)

In [11]:
# # Visualize refined masks on 0.01 voxel size
# from torch_points3d.utils.multimodal import lexargsort
# from torch_points3d.core.multimodal.csr import CSRData
# import scipy.ndimage
# from PIL import Image
# import numpy as np

# def visualize_2d_upscaled(model, mm_data, contains_pred=False, save_output=False):
#     """ Track 2D scores for input semantic segmentation masks and output Multi-View Fusion refined 2D masks using simple nearest-neighbor interpolation and projected 3D point predictions.
#     """
#     if contains_pred == False:
#         mm_data.data.pred = model.output.detach().cpu().argmax(1)

#     mappings = mm_data.modalities['image'][0].mappings
#     point_ids = torch.arange(
#                     mappings.num_groups, device=mappings.device).repeat_interleave(
#                     mappings.pointers[1:] - mappings.pointers[:-1])
#     image_ids = mappings.images.repeat_interleave(
#                     mappings.values[1].pointers[1:] - mappings.values[1].pointers[:-1])    
#     pixels_full = mappings.pixels

#     # Sort point and image ids based on image_id
#     idx_sort = lexargsort(image_ids, point_ids)
#     image_ids = image_ids[idx_sort]
#     point_ids = point_ids[idx_sort]
#     pixels_full = pixels_full[idx_sort].long()

#     # Get pointers for easy indexing
#     pointers = CSRData._sorted_indices_to_pointers(image_ids)

#     # Save refined masks
#     im_paths = mm_data.modalities['image'][0].gt_mask_path
#     scan_dir = os.sep.join(im_paths[0].split(os.sep)[:-2])
#     input_mask_name = mm_data.modalities['image'][0].m2f_pred_mask_path[0].split(os.sep)[-2]

#     # Dirty workaround for masks in different directory
#     if input_mask_name == 'ViT_masks':
#         scan_id = scan_dir.split(os.sep)[-1]
#         mask_im_dir = osp.join("/home/fsun/data/scannet/scans", scan_id, input_mask_name)
#         refined_mask_im_dir = osp.join(scan_dir, input_mask_name + '_refined')
#     else:
#         mask_im_dir = osp.join(scan_dir, input_mask_name)
#         refined_mask_im_dir = osp.join(scan_dir, input_mask_name + '_refined')

#     if save_output:
#         print("Creating refined mask dir at ", refined_mask_im_dir)
#         os.makedirs(refined_mask_im_dir, exist_ok=True)

#     # Loop over all N views
#     for i, x in enumerate(mm_data.modalities['image'][0]):

#         # Grab the 3D points corresponding to ith view
#         start, end = pointers[i], pointers[i+1]    
#         points = point_ids[start:end]
#         pixels = pixels_full[start:end]
#         # Image (x, y) pixel index
#         w, h = pixels[:, 0], pixels[:, 1]

#         # Grab set of points visible in current view
#         mm_data_of_view = mm_data[points]

#         im_ref_w, im_ref_h = x.ref_size

#         # Get nearest neighbor interpolated projection image filled with 3D labels
#         pred_mask_2d = -1 * torch.ones((im_ref_h, im_ref_w), dtype=torch.long, device=mm_data_of_view.device)    
#         pred_mask_2d[h, w] = mm_data_of_view.data.pred.squeeze()

#         nearest_neighbor = scipy.ndimage.morphology.distance_transform_edt(
#             pred_mask_2d==-1, return_distances=False, return_indices=True)    
#         pred_mask_2d = pred_mask_2d[nearest_neighbor].numpy().astype(np.uint8)
#         pred_mask_2d = Image.fromarray(pred_mask_2d, 'L')          

#         # SAVE REFINED MASK IN GIVEN DIR
#         im_name = x.m2f_pred_mask_path[0].split("/")[-1]

#         pred_mask_2d = pred_mask_2d.resize((640, 480), resample=0)

#         if save_output:
#             pred_mask_2d.save(osp.join(refined_mask_im_dir, im_name))

#         pred_mask_2d = np.asarray(pred_mask_2d)

#         # 2D mIoU calculation for M2F labels per view
#         # Get gt 2d image
#         gt_img_path = x.m2f_pred_mask_path[0].split("/")
#         # Adjust filepath after Snellius migration
#         gt_img_path[1] = 'scratch-shared'
#         gt_img_path[-2] = 'label-filt-scannet20'
#         gt_img_path = "/".join(gt_img_path)
#         gt_img = Image.open(gt_img_path)

#         gt_img = np.asarray(gt_img.resize((640, 480), resample=0)).astype(int) - 1   # -1 label offset

#         # Input mask and refined mask for current view
#         refined_im = pred_mask_2d
#         gt_im = gt_img



#         f, axarr = plt.subplots(1, 2, figsize=(12, 8))
#         f.subplots_adjust(hspace=0.01, wspace=0.03)

# #         input_im = Image.open(osp.join(input_mask_dir, scene_id, input_mask_name, m))
# #         input_im = np.array(input_im) - 1   # label offset
# #         input_im = np.array(CLASS_COLORS)[input_im].astype(np.uint8)

# #         refined_im = Image.open(osp.join(refined_mask_dir, scene_id, refined_mask_name, m))
# #         refined_im = np.array(refined_im)
#         refined_im = np.array(CLASS_COLORS)[refined_im].astype(np.uint8)

# #         gt_im = Image.open(osp.join(refined_mask_dir, scene_id, 'label-filt-scannet20', m))
# #         gt_im = np.array(gt_im).astype(int) - 1   # label offset
#         gt_im = np.array(CLASS_COLORS)[gt_im].astype(np.uint8)

# #         im = Image.open(osp.join(refined_mask_dir, scene_id, 'color_resized', m))

#         axarr[0].set_title("ground truth")
#         axarr[1].set_title("refined mask")
# #         axarr[2].set_title("segmentation mask")
# #         axarr[3].set_title("refined mask")

#         axarr[0].imshow(gt_im) 
#         axarr[1].imshow(refined_im) 
# #         axarr[2].imshow(input_im)
# #         axarr[3].imshow(refined_im) 

#         for x in axarr:
#             x.axis('off')
#         plt.show()

In [12]:
# visualize_2d_upscaled(model, mm_data, contains_pred=True, save_output=False)

### Below is an example to visualize refined masks back-projected at full resolution

In [9]:
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt
import scipy.ndimage


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [10]:
def get_semantic_image_from_camera(dataset, mesh_triangles, intrinsic, extrinsic, class_id_faces, im_size=(480, 640), scene=None):
    """
    Returns the back-projected semantic label image given camera parameters and (semantic) mesh.

    Args:
        dataset: object whose `val_dataset._remap_labels` maps raw labels to
            the label range used for evaluation.
        mesh_triangles: array of per-triangle vertex indices, indexed with the
            primitive ids returned by the ray caster.
        intrinsic: camera intrinsic matrix passed to `create_rays_pinhole`.
        extrinsic: camera extrinsic matrix passed to `create_rays_pinhole`.
        class_id_faces: array of raw labels, indexed here with the vertex
            index selected for every hit pixel.
            NOTE(review): the name suggests per-face labels but the lookup is
            by vertex index — confirm.
        im_size: (height, width) of the rendered label image.
        scene: ray-casting scene that already contains the mesh. When omitted,
            a notebook-level variable named `scene` is used, which is what
            earlier versions of this function relied on implicitly.

    Returns:
        Array of shape `im_size` with one label per pixel; pixels whose ray
        misses the mesh take the label of the nearest hit pixel.
    """
    if scene is None:
        # Backward compatibility with callers that define `scene` globally.
        try:
            scene = globals()["scene"]
        except KeyError:
            raise NameError(
                "no `scene` argument was given and no global `scene` is defined") from None

    # Initialize rays for given camera
    rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
        intrinsic_matrix=intrinsic,
        extrinsic_matrix=extrinsic,
        width_px=im_size[1],
        height_px=im_size[0],
    )

    # Get result
    ans = scene.cast_rays(rays)

    primitive_ids = ans['primitive_ids'].numpy()
    primitive_uvs = ans['primitive_uvs'].numpy()

    # Select the closest vertex for each valid face in the projected mesh
    valid_mask = primitive_ids != scene.INVALID_ID

    # https://stackoverflow.com/questions/45212949/vertex-of-a-3d-triangle-that-is-closest-to-a-point-given-barycentric-parameter-o
    # The third barycentric weight is 1 - u - v; the largest weight marks the
    # closest triangle corner.
    hit_uvs = primitive_uvs[valid_mask]
    w_coords = 1 - hit_uvs[:, 0] - hit_uvs[:, 1]
    barycentric_coords = np.concatenate((w_coords[:, None], hit_uvs), axis=-1)

    selected_vertex_idx = np.argmax(barycentric_coords, axis=-1)

    contained_mesh_triangles = mesh_triangles[primitive_ids[valid_mask]]
    closest_mesh_vertices = contained_mesh_triangles[
        np.arange(len(barycentric_coords)), selected_vertex_idx]

    # Map mesh vertices to semantic label
    labels = class_id_faces[closest_mesh_vertices]
    # Remap to [0 ; num_labels - 1]
    labels = dataset.val_dataset._remap_labels(torch.tensor(labels))

    # Back-projection image; -1 marks pixels whose ray hit nothing
    image = torch.ones(im_size, dtype=torch.long) * -1
    image[torch.from_numpy(valid_mask)] = labels

    # NN interpolation at invalid pixels. `scipy.ndimage.distance_transform_edt`
    # is called directly because `scipy.ndimage.morphology` is deprecated.
    nearest_neighbor = scipy.ndimage.distance_transform_edt(
        (image == -1).numpy(), return_distances=False, return_indices=True)

    # Explicit (row, col) index tensors instead of implicit tuple-like indexing
    # with a (2, H, W) array.
    rows, cols = torch.as_tensor(nearest_neighbor, dtype=torch.long)
    image = image[rows, cols].numpy()
    return image

def read_axis_align_matrix(filename):
    """
    Read the axis-alignment matrix from a text file.

    The first line containing "axisAlignment" is used; the whitespace-separated numbers
    after its "=" sign are parsed as floats.

    Args:
        filename: path of the text file to scan.

    Returns:
        A (4, 4) torch.Tensor built from the 16 parsed values, or None when no line
        contains "axisAlignment".
    """
    with open(filename) as f:
        for line in f:
            if "axisAlignment" in line:
                # Take everything after '=' rather than str.strip(chars), which removes a
                # character set and not a prefix. split() tolerates repeated whitespace.
                values = line.partition("=")[2].split()
                return torch.Tensor([float(x) for x in values]).reshape((4, 4))
    return None

def save_semantic_prediction_as_txt(tracker, model_name, mask_name):
    """
    Write the full-resolution predictions held by `tracker` to one text file per scan.

    Files are written to `<tracker._dataset.path_to_submission>/<model_name>/<mask_name>/<scan_name>.txt`,
    one integer label per line.

    Args:
        tracker: object exposing `_dataset` (with `train_dataset.valid_class_idx` and
            `path_to_submission`), `_full_preds` (scan id -> prediction tensor) and
            `_raw_datas` (scan id -> object with a `scan_name`).
        model_name: name of the first sub-directory level.
        mask_name: name of the second sub-directory level.
    """
    original_class_ids = np.asarray(tracker._dataset.train_dataset.valid_class_idx)
    out_dir = osp.join(tracker._dataset.path_to_submission, model_name, mask_name)
    os.makedirs(out_dir, exist_ok=True)

    for scan_id, pred in tracker._full_preds.items():
        # int64 rather than int8 so class indices above 127 do not wrap around
        full_pred = pred.cpu().numpy().astype(np.int64)
        full_pred = original_class_ids[full_pred]  # remap labels to original labels between 0 and 40
        scan_name = tracker._raw_datas[scan_id].scan_name
        path_file = osp.join(out_dir, "{}.txt".format(scan_name))

        # A 1-D array is written one value per line, so no delimiter is needed
        np.savetxt(path_file, full_pred, fmt="%d")

In [17]:

""" 3D mIoU all points, 'using tracker.track(model, full_res=True, data=batch)'
{'val_loss_seg': 0.38128840970472433,
 'val_loss_cross_entropy': 0.38128840970472433,
 'val_acc': 90.8983347631198,
 'val_macc': 81.64234561141699,
 'val_miou': 72.73185519672101,
 'val_full_acc': 89.522088732399,
 'val_full_macc': 80.50851188208937,
 'val_full_miou': 71.04551770525589}
 
 3D mIoU all points, using 'tracker.track(pred_labels=batch.data.pred, gt_labels=batch.data.y, model=None, full_res=True)'
 {'val_acc': 90.93617664991409,
 'val_macc': 81.6409318554061,
 'val_miou': 72.77223563138047}
 
 
 3D mIoU seen points, using Evaluator class 'tracker.track(pred_labels=data.data.pred, gt_labels=data.data.y, model=None)'
 {'val_acc': 93.72231797469858,
 'val_macc': 87.5327875498592,
 'val_miou': 79.52590254859804}
 
 3D mIoU all points, using Evaluator class
 {'val_acc': 92.24818352916971,
 'val_macc': 85.92306134189178,
 'val_miou': 77.10140344053868}
 
"""

" 3D mIoU all points, 'using tracker.track(model, full_res=True, data=batch)'\n{'val_loss_seg': 0.38128840970472433,\n 'val_loss_cross_entropy': 0.38128840970472433,\n 'val_acc': 90.8983347631198,\n 'val_macc': 81.64234561141699,\n 'val_miou': 72.73185519672101,\n 'val_full_acc': 89.522088732399,\n 'val_full_macc': 80.50851188208937,\n 'val_full_miou': 71.04551770525589}\n \n 3D mIoU all points, using 'tracker.track(pred_labels=batch.data.pred, gt_labels=batch.data.y, model=None, full_res=True)'\n {'val_acc': 90.93617664991409,\n 'val_macc': 81.6409318554061,\n 'val_miou': 72.77223563138047}\n \n \n 3D mIoU seen points, using Evaluator class 'tracker.track(pred_labels=data.data.pred, gt_labels=data.data.y, model=None)'\n {'val_acc': 93.72231797469858,\n 'val_macc': 87.5327875498592,\n 'val_miou': 79.52590254859804}\n \n 3D mIoU all points, using Evaluator class\n {'val_acc': 92.24818352916971,\n 'val_macc': 85.92306134189178,\n 'val_miou': 77.10140344053868}\n \n"

In [7]:
# Tracker for the 'val' stage, with wandb and tensorboard logging switched off.
tracker = ScannetSegmentationTracker(dataset=dataset, stage='val', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)
# Reset the tracker for the 'val' stage before anything is tracked
tracker.reset(stage='val')

In [11]:


# Inference stage: run the model over the validation loader and feed every batch to `tracker`.
for batch in Ctq(dataset.val_dataloader):
    # Forward pass without gradients. Autocast is enabled only if the model reports mixed precision.
    # (The original referenced `self._model`, which does not exist at notebook level.)
    with torch.no_grad():
        model.set_input(batch, model.device)
        with torch.cuda.amp.autocast(enabled=model.is_mixed_precision()):
            model.forward(epoch=1)

    # Predicted class per row = argmax over dim 1 of the model output
    batch.data.pred = model.output.detach().cpu().argmax(1)

    tracker.track(model, full_res=True, data=batch)
#     tracker.track(pred_labels=batch.data.pred, gt_labels=batch.data.y, model=None, full_res=True)

tracker.get_metrics()

  0%|          | 0/312 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [11]:
# Rebind `tracker` to the tracker held by `evaluator` (`evaluator` is not defined in this part of the notebook).
tracker = evaluator._tracker_mvfusion

In [12]:
# Finalise the tracker with full_res=True, then print its metrics
tracker.finalise(full_res=True)
print(tracker.get_metrics())

# ViT_masks DeepSet_3D
# {'val_loss_seg': 0.28341159257942283, 'val_loss_cross_entropy': 0.28341159257942283, 'val_acc': 92.44521870741914, 'val_macc': 85.81729391336346, 'val_miou': 77.33113003915363, 'val_full_acc': 91.16445408766421, 'val_full_macc': 84.82435335385615, 'val_full_miou': 75.70483234685612}


{'val_loss_seg': 0.3198231861416181, 'val_loss_cross_entropy': 0.3198231861416181, 'val_acc': 91.71695212611478, 'val_macc': 85.31477998411393, 'val_miou': 76.0211958779641, 'val_full_acc': 90.46075123657181, 'val_full_macc': 84.42186045479552, 'val_full_miou': 74.61415390345623}


In [13]:
# Write the per-scan predictions held by `tracker` to .txt files and time the export
s = time()

save_semantic_prediction_as_txt(tracker, model_name=cfg.model_name, mask_name=cfg.data.m2f_preds_dirname)

# Elapsed wall-clock time of the export, in seconds
print(time() - s)
tracker.get_metrics()

56.27441906929016


{'val_loss_seg': 0.3198231861416181,
 'val_loss_cross_entropy': 0.3198231861416181,
 'val_acc': 91.71695212611478,
 'val_macc': 85.31477998411393,
 'val_miou': 76.0211958779641,
 'val_full_acc': 90.46075123657181,
 'val_full_macc': 84.42186045479552,
 'val_full_miou': 74.61415390345623}

In [97]:
# ViT_masks MVFusion_3D_small_6views, all points
{'val_loss_seg': 0.3175031589165044,
 'val_loss_cross_entropy': 0.3175031589165044,
 'val_acc': 92.247659598096,
 'val_macc': 86.00159203189781,
 'val_miou': 77.15651282093833,
 'val_full_acc': 90.95395023791664,
 'val_full_macc': 85.03907557298825,
 'val_full_miou': 75.596741698556}


""" DeepSet_3D ViT masks
# {'val_loss_seg': 0.28341159257942283, 
'val_loss_cross_entropy': 0.28341159257942283, 
'val_acc': 92.44521870741914, 
'val_macc': 85.81729391336346, 
'val_miou': 77.33113003915363, 
'val_full_acc': 91.16445408766421, 
'val_full_macc': 84.82435335385615, 
'val_full_miou': 75.70483234685612}

 """

# # # _vh_clean_2.ply, .aggregation.json, _vh_clean_2.0.010000.segs.json

# scan_names=mapping_idx_to_scan_names.values()

# paths = [os.path.join(x, 'sens', 'intrinsic', 'intrinsic_depth.txt') for x in scan_names]

# for p in paths:
#     print(f"cp {p} /scratch-shared/fsun/data/scannet/scans/{p}")

# Semantic mesh to image - backprojection

In [14]:
# User input
# (height, width) of the back-projected label images
output_image_size = (480, 640)
scans_dir = "/scratch-shared/fsun/data/scannet/scans"
# NOTE(review): "dvata" in this path looks like a typo for "data" (compare scans_dir) — confirm before running
preprocessed_2d_data_dir = "/scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/processed/processed_2d_val"

########################################################################################################################
input_mask_name = cfg.data.m2f_preds_dirname
scan_names = list(dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())

# For every scan: ray-cast the mesh from each camera pose and save one label image per pose
for scan_name in Ctq(scan_names):
    # Output directory for this scan's refined masks
    refined_mask_dir = osp.join(scans_dir, scan_name, f"{cfg.model_name}_{input_mask_name}_refined")
    
    if not osp.exists(refined_mask_dir):
        os.makedirs(refined_mask_dir)

    
    # Load data
    mesh = o3d.io.read_triangle_mesh(f"{scans_dir}/{scan_name}/{scan_name}_vh_clean_2.ply")
    # Keep the triangle -> vertex index table as numpy before converting the mesh to the tensor API
    mesh_triangles = np.asarray(mesh.triangles)
    mesh = o3d.t.geometry.TriangleMesh.from_legacy(mesh)
    
    # Load predicted class label per vertex
    # NOTE(review): relative, hard-coded path — presumably the files written by
    # save_semantic_prediction_as_txt; confirm it matches the dataset's path_to_submission
    class_id_faces = np.loadtxt(f"submission_labels/{cfg.model_name}/{input_mask_name}/{scan_name}.txt").astype(int)

    # Camera parameters
    # Only the upper-left 3x3 block of the loaded intrinsic matrix is used
    intrinsic = np.loadtxt(f"{scans_dir}/{scan_name}/sens/intrinsic/intrinsic_depth.txt")[:3, :3]
    images = torch.load(f"{preprocessed_2d_data_dir}/{scan_name}.pt")
    
    # Undo axis alignment for extrinsics  
    axis_align_matrix_path = osp.join(scans_dir, scan_name, scan_name + '.txt')
    axis_align_matrix = read_axis_align_matrix(axis_align_matrix_path)
    # inv(A.T).T equals inv(A): the extrinsics are left-multiplied by the inverse axis-alignment matrix
    inv = torch.linalg.inv(axis_align_matrix.T)
    images.extrinsic = inv.T  @ images.extrinsic        
    
    # Make world-to-camera
    extrinsics = torch.linalg.inv(images.extrinsic).numpy()
    # Image file names without directory and extension, reused for the output files
    image_names = [osp.splitext(osp.basename(x))[0] for x in images.path]

    # Raycasting
    # `scene` is a notebook-level global: get_semantic_image_from_camera reads it by name
    scene = o3d.t.geometry.RaycastingScene()
    scene.add_triangles(mesh)

    for i in range(len(image_names)):
        image = get_semantic_image_from_camera(dataset=dataset, mesh_triangles=mesh_triangles, intrinsic=intrinsic,
                                               extrinsic=extrinsics[i], 
                                               class_id_faces=class_id_faces, im_size=output_image_size)
        

        # Save refined prediction (backprojected from mesh + interpolated missing pixels)
        # Labels are cast to uint8 and stored as an 8-bit single-channel ('L') PNG
        image = Image.fromarray(image.astype(np.uint8), 'L')
        im_save_path = osp.join(refined_mask_dir, image_names[i] + '.png')
        image.save(im_save_path)
                
#         # to rgb
#         image = np.array(CLASS_COLORS)[image].astype(np.uint8)        


  0%|          | 0/312 [00:00<?, ?it/s]

# Evaluate refined masks

In [15]:
# Separate tracker for the 2D evaluation of the refined masks ('val' stage, wandb and tensorboard logging off)
tracker_2d = ScannetSegmentationTracker(dataset=dataset, stage='val', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)
# Reset the tracker for the 'val' stage before anything is tracked
tracker_2d.reset(stage='val')

In [16]:
input_mask_name = cfg.data.m2f_preds_dirname
print(input_mask_name)

scans_dir = "/scratch-shared/fsun/data/scannet/scans"
scan_names = list(dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())

# Colour lookup table, built once instead of once per image
class_colors = np.array(CLASS_COLORS)
# Last label mask seen, kept only for the preview drawn after the loop
last_mask = None

# Compare every refined 2D mask with its ground-truth label image and accumulate into tracker_2d
for batch in Ctq(dataset.val_dataloader):
    # NOTE(review): `mapping_idx_to_scan_names` is not defined in this cell; it must already
    # exist in the kernel — presumably dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES, confirm
    scan_name = mapping_idx_to_scan_names[batch.id_scan.item()]

    gt_dir = osp.join(scans_dir, scan_name, 'label-filt-scannet20')
    mask_dir = osp.join(scans_dir, scan_name, f"{cfg.model_name}_{input_mask_name}_refined")

    # Ground truth and refined mask share the file name of the input mask
    im_names = [osp.basename(x) for x in batch.modalities['image'][0].m2f_pred_mask_path]

    for im in im_names:
        gt = Image.open(osp.join(gt_dir, im))
        mask = Image.open(osp.join(mask_dir, im))
        # PIL size is (width, height); resample filter 0 is nearest-neighbour, so labels are not blended
        gt = gt.resize((640, 480), 0)

        # Shift ground-truth ids down by one (presumably so that 0 becomes the ignore label — confirm)
        gt = np.asarray(gt).astype(float) - 1
        mask = np.asarray(mask)

        tracker_2d.track(pred_labels=mask, gt_labels=gt, model=None)

        last_mask = mask

# Draw the preview once. Calling plt.imshow inside the loop stacked one image artist per
# frame on the same axes, with only the last one visible.
if last_mask is not None:
    # to rgb
    plt.imshow(class_colors[last_mask].astype(np.uint8))

tracker_2d.get_metrics()

m2f_masks


  0%|          | 0/312 [00:00<?, ?it/s]

{'val_acc': 92.32357209962426,
 'val_macc': 87.1733790590776,
 'val_miou': 78.7819210985855}

In [6]:
import numpy as np

# Per-class values of the "MVFusion_3D_small_6views m2f_masks" LaTeX row, parsed to floats
a = [
    float(cell)
    for cell in '88.7 & 95.3 & 71.3 & 84.2 & 86.4 & 73.3 & 84.1 & 73.3 & 65.1 & 74.6 & 54.4 & 62.3 & 69.9 & 78.6 & 81.8 & 75.2 & 93.1 & 72.3 & 81.0 & 61.6'.split(" & ")
]
# Mean over the classes
np.mean(a)

76.32499999999999

In [17]:
# Format the per-class IoU values of tracker_2d as one LaTeX table row (one decimal, " & "-separated)
result = tracker_2d._miou_per_class
latex = [str(np.round(float(v), 1)) for v in result.values()]

" & ".join(latex)

# MVFusion_3D_small_6views ViT_masks 
# {'val_acc': 92.88391039675598,
#  'val_macc': 87.96442707451335,
#  'val_miou': 79.72876416747103}
# '89.8 & 95.8 & 75.1 & 86.9 & 90.9 & 85.5 & 85.9 & 80.4 & 73.9 & 77.4 & 36.8 & 66.5 & 74.6 & 81.9 & 85.4 & 84.8 & 94.8 & 74.0 & 88.0 & 66.3'

# MVFusion_3D_small_6views m2f_masks
# '88.7 & 95.3 & 71.3 & 84.2 & 86.4 & 73.3 & 84.1 & 73.3 & 65.1 & 74.6 & 54.4 & 62.3 & 69.9 & 78.6 & 81.8 & 75.2 & 93.1 & 72.3 & 81.0 & 61.6'


### DeepSet_3D ViT_masks
# {'val_acc': 93.03383708073024,
#  'val_macc': 87.56107269530546,
#  'val_miou': 79.80734383927364}
# 90.0 & 95.8 & 74.1 & 86.4 & 91.3 & 85.7 & 87.1 & 81.2 & 74.4 & 76.9 & 38.1 & 66.0 & 76.9 & 81.5 & 83.7 & 81.8 & 94.8 & 75.5 & 88.4 & 66.3


### DeepSet_3D m2f_masks
# {'val_acc': 92.32357209962426,
#  'val_macc': 87.1733790590776,
#  'val_miou': 78.7819210985855}
# 89.7 & 95.8 & 73.5 & 86.3 & 88.5 & 81.6 & 85.7 & 73.5 & 66.7 & 79.3 & 52.7 & 67.3 & 72.0 & 82.5 & 80.4 & 81.4 & 94.2 & 73.8 & 86.9 & 63.7

'89.7 & 95.8 & 73.5 & 86.3 & 88.5 & 81.6 & 85.7 & 73.5 & 66.7 & 79.3 & 52.7 & 67.3 & 72.0 & 82.5 & 80.4 & 81.4 & 94.2 & 73.8 & 86.9 & 63.7'

In [21]:
# # # _vh_clean_2.ply, .aggregation.json, _vh_clean_2.0.010000.segs.json

# scan_names=mapping_idx_to_scan_names.values()

# paths = [os.path.join(x, 'sens', 'intrinsic', 'intrinsic_depth.txt') for x in scan_names]

# for p in paths:
#     print(f"cp {p} /scratch-shared/fsun/data/scannet/scans/{p}")

# scans_dir = "/scratch-shared/fsun/data/scannet/scans"
# for scan_id in os.listdir(scans_dir):
#     mask_dir = osp.join(scans_dir, scan_id, 'm2f_masks')
#     label_dir = osp.join(scans_dir, scan_id, 'label-filt-scannet20')
    
#     if len(os.listdir(mask_dir)) < len(os.listdir(label_dir)):
#         print(f"cp -r /lisa_migration/project/fsun/data/scannet/scans/{scan_id}/m2f_masks /scratch-shared/fsun/data/scannet/scans/{scan_id}/m2f_masks")

# Print (do not execute) the shell commands that flatten an accidentally nested
# <scan>/m2f_masks/m2f_masks directory back into <scan>/m2f_masks.
scans_dir = "/scratch-shared/fsun/data/scannet/scans"
for scan_id in os.listdir(scans_dir):
    s_dir = osp.join(scans_dir, scan_id)
    mask_dir = osp.join(scans_dir, scan_id, 'm2f_masks')

    # Only scans that contain the nested directory need fixing
    if not osp.exists(osp.join(mask_dir, 'm2f_masks')):
        continue

    for command in (
        f"mv {s_dir}/m2f_masks/m2f_masks {s_dir}/m2f_masks_temp",
        f"rm -rf {s_dir}/m2f_masks",
        f"mv {s_dir}/m2f_masks_temp {s_dir}/m2f_masks",
    ):
        print(command)
        

mv /scratch-shared/fsun/data/scannet/scans/scene0029_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0029_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0029_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0029_01/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0029_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0080_02/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0080_02/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0080_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0080_02/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0080_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0581_02/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0581_02/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0581_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0581_02/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0319_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0319_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0319_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0319_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0319_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0447_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0447_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0447_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0447_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0447_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0175_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0175_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0175_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0175_00/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0261_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0261_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0261_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0261_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0261_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0121_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0121_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0121_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0121_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0121_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0517_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0517_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0517_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0517_01/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0411_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0411_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0411_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0411_01/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0411_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0013_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0013_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0013_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0013_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0013_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0579_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0579_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0579_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0579_00/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0444_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0444_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0444_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0444_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0444_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0332_02/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0332_02/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0332_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0332_02/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0332_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0044_02/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0044_02/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0044_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0044_02/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0258_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0258_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0258_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0258_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0258_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0337_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0337_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0337_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0337_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0337_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0348_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0348_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0348_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0348_01/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0325_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0325_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0325_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0325_01/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0325_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0381_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0381_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0381_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0381_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0381_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0540_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0540_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0540_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0540_01/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0416_03/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0416_03/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0416_03/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0416_03/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0416_03/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0271_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0271_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0271_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0271_01/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0271_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0003_01/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0003_01/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0003_01/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0003_01/m2f_masks_temp /scratch-shared/fsu

mv /scratch-shared/fsun/data/scannet/scans/scene0322_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0322_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0322_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0322_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0322_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0498_00/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0498_00/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0498_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0498_00/m2f_masks_temp /scratch-shared/fsun/data/scannet/scans/scene0498_00/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0502_02/m2f_masks/m2f_masks /scratch-shared/fsun/data/scannet/scans/scene0502_02/m2f_masks_temp
rm -rf /scratch-shared/fsun/data/scannet/scans/scene0502_02/m2f_masks
mv /scratch-shared/fsun/data/scannet/scans/scene0502_02/m2f_masks_temp /scratch-shared/fsu