# Final Evaluation - Thesis Report

 - Evaluate on all images in a scene


In [1]:
# Evaluation variables

MASK_NAME = 'ViT_masks'



############# View-Fusion evaluation 
### Mask2Former

# None

# Majority vote

# Transformer

# DeepSet

### ViT-Adapter

# None

# Majority vote

# Transformer
# # MVFusion_orig
# CHECKPOINT_DIR = '/home/fsun/DeepViewAgg/outputs/MVFusion_orig'

# DeepSet


############# View-Fusion + 3D network evaluation
### Mask2Former

# Linear input layer

# Majority vote

# Transformer
# M2F masks 6 views small
# CHECKPOINT_DIR = "/home/fsun/DeepViewAgg/outputs/MVFusion_3D_6_views_m2f_masks"

# DeepSet

### ViT-Adapter

# Linear input layer

# Majority vote

# Transformer
# ViT_masks 6 views small
# CHECKPOINT_DIR = '/home/fsun/DeepViewAgg/outputs/ViT_masks_3rd_run' # 3rd run

# DeepSet




dataset_config = 'segmentation/multimodal/Feng/scannet-neucon-smallres-m2f.yaml'   
models_config = 'segmentation/multimodal/Feng/mvfusion'    # model family
model_name = 'MVFusion_3D_small_6views'                       # specific model

In [1]:
# Uncomment to use autoreload
%load_ext autoreload
%autoreload 2

import os
import os.path as osp
import sys
import torch
import numpy as np
from time import time
from omegaconf import OmegaConf
start = time()
import warnings
warnings.filterwarnings('ignore')

# torch.cuda.set_device(I_GPU)
DIR = os.path.dirname(os.getcwd())
ROOT = os.path.join(DIR, "..")
sys.path.insert(0, ROOT)
sys.path.insert(0, DIR)

from torch_points3d.utils.config import hydra_read
from torch_geometric.data import Data
from torch_points3d.core.multimodal.data import MMData, MMBatch
from torch_points3d.visualization.multimodal_data import visualize_mm_data
from torch_points3d.core.multimodal.image import SameSettingImageData, ImageData
from torch_points3d.datasets.segmentation.multimodal.scannet import ScannetDatasetMM
from torch_points3d.datasets.segmentation.scannet import CLASS_COLORS, CLASS_NAMES, CLASS_LABELS
from torch_points3d.metrics.segmentation_tracker import SegmentationTracker
from torch_points3d.datasets.segmentation import IGNORE_LABEL
from torch_points3d.metrics.scannet_segmentation_tracker import ScannetSegmentationTracker
from torch_points3d.metrics.colored_tqdm import Coloredtqdm as Ctq


from PIL import Image

import matplotlib.pyplot as plt 

%matplotlib inline

CLASS_COLORS[0] = (174.0, 199.0, 232.0)
CLASS_COLORS[-1] = (0, 0, 0)
import plotly.io as pio

#pio.renderers.default = 'jupyterlab'        # for local notebook
pio.renderers.default = 'iframe_connected'  # for remote notebook. Other working (but seemingly slower) options are: 'sphinx_gallery' and 'iframe'

MMData debug() function changed, please uncomment the 3rd assert line when doing inference without M2F features!


In [3]:
# Set your dataset root directory, where the data was/will be downloaded
DATA_ROOT = '/scratch-shared/fsun/dvata'


overrides = [
    'task=segmentation',
    f'data={dataset_config}',
    f'models={models_config}',
    f'model_name={model_name}',
    f'data.dataroot={DATA_ROOT}',
]

cfg = hydra_read(overrides)
OmegaConf.set_struct(cfg, False)  # This allows getattr and hasattr methods to function correctly
cfg.data.load_m2f_masks = True   # load Mask2Former predicted masks
cfg.data.m2f_preds_dirname = MASK_NAME
cfg.data.n_views = cfg.models[model_name].backbone.transformer.n_views
print(cfg.data.n_views)

# Dataset instantiation
start = time()
dataset = ScannetDatasetMM(cfg.data)
# print(dataset)|
print(f"Time = {time() - start:0.1f} sec.")

6
Load predicted 2D semantic segmentation labels from directory  ViT_masks
initialize train dataset
initialize val dataset
Time = 8.3 sec.


In [None]:
from torch_points3d.models.model_factory import instantiate_model

# Create the model
print(f"Creating model: {cfg.model_name}")
model = instantiate_model(cfg, dataset)
# print(model)

# Load the checkpoint and recover the 'best_miou' model weights
checkpoint = torch.load(f'{CHECKPOINT_DIR}/{model_name}.pt', map_location='cpu')
model.load_state_dict_with_same_shape(checkpoint['models']['best_miou'], strict=False)

# Prepare the model for training
model = model.cuda()
print('Model loaded')

In [None]:
import pandas as pd
pd.set_option('display.max_rows', 50)

# Create validation loader
dataset.create_dataloaders(
    model,
    1,
    False,
    17,
    False,
    train_only=False,
    val_only=True,
    test_batch_size=1
)

mapping_idx_to_scan_names = getattr(dataset.val_dataset, "MAPPING_IDX_TO_SCAN_{}_NAMES".format(dataset.val_dataset.split.upper()))


In [2]:
# Functions for evaluation

def get_seen_points(mm_data):
    ### Select seen points
    csr_idx = mm_data.modalities['image'][0].view_csr_indexing
    dense_idx_list = torch.arange(mm_data.modalities['image'][0].num_points).repeat_interleave(csr_idx[1:] - csr_idx[:-1])
    # take subset of only seen points without re-indexing the same point
    mm_data = mm_data[dense_idx_list.unique()]
    return mm_data

def get_mode_pred(data):
    pixel_validity = data.data.mvfusion_input[:, :, 0].bool()
    mv_preds = data.data.mvfusion_input[:, :, -1].long()
            
    valid_m2f_feats = []
    for i in range(len(mv_preds)):
        valid_m2f_feats.append(mv_preds[i][pixel_validity[i]])

    mode_preds = []
    for m2feats_of_seen_point in valid_m2f_feats:
        mode_preds.append(torch.mode(m2feats_of_seen_point.squeeze(), dim=0)[0])
    mode_preds = torch.stack(mode_preds, dim=0)
        
    return mode_preds

def get_normalized_entropy(labels):
    counts = torch.unique(labels, return_counts=True)[1]
    
    pk = counts / counts.sum()
    len_pk = torch.tensor(len(pk))
    if len_pk == 1:
        normalized_entropy = 0.
    else:
        normalized_entropy = -sum(pk * torch.log2(pk)) / torch.log2(len_pk)
    return normalized_entropy
        
def get_semantic_image_from_camera(dataset, mesh_triangles, intrinsic, extrinsic, class_id_faces, im_size=(480, 640)):
    """
    Returns the back-projected semantic label image given camera parameters and (semantic) mesh.
    """
    
    # Initialize rays for given camera
    rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
        intrinsic_matrix=intrinsic,
        extrinsic_matrix=extrinsic,
        width_px=im_size[1],
        height_px=im_size[0],
    )

    # Get result
    ans = scene.cast_rays(rays)

    primitive_ids = ans['primitive_ids'].numpy()
    primitive_uvs = ans['primitive_uvs'].numpy()

    # Select the closest vertex for each valid face in the projected mesh
    valid_mask = primitive_ids != scene.INVALID_ID

    # https://stackoverflow.com/questions/45212949/vertex-of-a-3d-triangle-that-is-closest-to-a-point-given-barycentric-parameter-o
    w_coords = (1 - primitive_uvs[:, :, 0][valid_mask] - primitive_uvs[:, :, 1][valid_mask])
    barycentric_coords = np.concatenate((w_coords[:, None], primitive_uvs[valid_mask]), axis=-1)

    selected_vertex_idx = np.argmax(barycentric_coords, axis=-1)

    contained_mesh_triangles = mesh_triangles[primitive_ids[valid_mask]]
    closest_mesh_vertices = contained_mesh_triangles[range(len(barycentric_coords)), selected_vertex_idx]
    
    # Map mesh vertices to semantic label
    labels = class_id_faces[closest_mesh_vertices]
    # Remap to [0 ; num_labels - 1]
    labels = dataset.val_dataset._remap_labels(torch.tensor(labels))

    # Visualize back-projection
    image = torch.ones(im_size, dtype=torch.long) * -1
    image[valid_mask] = labels


    # NN interpolation at invalid pixels          
    nearest_neighbor = scipy.ndimage.morphology.distance_transform_edt(
        image==-1, return_distances=False, return_indices=True)    

    image = image[nearest_neighbor].numpy()
    return image

def read_axis_align_matrix(filename):
    lines = open(filename).readlines()
    axis_align_matrix = None
    for line in lines:
        if "axisAlignment" in line:
            axis_align_matrix = torch.Tensor([float(x) for x in line.rstrip().strip("axisAlignment = ").split(" ")]).reshape((4, 4))
            break
    return axis_align_matrix

def save_semantic_prediction_as_txt(tracker, model_name, mask_name):
    orginal_class_ids = np.asarray(tracker._dataset.train_dataset.valid_class_idx)
    path_to_submission = tracker._dataset.path_to_submission
    
    path_to_submission = osp.join(path_to_submission, model_name, mask_name)
    if not osp.exists(path_to_submission):
        os.makedirs(path_to_submission)
    
    for scan_id in tracker._full_preds:
        full_pred = tracker._full_preds[scan_id].cpu().numpy().astype(np.int8)
        full_pred = orginal_class_ids[full_pred]  # remap labels to original labels between 0 and 40
        scan_name = tracker._raw_datas[scan_id].scan_name
        path_file = osp.join(path_to_submission, "{}.txt".format(scan_name))
        
        np.savetxt(path_file, full_pred, delimiter="/n", fmt="%d")
        
    return path_to_submission
        
        
def mesh_to_image(cfg, dataset, path_to_submission, scans_dir, save_output='if_not_exists'):
    # User input
    output_image_size = (480, 640)
    preprocessed_2d_data_dir = "/scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/processed/processed_2d_val"

    ########################################################################################################################
    input_mask_name = cfg.data.m2f_preds_dirname
    scan_names = list(dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())
    
    path_to_submission = osp.join(path_to_submission, cfg.model_name, input_mask_name)

    for scan_name in Ctq(scan_names):
        # Output folder location
        refined_mask_dir = osp.join(scans_dir, scan_name, f"{input_mask_name}_refined", f"{cfg.model_name}")

        # Skip this step if output folder already exists
        if not osp.exists(refined_mask_dir) and save_output == 'if_not_exists':
            os.makedirs(refined_mask_dir)

            # Load data
            mesh = o3d.io.read_triangle_mesh(f"{scans_dir}/{scan_name}/{scan_name}_vh_clean_2.ply")
            mesh_triangles = np.asarray(mesh.triangles)
            mesh = o3d.t.geometry.TriangleMesh.from_legacy(mesh)

            # Load predicted class label per vertex
            class_id_faces = np.loadtxt(f"{path_to_submission}/{scan_name}.txt").astype(int)

            # Camera parameters
            intrinsic = np.loadtxt(f"{scans_dir}/{scan_name}/sens/intrinsic/intrinsic_depth.txt")[:3, :3]
            images = torch.load(f"{preprocessed_2d_data_dir}/{scan_name}.pt")

            # Undo axis alignment for extrinsics  
            axis_align_matrix_path = osp.join(scans_dir, scan_name, scan_name + '.txt')
            axis_align_matrix = read_axis_align_matrix(axis_align_matrix_path)
            inv = torch.linalg.inv(axis_align_matrix.T)
            images.extrinsic = inv.T  @ images.extrinsic        

            # Make world-to-camera
            extrinsics = torch.linalg.inv(images.extrinsic).numpy()
            image_names = [osp.splitext(osp.basename(x))[0] for x in images.path]

            # Raycasting
            scene = o3d.t.geometry.RaycastingScene()
            scene.add_triangles(mesh)

            for i in range(len(image_names)):
                image = get_semantic_image_from_camera(dataset=dataset, mesh_triangles=mesh_triangles, intrinsic=intrinsic,
                                                       extrinsic=extrinsics[i], 
                                                       class_id_faces=class_id_faces, im_size=output_image_size)


                # Save refined prediction (backprojected from mesh + interpolated missing pixels)
                image = Image.fromarray(image.astype(np.uint8), 'L')
                im_save_path = osp.join(refined_mask_dir, image_names[i] + '.png')
                image.save(im_save_path)   
                
        else:
            print("Output directory already exists!")

In [4]:
# Evaluator class imports
from torch_points3d.models.model_factory import instantiate_model

import scipy.ndimage
import numpy as np
from PIL import Image

class Evaluator():
    
    def __init__(self, cfg, dataset, checkpoint_dir):
        self.scans_dir = "/scratch-shared/fsun/data/scannet/scans"

        self._dataset = dataset
        self._cfg = cfg
        self.wandb_log = False
        self.tensorboard_log = False
        
        # Create the model
        print(f"Creating model: {self._cfg.model_name}")
        model = instantiate_model(self._cfg, self._dataset)

        # Load the checkpoint and recover the 'best_miou' model weights
        checkpoint = torch.load(f'{checkpoint_dir}/{self._cfg.model_name}.pt', map_location='cpu')
        model.load_state_dict_with_same_shape(checkpoint['models']['best_miou'], strict=False)

        # Prepare the model for training
        self._model = model.cuda()
        print(self._device)
#         self._device = model.device()
    
    def eval_all_metrics(self, stage_name=""):
        self._is_training = False
        
        self._tracker_refined: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)
        self._tracker_refined_seen_points: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)            
                        
        if self._dataset.has_val_loader:
            if not stage_name or stage_name == "val":
                self._test_refined(epoch=1, stage_name="val")   
                
        # Upscale predictions containing all points to 0.01 voxel size and save point cloud predictions   
        self._tracker_refined.finalise(full_res=True)
        path_to_submission = save_semantic_prediction_as_txt(
            self._tracker_refined, self._cfg.model_name, self._dataset.m2f_preds_dirname)
        
        # Back-project semantic mesh (from pcd) to 2D images given the maximum number of views per scene, and save.
        # Skips this step if refined images already exist for given model and mask
        mesh_to_image(self._cfg, self._dataset, path_to_submission, self.scans_dir, save_output='if_not_exists') 
        
        # Evaluate 2D semantic segmentation
        self._tracker_refined_2d_iou: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)
        
        self._evaluate_2d_iou()
        
        # Evaluate 2D cross-view consistency        
        self._evaluate_2d_CC()        
        
#         # Evaluate 2D temporal consistency
#         self._evaluate_2d_TC()        

    def _evaluate_2d_CC(self):
        
        temp_tracker: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)
                
        self.instance_count = torch.zeros(2, 20)
        self.cum_n_entropy = torch.zeros(2, 20)

        with Ctq(self._dataset.val_dataloader) as tq_loader:
            for batch in tq_loader:
                
                with torch.no_grad():
                    self._model.set_input(batch, self._device)
                    
                    with torch.cuda.amp.autocast(enabled=self._model.is_mixed_precision()):
                        self._model.forward(epoch=1)

                    batch.data.pred = self._model.output.detach().cpu().argmax(1)

                    batch = get_seen_points(batch)


                    temp_tracker.track(model=None, pred_labels=batch.data.pred, gt_labels=batch.data.y)

                    # Accumulate entropy of seen points
                    add_entropy_to_accumulator(batch, cum_n_entropy)
            
        print("3D seen points metrics")
        print(temp_tracker.get_metrics())
        
        self.cum_n_entropy = self.cum_n_entropy / (self.instance_count + 1e-8)
        self.crossview_consistency = 1 - self.cum_n_entropy
        print("2D cross-view consistency (CC) for refined masks")
        print(self.crossview_consistency[1])
        print("Mean: ", self.crossview_consistency[1].mean())
        
        print("2D cross-view consistency (CC) for input masks")
        print(self.crossview_consistency[0])
        print("Mean: ", self.crossview_consistency[0].mean())


        
                
    def add_entropy_scores_to_cumulator(self, mm_data):
        for instance_id in mm_data.data['instance_labels'].unique():

            if instance_id == 0:
                continue

            instance_mask = mm_data.data['instance_labels'] == instance_id
            instance = mm_data[instance_mask]
            instance_class = instance.y[0]

            # Skip invalid semantic class
            if instance_class == IGNORE_LABEL:
                continue

            # Track all per-point predictions of active views for current instance. Some points have more 
            # predictions than others.
            input_preds = instance.modalities['image'][0].get_mapped_m2f_features().squeeze()
            output_preds = instance.modalities['image'][0].get_mapped_gt_labels().squeeze() + 1   # +1 label offset

            input_n_entropy = get_normalized_entropy(input_preds)
            output_n_entropy = get_normalized_entropy(output_preds)

            self.cum_n_entropy[0, instance_class] += input_n_entropy
            self.cum_n_entropy[1, instance_class] += output_n_entropy
            self.instance_count[:, instance_class] += 1

        
    def _evaluate_2d_iou(self):
        input_mask_name = self._cfg.data.m2f_preds_dirname

        scan_names = list(self._dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())
        
        self._tracker_refined_2d_iou.reset(stage="val")

        with Ctq(self._dataset.val_dataloader) as tq_loader:
            for batch in tq_loader:
                scan_name = mapping_idx_to_scan_names[batch.id_scan.item()]

                gt_dir = osp.join(self.scans_dir, scan_name, 'label-filt-scannet20')
                mask_dir = osp.join(self.scans_dir, scan_name, f"{cfg.model_name}_{input_mask_name}_refined")

                im_names = [osp.basename(x) for x in batch.modalities['image'][0].m2f_pred_mask_path]

                for im in im_names:
                    gt = Image.open(osp.join(gt_dir, im))
                    mask = Image.open(osp.join(mask_dir, im))
                    gt = gt.resize((640, 480), 0)

                    gt = np.asarray(gt).astype(float) - 1
                    mask = np.asarray(mask)

                    self._tracker_refined_2d_iou.track(pred_labels=mask, gt_labels=gt, model=None)

        print("2D evaluation of refined masks")
        print(self._tracker_refined_2d_iou.get_metrics())
        print(self._tracker_refined_2d_iou._miou_per_class)       
            
    def _test_refined(self, epoch, stage_name: str):

        loaders = [self._dataset.val_dataloader]

        self._model.eval()
            
        for loader in loaders:
            print("Input mask name: ", loader.dataset.m2f_preds_dirname)
            
            stage_name = loader.dataset.name
            self._tracker_refined.reset(stage_name)
            self._tracker_refined_seen_points.reset(stage_name)
            
            with Ctq(loader) as tq_loader:
                for data in tq_loader:
                    with torch.no_grad():

                        self._model.set_input(data, self._device)
                        with torch.cuda.amp.autocast(enabled=self._model.is_mixed_precision()):
                            self._model.forward(epoch=epoch)

                        data.data.pred = self._model.output.detach().cpu().argmax(1)

                        # 3D mIoU, all points
                        self._tracker_refined.track(model, full_res=True, data=data)

                        # 3D mIoU, seen points
                        data = get_seen_points(data)
                        self._tracker_refined_seen_points.track(pred_labels=data.data.pred, gt_labels=data.data.y, model=None)


                    tq_loader.set_postfix(**self._tracker_refined.get_metrics())

            print("Evaluated scores for 3D semantic segmentation on all points: ")
            
            print("--- Refined 3D ---")
            print(self._tracker_refined.get_metrics())
            print(self._tracker_refined._miou_per_class)
            
            print("Evaluated scores for 3D semantic segmentation on all points: ")
    
            print("--- Refined 3D ---")
            print(self._tracker_refined_seen_points.get_metrics())
            print(self._tracker_refined_seen_points._miou_per_class)
            
                            
        
    def eval_baseline(self, stage_name=""):
        self._is_training = False
        
        self._tracker_baseline: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)
            
        self._tracker_baseline_2d: BaseTracker = self._dataset.get_tracker(
            self.wandb_log, self.tensorboard_log)
                        
        epoch = 1

        if self._dataset.has_val_loader:
            if not stage_name or stage_name == "val":
                self._test_baseline(epoch, "val")   


In [38]:
# scans_dir = ""
# scan_names = list(dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())
# input_mask_name = 'ViT_masks'
# model_name = "Deepset_3D"
# for scan_name in scan_names:
#     old_dir = osp.join(scans_dir, scan_name, f"{model_name}_{input_mask_name}_refined")
    
# #     temp_dir = osp.join(scans_dir, scan_name, f"{model_name}_{input_mask_name}_refined_temp")

#     mask_dir = osp.join(scans_dir, scan_name, f"{input_mask_name}_refined")
    
#     output_dir = osp.join(mask_dir, f"{model_name}")
    
#     print(f"mkdir {mask_dir}")
    
#     print(f"mv {old_dir} {output_dir}")
    


In [26]:
# scans_dir = ""
# scan_names = list(dataset.val_dataset.MAPPING_IDX_TO_SCAN_VAL_NAMES.values())
# input_mask_name = 'm2f_masks'
# temp_name = "MVFusion_3D_small_6views"
# for scan_name in scan_names:
#     old_dir = osp.join(scans_dir, scan_name, f"{input_mask_name}_refined")
    
#     temp_dir = osp.join(scans_dir, scan_name, f"{temp_name}")

#     mask_dir = osp.join(scans_dir, scan_name, f"{input_mask_name}_refined")
#     print(f"mv {old_dir} {temp_dir}")
    
#     print(f"mkdir {mask_dir}")
    
#     print(f"mv {temp_dir} {mask_dir}")
    

In [6]:
### Note: set over to Python file with argparse from script file
MASK_NAME = 'ViT_masks'

dataset_config = 'segmentation/multimodal/Feng/scannet-neucon-smallres-m2f-allviews.yaml'   
models_config = 'segmentation/multimodal/Feng/mvfusion'    # model family
model_name = 'MVFusion_3D_small_6views'                       # specific model

overrides = [
    'task=segmentation',
    f'data={dataset_config}',
    f'models={models_config}',
    f'model_name={model_name}',
]

cfg = hydra_read(overrides)
OmegaConf.set_struct(cfg, False)  # This allows getattr and hasattr methods to function correctly
cfg.data.load_m2f_masks = True   # load input masks
cfg.data.m2f_preds_dirname = MASK_NAME
cfg.data.n_views = cfg.models[model_name].backbone.transformer.n_views
print(cfg.data.n_views)

# Dataset instantiation
start = time()
dataset = ScannetDatasetMM(cfg.data)
print(f"Dataset Creation Time = {time() - start:0.1f} sec.")


# Transformer
# ViT_masks 6 views small
checkpoint_dir = '/home/fsun/DeepViewAgg/outputs/ViT_masks_3rd_run' # 3rd run


evaluator = Evaluator(cfg, dataset, checkpoint_dir=checkpoint_dir)
evaluator.eval_all_metrics(stage_name='val')

6
Load predicted 2D semantic segmentation labels from directory  ViT_masks
initialize train dataset
initialize val dataset
Dataset Creation Time = 9.1 sec.
Creating model: MVFusion_3D_small_6views
task:  segmentation.multimodal
tested_model_name:  MVFusion_3D_small_6views
class_name:  MVFusionAPIModel
model_module:  torch_points3d.models.segmentation.multimodal.Feng.mvfusion_3d
name, cls of chosen model_cls:  MVFusionAPIModel <class 'torch_points3d.models.segmentation.multimodal.Feng.mvfusion_3d.MVFusionAPIModel'>
x feature dim:  {'FEAT': 3}
nc_in:  67
nc_in:  64
nc_in:  32
nc_in:  64
nc_in:  128
Return attention maps!
nc_in:  256
nc_in:  128
nc_in:  128
nc_in:  96
nc_in:  96


TypeError: 'torch.device' object is not callable

### Cross-view entropy

- 1. gather all points of an instance

For now, an easy way to measure entropy is to:
- 2. aggregate all predictions from active views of this instance,
- 3a. group instances per semantic class
- 3b. loop over all validation scenes
- 4. measure the entropy per semantic class 

Better method to capture entropy is:
- Measure entropy score for each instance (because aggregating over all scenes puts a focus on label correctness instead of surface consistency)
- Then aggregate entropy scores between all scenes

In [97]:
# input_entropy_tracker = ScannetSegmentationTracker(dataset=dataset, stage='train', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)
# output_entropy_tracker = ScannetSegmentationTracker(dataset=dataset, stage='val', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)
# gt_entropy_tracker = ScannetSegmentationTracker(dataset=dataset, stage='val', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)


# idx = 0
# mapping_idx_to_scan_names = getattr(dataset.val_dataset, "MAPPING_IDX_TO_SCAN_{}_NAMES".format(dataset.val_dataset.split.upper()))
# scan_name = mapping_idx_to_scan_names[idx]

In [98]:
# instance_labels = get_instance_labels(dataset.val_dataset, scan_name)
# mm_data = dataset.val_dataset[idx]

In [99]:
# mm_data.data['instance_labels'] = instance_labels
# mm_data


In [11]:
def get_normalized_entropy(labels):
    counts = torch.unique(labels, return_counts=True)[1]
    
    pk = counts / counts.sum()
    len_pk = torch.tensor(len(pk))
    if len_pk == 1:
        normalized_entropy = 0.
    else:
        normalized_entropy = -sum(pk * torch.log2(pk)) / torch.log2(len_pk)
    return normalized_entropy
    
tracker = ScannetSegmentationTracker(dataset=dataset, stage='train', wandb_log=False, use_tensorboard=False, ignore_label=IGNORE_LABEL)
tracker.reset(stage='train')

instance_count = torch.zeros(2, 20)
cum_n_entropy = torch.zeros(2, 20)

for mm_data in dataset._val_loader:
    print(mm_data.id_scan)
    
    with torch.no_grad():
        model.set_input(mm_data, model.device)
        model(mm_data)

    mm_data.data.pred = model.output.detach().cpu().argmax(1)
    
    mm_data = get_seen_points(mm_data)
    
    tracker.track(model=None, pred_labels=mm_data.data.pred, gt_labels=mm_data.data.y)


    for instance_id in mm_data.data['instance_labels'].unique():

        if instance_id == 0:
            continue

        instance_mask = mm_data.data['instance_labels'] == instance_id
        instance = mm_data[instance_mask]
        instance_class = instance.y[0]
        
        # Skip invalid semantic class
        if instance_class == -1:
            continue

        # Track all per-point predictions of active views for current instance. Some points have more 
        # predictions than others.
        input_preds = instance.modalities['image'][0].get_mapped_m2f_features().squeeze()
        output_preds = instance.modalities['image'][0].get_mapped_gt_labels().squeeze() + 1   # +1 label offset

        input_n_entropy = get_normalized_entropy(input_preds)
        output_n_entropy = get_normalized_entropy(output_preds)
                
        cum_n_entropy[0, instance_class] += input_n_entropy
        cum_n_entropy[1, instance_class] += output_n_entropy
        instance_count[:, instance_class] += 1
        
                
        

    #     # GT views contain invalid label, so mask those points out
    #     output_preds_valid_mask = output_preds != -1

#         gt_label = instance.y.unique()    
#         gt_labels = gt_label.repeat(len(input_preds))

#         input_entropy_tracker.track(model=None, pred_labels=input_preds, gt_labels=gt_labels)
#         output_entropy_tracker.track(model=None, pred_labels=output_preds, 
#                                      gt_labels=gt_labels)
        


    #     input_mask_labels, input_mask_counts = instance.modalities['image'][0].get_mapped_m2f_features().unique(return_counts=True)
    #     print(input_mask_labels, input_mask_counts)

print("3D seen points metrics")
print(tracker.get_metrics())
print(cum_n_entropy)
print(instance_count)

print("Final normalized entropy matrix over all instances")
print(cum_n_entropy / (instance_count + 1e-8))

""" ViT Masks refined
3D seen points metrics
{'train_acc': 92.66021202577072, 'train_macc': 83.70569221362946, 'train_miou': 75.68768085547666}
tensor([[312.2038,  26.8408,  94.0868,  14.3544, 503.6727,  21.2325, 122.7478,
         150.3396,  97.9678,  15.0548, 109.4467,  19.5293,  34.5468,  19.8944,
          14.7285,   8.3424,  18.4457,  42.0031,   4.7867, 179.6838],
        [229.2438,   9.2486,  56.4155,   7.0754, 120.2393,   7.9409,  44.4460,
         102.9044,  87.2189,   6.8330,  76.6685,  13.7709,  12.5995,  12.4757,
           5.0266,   3.5362,   5.3676,  36.9687,   2.2857,  89.8911]])
tensor([[1781.,  319.,  372.,   81., 1367.,   96.,  350.,  465.,  282.,   77.,
          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  528.],
        [1781.,  319.,  372.,   81., 1367.,   96.,  350.,  465.,  282.,   77.,
          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  528.]])
Final normalized entropy matrix over all instances
tensor([[0.1753, 0.0841, 0.2529, 0.1772, 0.3685, 0.2212, 0.3507, 0.3233, 0.3474,
         0.1955, 0.4930, 0.3756, 0.2720, 0.2969, 0.2584, 0.2979, 0.3180, 0.4286,
         0.1544, 0.3403],
        [0.1287, 0.0290, 0.1517, 0.0874, 0.0880, 0.0827, 0.1270, 0.2213, 0.3093,
         0.0887, 0.3454, 0.2648, 0.0992, 0.1862, 0.0882, 0.1263, 0.0925, 0.3772,
         0.0737, 0.1702]])
         
         
M2F_masks
3D seen points metrics
{'train_acc': 92.00312846686491, 'train_macc': 83.197711066367, 'train_miou': 74.68883256172798}
tensor([[344.9477,  29.3845, 104.1460,  15.6663, 452.5604,  25.0990, 128.4360,
         163.6485, 112.5904,  15.9026, 122.6382,  15.3401,  34.5984,  22.6733,
          16.1505,   7.8702,  15.1792,  42.9855,   5.7067, 185.2332],
        [188.5569,  12.3578,  44.5499,   7.4627, 406.3045,   9.0966,  76.6618,
          99.0183,  89.4181,  12.0120,  71.5745,  10.2352,  20.3377,   9.4022,
           8.2375,   5.6555,  11.5642,  25.0575,   1.6539, 108.4080]])
tensor([[1783.,  319.,  372.,   81., 1366.,   96.,  350.,  466.,  282.,   77.,
          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  530.],
        [1783.,  319.,  372.,   81., 1366.,   96.,  350.,  466.,  282.,   77.,
          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  530.]])
Final normalized entropy matrix over all instances
tensor([[0.1935, 0.0921, 0.2800, 0.1934, 0.3313, 0.2614, 0.3670, 0.3512, 0.3993,
         0.2065, 0.5524, 0.2950, 0.2724, 0.3384, 0.2833, 0.2811, 0.2617, 0.4386,
         0.1841, 0.3495],
        [0.1058, 0.0387, 0.1198, 0.0921, 0.2974, 0.0948, 0.2190, 0.2125, 0.3171,
         0.1560, 0.3224, 0.1968, 0.1601, 0.1403, 0.1445, 0.2020, 0.1994, 0.2557,
         0.0534, 0.2045]])
         
"""

Adding instance labels to mm_dataAdding instance labels to mm_data
filename: Adding instance labels to mm_data
Adding instance labels to mm_data 
filename: Adding instance labels to mm_data/scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename: Adding instance labels to mm_dataAdding instance labels to mm_dataAdding instance labels to mm_dataAdding instance labels to mm_data
filename:  Adding instance labels to mm_data

filename: filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
/scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv

filename: filename: filename:   /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0050_01/scene0050_01_vh_clean_2.ply/scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0

filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0193_00/scene0193_00_vh_clean_2.ply
tensor([26])
63
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0193_01/scene0193_01_vh_clean_2.ply
tensor([27])
36
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0196_00/scene0196_00_vh_clean_2.ply
tensor([28])
180
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0203_00/scene0203_00_vh_clean_2.ply
tensor([29])
Adding instance labels to mm_data
filenam

filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0356_01/scene0356_01_vh_clean_2.ply
tensor([85])
99
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0356_02/scene0356_02_vh_clean_2.ply
tensor([86])
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0357_00/scene0357_00_vh_clean_2.ply
tensor([87])
72
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0357_01/scene0357_01_vh_clean_2.ply
tensor([88])
81
81
Adding instance labels to mm_data
filen

filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0552_00/scene0552_00_vh_clean_2.ply
tensor([144])
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0552_01/scene0552_01_vh_clean_2.ply
tensor([145])
99
54
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0553_00/scene0553_00_vh_clean_2.ply
tensor([146])
90
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallre

Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0629_02/scene0629_02_vh_clean_2.ply
tensor([203])
225
126
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0633_00/scene0633_00_vh_clean_2.ply
tensor([204])
99
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0633_01/scene0633_01_vh_clean_2.ply
tensor([205])
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shar

filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0696_02/scene0696_02_vh_clean_2.ply
tensor([261])
279
180
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0697_00/scene0697_00_vh_clean_2.ply
tensor([262])
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0697_01/scene0697_01_vh_clean_2.ply
tensor([263])
162
Adding instance labels to mm_data
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/metadata/scannetv2-labels.combined.tsv
filename:  /scratch-shared/fsun/dvata/scannet-neucon-smallres-m2f/raw/scans/scene0697_02/scene0697_02_vh_clean_2.ply
tensor([264])
153
Adding instance labels to mm_da

" ViT Masks refined\n3D seen points metrics\n{'train_acc': 92.66021202577072, 'train_macc': 83.70569221362946, 'train_miou': 75.68768085547666}\ntensor([[312.2038,  26.8408,  94.0868,  14.3544, 503.6727,  21.2325, 122.7478,\n         150.3396,  97.9678,  15.0548, 109.4467,  19.5293,  34.5468,  19.8944,\n          14.7285,   8.3424,  18.4457,  42.0031,   4.7867, 179.6838],\n        [229.2438,   9.2486,  56.4155,   7.0754, 120.2393,   7.9409,  44.4460,\n         102.9044,  87.2189,   6.8330,  76.6685,  13.7709,  12.5995,  12.4757,\n           5.0266,   3.5362,   5.3676,  36.9687,   2.2857,  89.8911]])\ntensor([[1781.,  319.,  372.,   81., 1367.,   96.,  350.,  465.,  282.,   77.,\n          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  528.],\n        [1781.,  319.,  372.,   81., 1367.,   96.,  350.,  465.,  282.,   77.,\n          222.,   52.,  127.,   67.,   57.,   28.,   58.,   98.,   31.,  528.]])\nFinal normalized entropy matrix over all instances\ntensor([[0.1753, 

In [12]:
print(cum_n_entropy / (instance_count + 1e-8) * 100)


tensor([[19.3465,  9.2115, 27.9962, 19.3411, 33.1303, 26.1448, 36.6960, 35.1177,
         39.9257, 20.6528, 55.2424, 29.5002, 27.2429, 33.8408, 28.3342, 28.1079,
         26.1711, 43.8628, 18.4088, 34.9497],
        [10.5753,  3.8739, 11.9758,  9.2132, 29.7441,  9.4756, 21.9034, 21.2486,
         31.7085, 15.6000, 32.2408, 19.6831, 16.0139, 14.0331, 14.4517, 20.1982,
         19.9383, 25.5689,  5.3351, 20.4543]])
