In [None]:
# Description: This file is used to load the pretrained model and run the inference on the test set.
%load_ext autoreload
%autoreload 2

import os, glob, sys
import random
import numpy as np
import logging
import argparse
import urllib
import plyfile
import torch
from MinkowskiEngine import SparseTensor
# add the local ./Situation3D checkout to sys.path so that `lib` and `models` can be imported
sys.path.append('./Situation3D')
from lib import config
from models.mink_unet import DisNet
import lib.openscene.feature_loader
from lib.openscene.feature_loader import FusedFeatureLoader, collation_fn_eval_all
import importlib

# Parse the YAML config once. `args` is an alias of the same object because the
# cells below read their settings as `args.<field>`.
CONFIG_PATH = './Situation3D/config/ours_openseg_pretrained.yaml'
args = cfg = config.load_cfg_from_cfg_file(CONFIG_PATH)

In [2]:
# Network instance built with the 'openseg' option (still on CPU at this point).
model = DisNet('openseg')

# Validation dataset: paths, voxel size, split and colour handling come from the
# parsed config; the remaining flags are fixed literals for this notebook.
dataset_options = dict(
    datapath_prefix=args.data_root,
    datapath_prefix_feat=args.data_root_2d_fused_feature,
    voxel_size=args.voxel_size,
    split=args.split,
    aug=False,
    memcache_init=args.use_shm,
    eval_all=True,
    identifier=6797,
    input_color=args.input_color,
)
val_data = FusedFeatureLoader(**dataset_options)

# No custom sampler: batches are drawn in dataset order (shuffle=False).
val_sampler = None
loader_options = dict(
    batch_size=2,
    shuffle=False,
    num_workers=args.test_workers,
    pin_memory=True,
    drop_last=False,
    collate_fn=collation_fn_eval_all,
    sampler=val_sampler,
)
val_loader = torch.utils.data.DataLoader(val_data, **loader_options)
# Per-scene sizes noted by the original author for a scene with 81369 raw points
# (A = number of voxels after voxelization, A <= 81369; 79889 without augmentation):
#   coords:       [A, 4]
#   feat:         [A, 3]   (all ones)
#   label:        [81369]
#   feat_3d:      [A, 768]
#   mask:         [A]
#   inds_reverse: [81369]
first_batch = next(iter(val_loader))
coords, feat, label, feat_3d, mask, inds_reverse = first_batch

# Table of (caption, value) pairs; captions are padded by hand so the values line up.
for caption, value in (
    ('=> len of the dataset:        ', len(val_data)),
    ('=> len of the loader:         ', len(val_loader)),
    ('=> coords shape and examples: ', coords.shape),
    ('=> feat shape and examples:   ', feat.shape),
    ('=> label shape and examples:  ', label.shape),
    ('=> feat_3d shape and examples:', feat_3d.shape),
    ('=> mask shape and examples:   ', mask.shape),
    ('=> inds_reverse shape and examples: ', inds_reverse.shape),
):
    print(caption, value)

# Author's note on mapping a voxel index back to a 3D coordinate:
#   coords_3d = voxel / scale + voxel_size / 2

# Wrap the batch as a MinkowskiEngine sparse tensor on the GPU.
# Per the original notes, sinput.C mirrors `coords` and sinput.F mirrors `feat`.
feat_gpu = feat.cuda(non_blocking=True)
coords_gpu = coords.cuda(non_blocking=True)
sinput = SparseTensor(feat_gpu, coords_gpu)
print(sinput.C[:3])

print('===============> END of BLOCK/1 <===============')

# --- Load the pretrained weights and run one forward pass -------------------
# torch.load opens the given path verbatim and does not expand `~`, so the home
# directory has to be resolved explicitly before the file can be found.
checkpoint_path = os.path.expanduser('~/.cache/torch/hub/checkpoints/scannet_openseg.pth.tar')
# NOTE: torch.load unpickles the file; only point this at checkpoints you trust.
# map_location='cpu' makes loading independent of the device the checkpoint was
# saved from; load_state_dict then copies the weights into the CUDA model.
checkpoint = torch.load(checkpoint_path, map_location='cpu')

model = model.cuda()
model.load_state_dict(checkpoint['state_dict'], strict=True)
# Inference only: put any BatchNorm/Dropout layers into evaluation behaviour and
# skip autograd bookkeeping for the forward pass.
model.eval()

data_dict = {}
data_dict['openscene_in'] = sinput
with torch.no_grad():
    data_dict = model(data_dict)

# Intermediate and final outputs that the model stores in the returned dict.
feat_bottleneck = data_dict['feat_bottleneck']
feat_layer5 = data_dict['feat_layer5']
feat_layer6 = data_dict['feat_layer6']
out = data_dict['openscene_out']

# Coordinate table of the bottleneck: its shape plus per-column min / max.
bottleneck_coords = feat_bottleneck.C
print('=> feat_bottleneck C&F shape and examples: ', bottleneck_coords.shape, bottleneck_coords.min(0)[0].cpu(), bottleneck_coords.max(0)[0].cpu())
print('=> tensor_stride shape and examples: ', feat_bottleneck.tensor_stride)
print(bottleneck_coords[:3])
# print(feat_bottleneck.F[:3])

# --- Pool the bottleneck features over the third spatial axis ---------------
# For every scene in the batch, voxels that share the same first two coordinates
# are merged into one entry whose feature is the mean of the merged features.

# Edge length of one input voxel, taken from the same config value the loader
# was built with instead of a hard-coded number.
voxel_size = args.voxel_size
# Offset from a bottleneck cell's corner to its centre, in input-voxel units
# (first two axes only); it is the same for every scene, so compute it once.
half_stride = torch.tensor(feat_bottleneck.tensor_stride[0:2], device=feat_bottleneck.F.device) / 2

list_of_coords, list_of_feats = feat_bottleneck.decomposed_coordinates_and_features
# Loop variables are deliberately not called `coords`/`feats`, so the batch-level
# `coords` fetched from the loader is not overwritten.
for scene_coords, scene_feats in zip(list_of_coords, list_of_feats):
    print('=> coords shape and examples: ', scene_coords.shape)
    reduced_coords = scene_coords[:, [0, 1]]
    # `indices[i]` is the row of `unique_coords` that voxel i falls into.
    unique_coords, indices = reduced_coords.unique(dim=0, return_inverse=True)
    reduced_feats = torch.zeros(unique_coords.size(0), scene_feats.size(1),
                                dtype=scene_feats.dtype, device=scene_feats.device)
    # reduce can be 'sum' / 'amax' / 'mean'.
    # include_self=False is required for a true mean: with the default (True) the
    # zero-initialised destination row is counted as one extra sample, so every
    # group would be divided by (n + 1) instead of n.
    reduced_feats = reduced_feats.scatter_reduce_(
        0, indices.unsqueeze(-1).expand_as(scene_feats), scene_feats,
        reduce='mean', include_self=False)
    print('=> unique_coords shape and examples: ', unique_coords.shape)
    print(unique_coords[:3])
    # Cell centre in input-voxel units, scaled by the voxel edge length.
    positions = (unique_coords + half_stride) * voxel_size
    print('=> Actual 3D positions:')
    print(positions[:3])
    print('=> reduced_feats shape and examples: ', reduced_feats.shape)




# feat_bottleneck_dense, feat_bottleneck_min_coordinate, feat_bottleneck_tensor_stride = feat_bottleneck.dense()
# print('=> feat_bottleneck_dense shape and examples: ', feat_bottleneck_dense.shape) # [2, 256, 27, 28, 10]
# # print('=> min_coordinate shape and examples: ', feat_bottleneck_min_coordinate.shape)     # [0, 0, 0]
# # print('=> tensor_stride shape and examples: ', feat_bottleneck_tensor_stride.shape)       # [16, 16, 16]
# print(feat_bottleneck_dense[0, :, -1, -1, -1])
# print(feat_bottleneck_dense[1, :, 8, 8, 0])
    

# Same coordinate summary as for the bottleneck, for the two later layers:
# table shape, per-column min / max, then the first three rows.
for layer_name, layer_out in (('feat_layer5', feat_layer5), ('feat_layer6', feat_layer6)):
    print(f'=> {layer_name} C&F shape and examples: ',
          layer_out.C.shape,
          layer_out.C.min(0)[0].cpu(),
          layer_out.C.max(0)[0].cpu())
    print(layer_out.C[:3])
    # print(layer_out.F[:3])


print('===============> END of CELL <===============')

=> len of the dataset:         2
=> len of the loader:          1
=> coords shape and examples:  torch.Size([165789, 4])
=> feat shape and examples:    torch.Size([165789, 3])
=> label shape and examples:   torch.Size([206895])
=> feat_3d shape and examples: torch.Size([165789, 768])
=> mask shape and examples:    torch.Size([165789])
=> inds_reverse shape and examples:  torch.Size([206895])
tensor([[  0, 415, 128, 128],
        [  0, 415, 128,  93],
        [  0, 415, 128,  95]], device='cuda:0', dtype=torch.int32)
=> feat_bottleneck C&F shape and examples:  torch.Size([2357, 4]) tensor([0, 0, 0, 0], dtype=torch.int32) tensor([  1, 416, 432, 144], dtype=torch.int32)
=> tensor_stride shape and examples:  [16, 16, 16]
tensor([[  0, 400, 128,  32],
        [  0, 400, 128,  48],
        [  0, 400, 128,  80]], device='cuda:0', dtype=torch.int32)
=> coords shape and examples:  torch.Size([1730, 3])
=> unique_coords shape and examples:  torch.Size([514, 2])
tensor([[  0, 224],
        [  0, 

  reduced_feats = reduced_feats.scatter_reduce_(0, indices.unsqueeze(-1).expand_as(feats), feats, reduce='mean')


tensor([[0.1600, 4.6400],
        [0.1600, 4.9600],
        [0.1600, 5.2800]], device='cuda:0')
=> reduced_feats shape and examples:  torch.Size([514, 256])
=> coords shape and examples:  torch.Size([627, 3])
=> unique_coords shape and examples:  torch.Size([298, 2])
tensor([[  0, 176],
        [  0, 192],
        [  0, 224]], device='cuda:0', dtype=torch.int32)
=> Actual 3D positions:
tensor([[0.1600, 3.6800],
        [0.1600, 4.0000],
        [0.1600, 4.6400]], device='cuda:0')
=> reduced_feats shape and examples:  torch.Size([298, 256])
=> feat_layer5 C&F shape and examples:  torch.Size([9646, 4]) tensor([0, 0, 0, 0], dtype=torch.int32) tensor([  1, 416, 432, 152], dtype=torch.int32)
tensor([[  0, 408, 128,  40],
        [  0, 408, 128,  48],
        [  0, 408, 128,  80]], device='cuda:0', dtype=torch.int32)
=> feat_layer6 C&F shape and examples:  torch.Size([36247, 4]) tensor([0, 0, 0, 0], dtype=torch.int32) tensor([  1, 420, 436, 152], dtype=torch.int32)
tensor([[  0, 412, 128,  9

In [10]:
# Sizes noted by the original author for scene0000_00:
#   locs_in:   [81369, 3]    processed_data['feat']:      [19081, 768]
#   feats_in:  [81369, 3]    processed_data['mask_full']: [81369]
#   labels_in: [81369]
scene_3d_path = './dataset/ScanNet/openscene/scannet_3d/train/scene0000_00_vh_clean_2.pth'
fused_feat_path = './dataset/ScanNet/openscene/scannet_multiview_openseg/scene0000_00_0.pt'
locs_in, feats_in, labels_in = torch.load(scene_3d_path)
processed_data = torch.load(fused_feat_path)
print('=> Examples of openscene processed data: locs_in ', locs_in.shape)
print(locs_in[:3])

# Put a column of ones in front of the xyz columns -> shape (N, 4).
ones_column = np.ones((locs_in.shape[0], 1), dtype=np.int64)
locs_in_homo = np.hstack((ones_column, locs_in))
print('=> Examples of openscene processed data: locs_in_homo ', locs_in_homo.shape)
print(locs_in_homo[:3])

# print('=> Examples of openscene processed data: labels_in ')
# print(labels_in[:3])

# # sqa_points: [50000, 9]
# sqa_points = np.load('./dataset/sqa3d/SQA3D/ScanQA/data/scannet/scannet_data/scene0000_00_vert.npy')
# sqa_points_aligned = np.load('./dataset/sqa3d/SQA3D/ScanQA/data/scannet/scannet_data/scene0000_00_aligned_vert.npy')

# Read the vertices of the original ScanNet mesh for comparison with locs_in.
# raw_points: [81369, 3]
raw_points_file = './dataset/sqa3d/SQA3D/ScanQA/data/scannet/scans/scene0000_00/scene0000_00_vh_clean_2.ply'
a = plyfile.PlyData.read(raw_points_file)
vertex_element = a['vertex']
num_verts = vertex_element.count
# Stack the x / y / z columns side by side as a float32 (num_verts, 3) array.
raw_points = np.stack(
    [vertex_element.data[axis] for axis in ('x', 'y', 'z')], axis=1
).astype(np.float32)
print('=> Examples of raw points: ', raw_points.shape)
print(raw_points[:3])

=> Examples of openscene processed data: locs_in  (81369, 3)
[[0.5324214  4.5172734  0.26304942]
 [0.53404164 4.552089   0.262302  ]
 [0.544779   4.4811263  0.17396316]]
=> Examples of openscene processed data: locs_in_homo  (81369, 4)
[[1.         0.53242141 4.51727343 0.26304942]
 [1.         0.53404164 4.55208921 0.26230201]
 [1.         0.544779   4.48112631 0.17396316]]
=> Examples of raw points:  (81369, 3)
[[0.5324214  4.5172734  0.26304942]
 [0.53404164 4.552089   0.262302  ]
 [0.544779   4.4811263  0.17396316]]


In [8]:
# Toy check of the "drop one axis, then pool" grouping on five hand-written points.
coords = torch.tensor(
    [[5, 5, 0],
     [5, 5, 1],
     [4, 4, 2],
     [4, 4, 3],
     [3, 3, 3]],
    dtype=torch.int64,
)
# Features are just the coordinates shifted by 0.1, so the sums are easy to check by eye.
feats = coords + 0.1
print('=> Examples of feats: ')
print(feats)

# Keep only the first two coordinate columns; the third one is pooled away.
reduced_coords = coords[:, [0, 1]]

# One row per distinct (c0, c1) pair; `indices[i]` is the group of point i.
unique_coords, indices = torch.unique(reduced_coords, dim=0, return_inverse=True)

# Accumulate the features of every group by summation.
num_groups, num_channels = unique_coords.size(0), feats.size(1)
scatter_index = indices[:, None].expand(-1, num_channels)
reduced_feats = torch.zeros(num_groups, num_channels, device=feats.device)
reduced_feats = reduced_feats.scatter_add_(0, scatter_index, feats)

print('=> Examples of unique_coords: ')
print(unique_coords)
print('=> Examples of reduced_feats: ')
print(reduced_feats)


=> Examples of feats: 
tensor([[5.1000, 5.1000, 0.1000],
        [5.1000, 5.1000, 1.1000],
        [4.1000, 4.1000, 2.1000],
        [4.1000, 4.1000, 3.1000],
        [3.1000, 3.1000, 3.1000]])
=> Examples of unique_coords: 
tensor([[3, 3],
        [4, 4],
        [5, 5]])
=> Examples of reduced_feats: 
tensor([[ 3.1000,  3.1000,  3.1000],
        [ 8.2000,  8.2000,  5.2000],
        [10.2000, 10.2000,  1.2000]])
