In [1]:
from google.colab import drive
import os

# Mount Google Drive first so that the project folder becomes reachable.
drive.mount('/content/gdrive', force_remount=True)

# Project folder on Google Drive; activate the line that matches your own Drive layout.
gdrive_path = '/content/gdrive/MyDrive/1-university/masters/2-semester/in2390_adl4cv/nerf_segmentation/'  # Luca's path
# gdrive_path = '/content/gdrive/MyDrive/...'  # Luis' path

# Work from inside the project folder so that relative paths such as "data/" resolve.
os.chdir(gdrive_path)

# Sanity check: the listing should show the repository contents.
project_entries = os.listdir()
project_entries.sort()
print(project_entries)

Mounted at /content/gdrive
['.git', 'README.md', 'data', 'data_loader.ipynb', 'example_arrows', 'gitignore', 'open_nerf.ipynb']


Execute the following code from your machine's terminal:
```
git clone https://huggingface.co/datasets/YWjimmy/PeRFception-ScanNet
```

Then compress the cloned repository into a `.zip` file and place it inside the `/data/` folder of this repository. This step is necessary to run this notebook inside Google Colab. In the next step, we unzip the file inside this Colab session:


In [None]:
!unzip "/content/gdrive/MyDrive/1-university/masters/2-semester/in2390_adl4cv/nerf_segmentation/data/PeRFception-ScanNet.zip"

Archive:  /content/gdrive/MyDrive/1-university/masters/2-semester/in2390_adl4cv/nerf_segmentation/data/PeRFception-ScanNet.zip
replace PeRFception-ScanNet/README.md? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace PeRFception-ScanNet/.gitattributes? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace PeRFception-ScanNet/plenoxel_scannet_scene0067_00/thick.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace PeRFception-ScanNet/plenoxel_scannet_scene0067_00/last.ckpt? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

Additionally, you need the train/val/test split files that were used in the original PeRFception paper. Download the following files from the `/co3d_3d/datasets/splits/` directory of the [NeRF-Downstream](https://github.com/POSTECH-CVLab/NeRF-Downstream) repository and place them inside `/data/split/`: `scannetv2_train`, `scannetv2_val`, `scannetv2_test` (the dataset class below opens them as `scannetv2_train.txt`, `scannetv2_val.txt` and `scannetv2_test.txt`). Also make sure to put the `scene_scales.data` file in that folder.

Note: I haven't checked the exact difference between the `co3d_3d` and `co3d_2d` directories of that repository yet. I first wanted to get the code running in some way.

In [None]:
!python -m pip install --upgrade pip setuptools wheel
!python -m pip install --upgrade pip

[0m

In [None]:
!python -m pip install torch torchvision torchaudio plyfile MinkowskiEngine

# note: it is normal that building wheels for MinkowskiEngine takes about 15min

Collecting plyfile
  Using cached plyfile-1.0.3-py3-none-any.whl.metadata (2.1 kB)
Collecting MinkowskiEngine
  Using cached MinkowskiEngine-0.5.4.tar.gz (246 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.

In [None]:
import torch

# Prefer the first CUDA GPU; fall back to the CPU when no GPU is visible.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

MinkowskiEngine is not compatible with Python 3.10 out of the box. You have to go to `/usr/local/lib/python3.10/dist-packages/MinkowskiEngine/` and change the affected modules so that they import `Sequence` from `collections.abc` instead of `collections`, as described here: https://github.com/NVIDIA/MinkowskiEngine/issues/526

In [None]:
# Names of the 20 semantic classes used here (same length as VALID_CLASS_IDS).
CLASS_LABELS = (
    "wall", "floor", "cabinet", "bed", "chair",
    "sofa", "table", "door", "window", "bookshelf",
    "picture", "counter", "desk", "curtain", "refrigerator",
    "shower curtain", "toilet", "sink", "bathtub", "otherfurniture",
)

# Raw label ids that are kept; every other id below NUM_LABELS is ignored by the dataset class.
VALID_CLASS_IDS = (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 14, 16, 24, 28, 33, 34, 36, 39,
)

# The instance variants are the semantic tuples without their first two entries
# ("wall"/"floor" and ids 1/2).
CLASS_LABELS_INSTANCE = CLASS_LABELS[2:]
VALID_CLASS_IDS_INSTANCE = VALID_CLASS_IDS[2:]

# Colour per raw label id as a tuple of three floats in the 0-255 range.
# Ids 13 and 31 have no entry.
SCANNET_COLOR_MAP = {
    label_id: tuple(map(float, rgb))
    for label_id, rgb in (
        (0, (0, 0, 0)),
        (1, (174, 199, 232)),
        (2, (152, 223, 138)),
        (3, (31, 119, 180)),
        (4, (255, 187, 120)),
        (5, (188, 189, 34)),
        (6, (140, 86, 75)),
        (7, (255, 152, 150)),
        (8, (214, 39, 40)),
        (9, (197, 176, 213)),
        (10, (148, 103, 189)),
        (11, (196, 156, 148)),
        (12, (23, 190, 207)),
        (14, (247, 182, 210)),
        (15, (66, 188, 102)),
        (16, (219, 219, 141)),
        (17, (140, 57, 197)),
        (18, (202, 185, 52)),
        (19, (51, 176, 203)),
        (20, (200, 54, 131)),
        (21, (92, 193, 61)),
        (22, (78, 71, 183)),
        (23, (172, 114, 82)),
        (24, (255, 127, 14)),
        (25, (91, 163, 138)),
        (26, (153, 98, 156)),
        (27, (140, 153, 101)),
        (28, (158, 218, 229)),
        (29, (100, 125, 154)),
        (30, (178, 127, 135)),
        (32, (146, 111, 194)),
        (33, (44, 160, 44)),
        (34, (112, 128, 144)),
        (35, (96, 207, 209)),
        (36, (227, 119, 194)),
        (37, (213, 92, 176)),
        (38, (94, 106, 211)),
        (39, (82, 84, 163)),
        (40, (100, 85, 144)),
    )
}



In [None]:
from torch.utils.data import Dataset
import numpy as np
import logging
import MinkowskiEngine as ME
from plyfile import PlyData
import pickle
from typing import List, Optional, Union

def load_ply(filename, load_label=False, load_instance=False):
    """Read per-vertex arrays from the first element of a PLY file.

    Args:
        filename: Path of the PLY file, passed to ``PlyData.read``.
        load_label: When true, the ``label`` field is included as int32.
        load_instance: When true, the ``instance`` field is returned as int32;
            otherwise an all-ones int32 array with one entry per vertex is
            returned in its place.

    Returns:
        ``(coords, feats, instances)`` or, with ``load_label``,
        ``(coords, feats, labels, instances)``. ``coords`` holds the x/y/z
        fields and ``feats`` the red/green/blue fields, both as float32 with
        one row per vertex.
    """
    vertex = PlyData.read(filename).elements[0].data

    def _columns(*names):
        # Stack the named fields row-wise, then transpose to one row per vertex.
        return np.array([vertex[name] for name in names], dtype=np.float32).T

    outputs = [_columns("x", "y", "z"), _columns("red", "green", "blue")]

    if load_label:
        outputs.append(np.array(vertex["label"], dtype=np.int32))

    # An instance array is always appended, even when none was requested.
    instances = (
        np.array(vertex["instance"], dtype=np.int32)
        if load_instance
        else np.ones(outputs[0].shape[0], dtype=np.int32)
    )
    outputs.append(instances)
    return tuple(outputs)

class PlenoxelScannetDataset(Dataset):
    """Per-scene Plenoxel voxel data for ScanNet semantic segmentation.

    Each item corresponds to one scene id listed in the split file of the
    requested phase. The scene's ``data.npz`` is loaded, optionally filtered by
    distance, downsampled, and returned as a dict of arrays (see
    ``__getitem__``).

    ``data_root`` is passed through ``os.path.dirname`` to locate the ``split``
    folder, so it should end with a path separator (e.g. ``"data/"``);
    without it the parent directory of ``data_root`` is searched instead.
    """

    NUM_LABELS = 41  # Will be converted to 20 as defined in IGNORE_LABELS.
    IGNORE_LABELS = tuple(set(range(NUM_LABELS)) - set(VALID_CLASS_IDS))
    IGNORE_LABELS_INSTANCE = tuple(
        set(range(NUM_LABELS)) - set(VALID_CLASS_IDS_INSTANCE)
    )
    CLASS_LABELS = CLASS_LABELS
    CLASS_LABELS_INSTANCE = CLASS_LABELS_INSTANCE
    VALID_CLASS_IDS = VALID_CLASS_IDS

    # Split file name per phase, looked up inside "<dirname(data_root)>/split/".
    DATA_PATH_FILE = {
        "train": "scannetv2_train.txt",
        "val": "scannetv2_val.txt",
        "test": "scannetv2_test.txt",
    }

    def __init__(
        self,
        phase: str,
        data_root: str = "data/",
        train_transformations=[],
        eval_transformations=[],
        downsample_mode=1,
        downsample_stride=2,
        voxel_size: float = 0.02,
        num_points: int = -1,
        features: List[str] = ["sh"],
        ignore_label: int = -100,
        void_label: Optional[int] = None,
        valid_thres: float = 0.05,
        ignore_thres: Optional[float] = None,
    ) -> None:
        """Read the split list and scene scales and build the label mapping.

        Args:
            phase: ``"val"`` and ``"test"`` both select the test split; any
                other value selects the train split.
            data_root: Folder containing ``scannet/`` and ``split/``.
            train_transformations: Transform names for the train phase.
                Currently not applied (see the note in the body).
            eval_transformations: Transform names for the test phase.
                Currently not applied.
            downsample_mode: ``0`` for average pooling with MinkowskiEngine,
                ``1`` for keeping voxels whose coordinates are multiples of
                ``downsample_stride``.
            downsample_stride: Stride used by either downsample mode.
            voxel_size: Divisor applied to the scaled coordinates.
            num_points: Stored on the instance; not used inside this class.
            features: Names of the arrays concatenated into ``"features"``
                (e.g. ``"sh"``, ``"density"``, ``"dists"``, ``"ones"``).
            ignore_label: Target value for labels that are not evaluated.
            void_label: Label assigned to voxels whose distance exceeds
                ``valid_thres``; defaults to ``ignore_label``.
            valid_thres: Distance above which a voxel is treated as void.
            ignore_thres: When set and positive, voxels whose distance is not
                below this value are dropped when a scene is loaded.
        """
        Dataset.__init__(self)
        phase = "test" if phase in ["val", "test"] else "train"
        transformations = (
            train_transformations if phase == "train" else eval_transformations
        )
        # The original Compose-based construction (kept below for reference)
        # needs a `transforms` module that is not imported in the visible
        # notebook cells. The attribute must exist regardless, because
        # __getitem__ checks it.
        #self.transformations = (
        #    transforms.Compose([transforms.__dict__[t]() for t in transformations])
        #    if len(transformations) > 0
        #    else None
        #)
        if len(transformations) > 0:
            logging.warning(
                "Transformations %s were requested but are not applied by this dataset.",
                list(transformations),
            )
        self.transformations = None
        self.phase = phase
        self.data_root = data_root
        self.num_points = num_points
        # Copy so the instance never aliases the shared default list.
        self.features = list(features)
        self.voxel_size = voxel_size
        self.ignore_label = ignore_label
        self.void_label = void_label if void_label is not None else ignore_label
        self.valid_thres = valid_thres
        self.ignore_thres = ignore_thres
        self.downsample_mode = downsample_mode
        self.downsample_stride = downsample_stride

        split_dir = os.path.join(os.path.dirname(self.data_root), "split")

        # One scene id per line; comment lines and blank lines are skipped so a
        # trailing newline does not turn into an empty scene id.
        with open(os.path.join(split_dir, self.DATA_PATH_FILE[phase]), "r") as f:
            self.files = [
                line.strip()
                for line in f
                if line.strip() and not line.startswith("#")
            ]

        if self.downsample_mode == 0:
            self.pool = ME.MinkowskiAvgPooling(
                kernel_size=self.downsample_stride,
                stride=self.downsample_stride,
                dimension=3,
            )

        # map labels not evaluated to ignore_label
        label_map, n_used = dict(), 0
        for l in range(self.NUM_LABELS):
            if l in self.IGNORE_LABELS:
                label_map[l] = ignore_label
            else:
                label_map[l] = n_used
                n_used += 1
        label_map[ignore_label] = ignore_label
        # A distinct void label becomes one extra class after the valid ones.
        if void_label is not None and void_label != ignore_label:
            label_map[void_label] = n_used
        self.label_map = label_map

        # NOTE(review): pickle.load can execute arbitrary code; only use a
        # scene_scales.data file obtained from a trusted source.
        with open(os.path.join(split_dir, "scene_scales.data"), "rb") as f:
            scene_scales = pickle.load(f)
        self.scene_scales = scene_scales
        logging.info(
            f"{self.__class__.__name__}(phase={phase}, total size={len(self.files)}, num_classes={len(self.CLASS_LABELS)}, downsample stride={self.downsample_stride})"
        )

    def __len__(self):
        """Return the number of scenes listed in the split file."""
        # `self.files` is the only scene list this class populates.
        return len(self.files)

    def __getitem__(self, index) -> dict:
        """Load, downsample and featurize the scene at position ``index``.

        Returns:
            A dict with the keys ``"coordinates"``, ``"features"``, ``"xyzs"``,
            ``"labels"``, ``"dists"`` and ``"metadata"`` (the latter holding
            the scene id under ``"file"``).
        """
        inst_id = self.files[index]

        data = self.load_data(inst_id)
        links, density, sh, reso, labels, dists = (
            data["links"],
            data["density"],
            data["sh"],
            data["reso"],
            data["labels"],
            data["dists"],
        )
        # Unravel the flat grid index in `links` into three integer grid
        # coordinates using the grid resolution `reso`.
        coordinates = torch.stack(
            [
                links // (reso[1] * reso[2]),
                links % (reso[1] * reso[2]) // reso[2],
                links % reso[2],
            ],
            1,
        ).float()

        # Density is rescaled only when it is combined with other features.
        if len(self.features) > 1:
            density /= np.abs(density).max() + 1e-5

        # Pack everything into one matrix so that downsampling keeps the
        # columns aligned; the label is the last column.
        coordinates, dist_density_sh_label = self.downsample(
            coordinates, torch.cat([dists, density, sh, labels], dim=1)
        )
        norm_coordinates = coordinates / reso * 2 - 1.0
        scene_scale = self.scene_scales[inst_id]
        scaled_coordinates = norm_coordinates / scene_scale
        xyzs = scaled_coordinates / self.voxel_size
        labels = dist_density_sh_label[:, -1]
        dist_density_sh = dist_density_sh_label[:, :-1]

        # normalize xyzs to fit in unit sphere
        # xyzs = coordinates - coordinates.mean(dim=1, keepdim=True)
        # max_norm = torch.linalg.norm(xyzs, dim=1).max()
        # xyzs = xyzs / max_norm
        raw_features = torch.cat([xyzs, dist_density_sh], dim=1).float()

        xyzs = xyzs.numpy().astype(np.float32)
        raw_features = raw_features.numpy().astype(np.float32)

        if self.transformations is not None:
            xyzs, raw_features, labels = self.transformations(
                xyzs, raw_features, labels
            )

        # Column layout of raw_features: xyz (0-2), dists (3), density (4), sh (5+).
        dists = raw_features[:, 3:4]
        density = raw_features[:, 4:5]
        sh = raw_features[:, 5:]
        ones = np.ones(density.shape)

        # NOTE(review): eval() resolves each configured feature name against the
        # local arrays above (dists, density, sh, ones, ...). It executes
        # arbitrary expressions, so `features` must never come from untrusted
        # input.
        features = []
        for f in self.features:
            features.append(eval(f))
        features = np.concatenate(features, axis=1).astype(np.float32)
        if self.IGNORE_LABELS is not None:
            labels = np.array(
                [self.label_map[x] for x in labels.numpy()], dtype=np.int32
            )

        return {
            "coordinates": xyzs.astype(np.float32),
            "features": features.astype(np.float32),
            "xyzs": xyzs,
            "labels": labels,
            "dists": dists,
            "metadata": {"file": self.files[index]},
        }

    def downsample(self, coordinates, features):
        """Reduce the number of voxels according to ``downsample_mode``.

        Mode 0 average-pools through the MinkowskiEngine pooling layer built in
        ``__init__``; mode 1 keeps only the rows whose three coordinates are
        all multiples of ``downsample_stride``.

        Returns:
            ``(coordinates, features)`` after downsampling.

        Raises:
            ValueError: For any other ``downsample_mode``.
        """
        if self.downsample_mode == 0:
            bcoords = ME.utils.batched_coordinates([coordinates])
            stensor = ME.SparseTensor(features=features, coordinates=bcoords)
            output = self.pool(stensor)
            # Column 0 of output.C is dropped; presumably the batch index — TODO confirm.
            results = (output.C[:, 1:].float() / 2, output.F)
        elif self.downsample_mode == 1:
            sel = (coordinates % self.downsample_stride == 0).all(dim=1)
            # results = (coordinates[sel] / self.downsample_stride, features[sel])
            results = (coordinates[sel], features[sel])
        else:
            raise ValueError(f"Downsample mode {self.downsample_mode} is invalid.")

        logging.debug(
            f"voxel downsample with mode {self.downsample_mode} stride {self.downsample_stride}: from {coordinates.shape[0]} to {results[0].shape[0]}"
        )
        return results

    def load_data(self, inst_id):
        """Load one scene's ``data.npz`` and apply the distance thresholds.

        Voxels whose distance exceeds ``valid_thres`` get ``void_label``. When
        ``ignore_thres`` is set and positive, voxels whose distance is not
        below it are removed from every per-voxel array.

        Returns:
            A dict with the keys ``links``, ``density``, ``sh``, ``reso``,
            ``labels`` and ``dists``.
        """
        ckpt_path = os.path.join(
            self.data_root, "scannet", f"plenoxel_scannet_{inst_id}", "data.npz"
        )
        ckpt = np.load(ckpt_path)
        links = torch.from_numpy(ckpt["links"])
        density = torch.from_numpy(ckpt["density"])
        # De-quantize the stored spherical-harmonics values.
        sh = ckpt["sh"].astype(np.float32) * ckpt["sh_scale"] + ckpt["sh_min"]
        sh = torch.from_numpy(sh)
        reso = ckpt["reso"]
        labels = torch.from_numpy(ckpt["labels"]).unsqueeze(1)

        dists = torch.from_numpy(ckpt["dists"]).unsqueeze(1)

        is_void = dists > self.valid_thres
        labels[is_void] = self.void_label

        if self.ignore_thres is not None and self.ignore_thres > 0:
            # Use one boolean per voxel row so that every array keeps its
            # column layout, and filter `dists` as well: __getitem__
            # concatenates all of them along dim=1, which requires equal row
            # counts. Assumes the file stores one distance per voxel — TODO confirm.
            valid = (dists < self.ignore_thres).reshape(-1)
            links = links[valid]
            sh = sh[valid]
            density = density[valid]
            labels = labels[valid]
            dists = dists[valid]
        return dict(
            links=links, density=density, sh=sh, reso=reso, labels=labels, dists=dists
        )

In [None]:
# Training split with stride-2 downsampling. Voxels whose distance exceeds the
# default valid_thres get the label -333; voxels whose distance is not below
# 0.20 are dropped.
dataset = PlenoxelScannetDataset(
    phase="train",
    data_root="./data/",
    downsample_stride=2,
    void_label=-333,
    ignore_thres=0.20,
)

### Incompatibility of DataLoader with PeRFception Dataset
If you execute the following cell, you will see that loading the dataset is not successful. I suspect that this is because the data layout expected by `PlenoxelScannetDataset` differs from `PeRFception-ScanNet` (i.e. the dataset they provide): `PlenoxelScannetDataset` tries to access `data.npz`, which is not part of the downloaded dataset. This is confusing, because if you search the [NeRF-Downstream](https://github.com/POSTECH-CVLab/NeRF-Downstream) repository for terms like "PeRFception-Scannet", you will find only a single occurrence, in the repository's `README.md`.

What we will have to understand in order to keep working on this is the `/co3d_3d/src/modules/segmentation_training.py` file and how its components are spread throughout the repository.

In [None]:
# Fetch the first sample through PlenoxelScannetDataset.__getitem__ and print the returned dict.
print(dataset[0])

UnpicklingError: Failed to interpret file './data/scannet/plenoxel_scannet_scene0191_00/trans_info.npz' as a pickle

For now, let us ignore these issues and take a step back from their implementation. Let us first understand what information is accessible from the dataset.

In [None]:
import os

# Folder of one example scene; `scene_path` is reused by the following cells.
scene_path = "data/scannet/plenoxel_scannet_scene0001_01/"
# List everything that the downloaded dataset provides for this scene.
files = os.listdir(scene_path)
print("Files in directory:", files)

Files in directory: ['trans_info.npz', 'init.npy', 'render_model', 'results.json', 'config.gin', 'thick.npy', 'last.ckpt']


Let's start by investigating `trans_info.npz`. As we can see, this file is not a valid `.zip` archive (which every `.npz` file has to be), causing numpy to fail to interpret it.

In [None]:
import numpy as np
import zipfile

file_path = 'data/scannet/plenoxel_scannet_scene0001_01/trans_info.npz'

# An .npz file is a ZIP archive of .npy members, so a failed ZIP check already
# explains why numpy cannot read the file.
if zipfile.is_zipfile(file_path):
    print("This is a valid ZIP file.")
else:
    print("This is NOT a valid ZIP file.")

# Load the same file that was checked above and display the failure instead of
# aborting a "Run All".
# NOTE(review): allow_pickle=True can execute arbitrary code when loading
# untrusted files; keep it only for data from a trusted source.
try:
    trans_info = np.load(file_path, allow_pickle=True)
except (pickle.UnpicklingError, ValueError, OSError) as err:
    print(f"{type(err).__name__}: {err}")
else:
    print(trans_info)

This is NOT a valid ZIP file.


UnpicklingError: Failed to interpret file 'data/scannet/plenoxel_scannet_scene0001_01/trans_info.npz' as a pickle

Next, we will investigate `init.npy`. We face the same issue here.

In [None]:
# Same load attempt as for trans_info.npz; relies on `scene_path` from the earlier cell.
# NOTE(review): allow_pickle=True can execute arbitrary code when loading untrusted files.
np.load(scene_path + "init.npy", allow_pickle=True)

UnpicklingError: Failed to interpret file 'data/scannet/plenoxel_scannet_scene0001_01/init.npy' as a pickle

`/render_model/` is a directory with 2D images in `.jpg`-format inside. We will print the first three images as an example.

In [None]:
# os.listdir returns entries in arbitrary order, so the three names printed here
# are not necessarily the first three images by number.
files = os.listdir(scene_path + "render_model/")
print("Files in directory:", files[:3])

Files in directory: ['image016.jpg', 'image003.jpg', 'image000.jpg']


For the next file, `results.json`, one might assume that a JSON decoder would work because the file has a `.json` extension. However, this does not work: the file is only a Git LFS pointer (see the output below) that references the actual content in LFS storage. Feel free to download the files and play around with them.

In [None]:
# Peek at the raw text instead of JSON-decoding it; the handle is only needed
# for the read itself.
with open(scene_path + "results.json", 'r') as file:
    snippet = file.read(100)  # first 100 characters only
print("First 100 characters of the file:\n", snippet)

First 100 characters of the file:
 version https://git-lfs.github.com/spec/v1
oid sha256:4ec3e655ee92106f969ea942ab89587d67376eeb7255b9
