# Plan

- Merge the two PLY files of each scene
    - Take vertices, faces and colors from the first file (`*_vh_clean_2.ply`) and labels from the second file (`*_vh_clean_2.labels.ply`).
    - Save the result as a single file with the extension `.merged.ply` in the `scannet_merged_ply` directory.

- debug transformations

# Imports

In [1]:
import os, sys
import time
import math
import numpy as np
import pandas as pd
import datetime
import glob
from IPython.display import display, HTML, FileLink
from pathlib import Path
from os.path import join, exists, basename, splitext
from matplotlib import pyplot as plt
from matplotlib import cm
import shutil
from tqdm import tqdm
from joblib import Parallel, delayed, cpu_count

# autoreload python modules on the fly when its source is changed
%load_ext autoreload
%autoreload 2

In [2]:
from fastai_sparse import utils, visualize
from fastai_sparse.utils import log
from fastai_sparse.visualize.utils import export_ply
from fastai_sparse.data_items import MeshItem

#visualize.options.interactive = False

In [3]:
# Number of parallel workers used for the conversion: the value reported by joblib's cpu_count().
n_jobs = cpu_count()
n_jobs

16

# Source

In [4]:
# Root directory of the downloaded ScanNet data; the files of a scene are read from SOURCE_DIR/<scene_id>/.
SOURCE_DIR = Path('scannet')
assert SOURCE_DIR.exists(), "Download scannet (see download_scannet.sh) and set path"

In [5]:
# Directory that contains the text files listing the scenes of the train / valid / test splits.
definition_of_spliting_dir = Path('ScanNet_Tasks_Benchmark')
assert definition_of_spliting_dir.exists()

In [6]:
def find_files(path, ext='_vh_clean_2.ply'):
    """Return the files ending in `ext` located one directory level below `path`.

    Parameters
    ----------
    path : str or pathlib.Path
        Root directory; its immediate sub-directories are searched.
    ext : str
        File name suffix to match. It is appended to a `*` wildcard, so glob
        wildcards inside `ext` are honoured as well.

    Returns
    -------
    list of str
        Matching file names in sorted order (empty list if nothing matches).
    """
    # Path() lets plain-string roots work with the `/` operator used below.
    pattern = str(Path(path) / '*' / ('*' + ext))
    # glob.glob returns matches in arbitrary order; sort for a reproducible result.
    return sorted(glob.glob(pattern))

# Sanity check: count the files with the default extension found under SOURCE_DIR.
print("Number of files found:", len(find_files(SOURCE_DIR)))

Number of files found: 1513


# Target

In [7]:
# Output directory that receives one sub-directory per merged scene.
TARGET_DIR = Path('scannet_merged_ply')

# exist_ok=True keeps the cell re-runnable without a separate check-then-create step.
TARGET_DIR.mkdir(parents=True, exist_ok=True)

## train / valid / test lists

In [8]:
# Files that list the scene ids of the train / valid / test splits
fn_lists = {
    'train': definition_of_spliting_dir / 'scannetv1_train.txt',
    'valid': definition_of_spliting_dir / 'scannetv1_val.txt',
    'test': definition_of_spliting_dir / 'scannetv1_test.txt',
}

# Fail early, naming the split whose list file is missing.
for datatype in fn_lists:
    assert fn_lists[datatype].exists(), datatype

Load the split lists into data frames (one per split).

In [9]:
# One data frame per split: a `scene_id` column plus a `datatype` column naming the split.
dfs = {}
for datatype in ('train', 'valid', 'test'):
    df = pd.read_csv(
        fn_lists[datatype],
        header=None,
        names=['scene_id'],
    ).assign(datatype=datatype)
    dfs[datatype] = df

    print(f"{datatype} counts: {len(df)}")

dfs['valid'].head()

train counts: 1045
valid counts: 156
test counts: 312


Unnamed: 0,scene_id,datatype
0,scene0534_00,valid
1,scene0534_01,valid
2,scene0319_00,valid
3,scene0273_00,valid
4,scene0273_01,valid


In [10]:
# Stack the three splits into one scene list. ignore_index=True renumbers the rows,
# otherwise every split keeps its own 0-based index and index labels are duplicated.
df = pd.concat([dfs['train'], dfs['valid'], dfs['test']], ignore_index=True)
df.head()

Unnamed: 0,scene_id,datatype
0,scene0191_00,train
1,scene0191_01,train
2,scene0191_02,train
3,scene0119_00,train
4,scene0230_00,train


## Check existence

In [11]:
# Suffixes of the two source files expected for every scene:
# the mesh itself and the mesh carrying the labels.
files_exts = ['_vh_clean_2.ply', '_vh_clean_2.labels.ply']

t = tqdm(df.iterrows(), total=len(df), desc='Check files exist')
try:
    for i, row in t:
        scene_dir = SOURCE_DIR / f"{row.scene_id}"
        for ext in files_exts:
            fn = scene_dir / f"{row.scene_id}{ext}"
            # Stop at the first missing file and report its path.
            assert fn.exists(), fn
finally:
    # Release the progress bar whether or not the check succeeded.
    t.clear()
    t.close()

Check files exist: 100%|██████████| 1513/1513 [00:00<00:00, 4756.37it/s]


# Convert one

In [12]:
def merge_one_row(row):
    """
    Merge the two source files of one scene into a single `.merged.ply` file.

    Vertices, faces and colors are obtained from the first file
    (`files_exts[0]`), the per-vertex labels from the second file
    (`files_exts[1]`). The result is written to
    `TARGET_DIR/<scene_id>/<scene_id>.merged.ply`.

    Parameters
    ----------
    row : object with a `scene_id` attribute
        For example one row of the scene list data frame.

    Returns
    -------
    None
    """
    fn = SOURCE_DIR / f"{row.scene_id}" / f"{row.scene_id}{files_exts[0]}"
    fn2 = SOURCE_DIR / f"{row.scene_id}" / f"{row.scene_id}{files_exts[1]}"

    dir_out = TARGET_DIR / f"{row.scene_id}"
    fn_out = dir_out / f"{row.scene_id}.merged.ply"

    # exist_ok=True removes the check-then-create race; parents=True lets the
    # function work even when TARGET_DIR itself has not been created yet.
    dir_out.mkdir(parents=True, exist_ok=True)

    # Geometry and colors come from the first file ...
    o = MeshItem.from_file(fn, colors_from_vertices=True)

    # ... and only the labels are taken from the second one.
    o2 = MeshItem.from_file(fn2, label_field='label', labels_from_vertices=True)
    labels = o2.labels

    # The labels are cast to uint16 to agree with label_type='ushort'.
    res = export_ply(o.data, vertex_labels=labels.astype(np.uint16), label_type='ushort')
    with open(fn_out, "wb") as f:
        f.write(res)

In [13]:
# Take the first scene of the list to try the conversion on a single item.
row = df.iloc[0]

In [14]:
# Convert this single scene before launching the full conversion.
merge_one_row(row)

Test loading of the merged file

In [15]:
# Path of the merged file for this scene (same layout as the one used in merge_one_row).
fn_out = TARGET_DIR / f"{row.scene_id}" / f"{row.scene_id}.merged.ply"

In [16]:
# Read the merged file back, print a summary of its arrays and display it.
o = MeshItem.from_file(fn_out)
o.describe()
o.show()

MeshItem (scene0191_00.merged.ply)
vertices:     shape: (145736, 3)     dtype: float64        min:   -0.02024,  max:    7.69273,  mean:    2.06647
faces:        shape: (279045, 3)     dtype: int64          min:          0,  max:     145735,  mean: 72771.44137
colors:       shape: (145736, 4)     dtype: uint8          min:    6.00000,  max:  255.00000,  mean:  139.41301
labels:       shape: (145736,)       dtype: uint16         min:    0.00000,  max:   69.00000,  mean:    3.36522
Colors from vertices
Labels from vertices


VBox(children=(VBox(children=(Figure(camera=PerspectiveCamera(fov=46.0, position=(0.0, 0.0, 2.0), quaternion=(…

# Convert all

In [22]:
# Convert every scene of the list, running merge_one_row in n_jobs parallel workers.
t = tqdm(df.iterrows(), total=len(df), desc="Convert files")

try:
    # One delayed call per row; the progress bar advances as rows are consumed.
    # For debugging, the same rows can be processed serially with a plain loop
    # that calls merge_one_row(row).
    tasks = (delayed(merge_one_row)(row) for idx, row in t)
    res = Parallel(n_jobs=n_jobs)(tasks)
finally:
    t.clear()
    t.close()
    sys.stderr.flush()

Convert files: 100%|██████████| 1513/1513 [07:10<00:00,  3.28it/s]
