In [1]:
import torch
from collections import defaultdict
import numpy as np
import mcubes
import trimesh

from models.rendering import *
from models.nerf import *

from datasets import dataset_dict

from utils import load_ckpt

# Load model and data

In [2]:
# Change here #
img_wh = (4032, 3024) # full resolution of the input images
dataset_name = 'llff' # blender or llff (own data)
scene_name = 'laptop_color_8' # whatever you want
root_dir = './datasets/nerf_llff_data/laptop' # the folder containing data
ckpt_path = './ckpts/laptop01_20221115/epoch=6.ckpt' # the model path
###############

# Keyword arguments for the dataset constructor: llff data is read from the
# 'test' split with spheric poses enabled, every other dataset from 'train'.
kwargs = {
    'root_dir': root_dir,
    'img_wh': img_wh,
    'split': 'test' if dataset_name == 'llff' else 'train',
}
if dataset_name == 'llff':
    kwargs['spheric_poses'] = True

# Number of points pushed through the network per forward pass in later cells.
chunk = 32 * 1024
dataset = dataset_dict[dataset_name](**kwargs)

# Encoders for positions and view directions.
# NOTE(review): the (3, 10) / (3, 4) arguments are presumably
# (input channels, number of frequencies) -- confirm against models/nerf.py.
embedding_xyz = Embedding(3, 10)
embedding_dir = Embedding(3, 4)

# Restore the fine network from the checkpoint, then move it to the GPU in eval mode.
nerf_fine = NeRF()
load_ckpt(nerf_fine, ckpt_path, model_name='nerf_fine')
nerf_fine.cuda().eval();  # trailing ';' suppresses the cell's output repr

# Search for tight bounds of the object (trial and error!)

In [27]:
### Tune these parameters until the whole object lies tightly in range with little noise ###
N = 128 # controls the resolution, set this number small here because we're only finding
        # good ranges here, not yet for mesh reconstruction; we can set this number high
        # when it comes to final reconstruction.
xmin, xmax = -2.4, 2.4 # left/right range
ymin, ymax = -2.4, 2.4 # forward/backward range
zmin, zmax = -3, -1 # up/down range
## Attention! the ranges MUST have the same length!
sigma_threshold = 30. # controls the noise (lower=maybe more noise; higher=some mesh might be missing)
############################################################################################

# Every axis is sampled with the same number of points (N) and the preview below
# divides all vertex coordinates by N, so the voxels are only cubic when the three
# ranges have equal length. Report a mismatch instead of silently showing a
# stretched mesh; this is a warning (not an error) so tuning can continue.
range_lengths = (xmax - xmin, ymax - ymin, zmax - zmin)
if not np.allclose(range_lengths, range_lengths[0]):
    print(f"WARNING: x/y/z range lengths differ {range_lengths}; "
          "they must be equal, otherwise the mesh is distorted.")

x = np.linspace(xmin, xmax, N)
y = np.linspace(ymin, ymax, N)
z = np.linspace(zmin, zmax, N)

# All N^3 grid points as one (N^3, 3) float tensor on the GPU.
xyz_ = torch.FloatTensor(np.stack(np.meshgrid(x, y, z), -1).reshape(-1, 3)).cuda()
# Dummy all-zero view direction for every point; zeros_like already allocates
# on the same device as xyz_, so no extra .cuda() is needed.
dir_ = torch.zeros_like(xyz_)

# Query the network chunk by chunk to bound GPU memory use.
with torch.no_grad():
    B = xyz_.shape[0]
    out_chunks = []
    for i in range(0, B, chunk):
        xyz_embedded = embedding_xyz(xyz_[i:i+chunk]) # presumably (chunk, embed_xyz_channels)
        dir_embedded = embedding_dir(dir_[i:i+chunk]) # presumably (chunk, embed_dir_channels)
        xyzdir_embedded = torch.cat([xyz_embedded, dir_embedded], 1)
        out_chunks.append(nerf_fine(xyzdir_embedded))
    rgbsigma = torch.cat(out_chunks, 0)

# The last column is the density; clamp negatives to zero and restore the grid shape.
sigma = rgbsigma[:, -1].cpu().numpy()
sigma = np.maximum(sigma, 0)
sigma = sigma.reshape(N, N, N)

# The below lines are for visualization, COMMENT OUT once you find the best range and increase N!
vertices, triangles = mcubes.marching_cubes(sigma, sigma_threshold)
mesh = trimesh.Trimesh(vertices/N, triangles)
mesh.show()

In [None]:
# # You can already export "colorless" mesh if you don't need color
# mcubes.export_mesh(vertices, triangles, f"{scene_name}.dae")

# Generate .vol file for volume rendering in Unity

In [None]:
assert N==512, \
    'Please set N to 512 in the two above cell! Remember to comment out the visualization code (last 3 lines)!'

# Per-voxel opacity from density, using the x-range voxel size as the step length
# (this relies on the three ranges having the same length).
a = 1-np.exp(-(xmax-xmin)/N*sigma)
a = a.flatten()
# rgbsigma was produced on the GPU, so it has to be copied to host memory before
# it can be converted to a numpy array (calling .numpy() on a CUDA tensor raises
# TypeError). The first three columns are the colour, scaled here to 0..255.
rgb = (rgbsigma[:, :3].cpu().numpy()*255).astype(np.uint32)
i = np.where(a>0)[0] # valid indices (alpha>0)

# Keep only voxels that are not fully transparent.
rgb = rgb[i]
a = a[i]
# Pack colour and opacity into one 32-bit word: R<<24 | G<<16 | B<<8 | alpha.
s = rgb.dot(np.array([1<<24, 1<<16, 1<<8])) + (a*255).astype(np.uint32)
# File layout: consecutive uint32 pairs of (flat voxel index, packed colour/alpha).
res = np.stack([i, s], -1).astype(np.uint32).flatten()
with open(f'{scene_name}.vol', 'wb') as f:
    f.write(res.tobytes())

# Extract colored mesh

Once you have found the best ranges, **RESTART** the notebook, copy your settings into the following cell,
and execute it.

In [2]:
# Copy the variables you have above here! ####
img_wh = (4032, 3024) # full resolution of the input images
dataset_name = 'llff' # blender or llff (own data)
scene_name = 'laptop_color_8' # whatever you want
root_dir = './datasets/nerf_llff_data/laptop' # the folder containing data
ckpt_path = './ckpts/laptop01_20221115/epoch=6.ckpt' # the model path

N = 128 # controls the resolution, set this number small here because we're only finding
        # good ranges here, not yet for mesh reconstruction; we can set this number high
        # when it comes to final reconstruction.
xmin, xmax = -2.4, 2.4 # left/right range
ymin, ymax = -2.4, 2.4 # forward/backward range
zmin, zmax = -2.4, 2.4 # up/down range
sigma_threshold = 30. # controls the noise (lower=maybe more noise; higher=some mesh might be missing)
###############################################

import os
os.environ['ROOT_DIR'] = root_dir
os.environ['DATASET_NAME'] = dataset_name
os.environ['SCENE_NAME'] = scene_name
os.environ['IMG_SIZE'] = f"{img_wh[0]} {img_wh[1]}"
os.environ['CKPT_PATH'] = ckpt_path
os.environ['N_GRID'] = "512" # final resolution. You can set this number high to preserve more details
os.environ['X_RANGE'] = f"{xmin} {xmax}"
os.environ['Y_RANGE'] = f"{ymin} {ymax}"
os.environ['Z_RANGE'] = f"{zmin} {zmax}"
os.environ['SIGMA_THRESHOLD'] = str(sigma_threshold)
os.environ['OCC_THRESHOLD'] = "0.2" # probably doesn't require tuning. If you find the color is not close
                                    # to real, try to set this number smaller (the effect of this number
                                    # is explained in my youtube video)

!python extract_color_mesh.py \
    --root_dir $ROOT_DIR \
    --dataset_name $DATASET_NAME \
    --scene_name $SCENE_NAME \
    --img_wh $IMG_SIZE \
    --ckpt_path $CKPT_PATH \
    --N_grid $N_GRID \
    --x_range $X_RANGE \
    --y_range $Y_RANGE \
    --z_range $Z_RANGE \
    --sigma_threshold $SIGMA_THRESHOLD \
    --occ_threshold $OCC_THRESHOLD

Predicting occupancy ...
100%|██████████████████████████████████████| 4096/4096 [00:10<00:00, 386.52it/s]
Extracting mesh ...
Removing noise ...
Mesh has 1.17 M vertices and 2.37 M faces.
Fusing colors ...
  "torch.norm is deprecated and may be removed in a future PyTorch release. "
100%|███████████████████████████████████████████| 31/31 [03:35<00:00,  6.96s/it]
Done!
