Commit 0831990

for DTU
xiuming committed Sep 5, 2021
1 parent c1ba8a4 commit 0831990
Showing 6 changed files with 391 additions and 23 deletions.
96 changes: 96 additions & 0 deletions nerfactor/config/nerfactor_mvs.ini
@@ -0,0 +1,96 @@
[DEFAULT]

# ====== Must-Have ======
# These parameters are required by the pipeline, regardless of your custom code

# ------ Data ------
dataset = mvs_shape
no_batch = True
# bs = 4
cache = True

# ------ Model ------
model = nerfactor

# ------ Optimization ------
loss = l2
lr = 5e-3
lr_decay_steps = 500_000
lr_decay_rate = 0.1
clipnorm = -1
clipvalue = -1
epochs = 100

# ------ Logging and Checkpointing ------
ckpt_period = 10
vali_period = 10
vali_batches = 4
vis_train_batches = 4
keep_recent_epochs = -1

# ------ IO ------
overwrite = False
# The following two determine the output directory
outroot = /output/train/hotdog_2163_nerfactor_mvs/
xname = lr{lr}


# ====== Custom ======
# These parameters are whatever your custom dataset and model require

# ------ Data ------
mvs_root = /output/surf_mvs/hotdog_2163/
use_nerf_alpha = False
imh = 512
light_h = 16
near = 2
far = 6
ndc = False
white_bg = True

# ------ Model ------
xyz_jitter_std = 0.01
smooth_use_l1 = True
# DTU scenes have huge XYZs
xyz_scale = 1e-3
# Shape
shape_mode = finetune
shape_model_ckpt = /output/train/hotdog_2163_shape_mvs/lr1e-2/checkpoints/ckpt-2
nerf_shape_respect = 0.1
normal_loss_weight = 0.1
lvis_loss_weight = 0.1
normal_smooth_weight = 0.05
lvis_smooth_weight = 0.05
# BRDF
albedo_slope = 0.77
albedo_bias = 0.03
pred_brdf = True
default_z = 0.1
brdf_model_ckpt = /output/train/merl/lr1e-2/checkpoints/ckpt-50
albedo_smooth_weight = 0.05
brdf_smooth_weight = 0.01
learned_brdf_scale = 1
# Lighting
light_init_max = 1
light_tv_weight = 5e-6
light_achro_weight = 0
# Rendering
linear2srgb = True
test_envmap_dir = /data/envmaps/for-render_h16/test/

# ------ Network ------
mlp_chunk = 65536
mlp_width = 128
mlp_depth = 4
mlp_skip_at = 2
# Positional encoding
pos_enc = True
n_freqs_xyz = 10
n_freqs_ldir = 4
n_freqs_vdir = 4

# ------ Misc. ------
# De facto training batch size: number of random rays per gradient step
n_rays_per_step = 1024
# File viewer prefix, if any
viewer_prefix = http://vision38.csail.mit.edu
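
The values above follow Python INI conventions and can be loaded with the
standard-library configparser (the dataset code below reads them via
config.get/getint/getboolean on the 'DEFAULT' section). A minimal sketch of how
the IO fields might be consumed; the join of outroot and the formatted xname
into a run directory is an assumption suggested by the "determine the output
directory" comment, not something this diff shows:

from configparser import ConfigParser
from os.path import join

config = ConfigParser()
config.read('nerfactor/config/nerfactor_mvs.ini')
lr = config.get('DEFAULT', 'lr')  # '5e-3'
outroot = config.get('DEFAULT', 'outroot')
xname = config.get('DEFAULT', 'xname').format(lr=lr)  # 'lr5e-3'
outdir = join(outroot, xname)  # hypothetical combination rule
print(outdir)  # /output/train/hotdog_2163_nerfactor_mvs/lr5e-3

Note also that albedo_slope = 0.77 and albedo_bias = 0.03 squash the predicted
albedo into [0.03, 0.8], matching the "[bias, scale + bias]" comment in
_pred_albedo_at further down.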
71 changes: 71 additions & 0 deletions nerfactor/config/shape_mvs.ini
@@ -0,0 +1,71 @@
[DEFAULT]

# ====== Must-Have ======
# These parameters are required by the pipeline, regardless of your custom code

# ------ Data ------
dataset = mvs_shape
no_batch = True
# bs = 4
cache = True

# ------ Model ------
model = shape

# ------ Optimization ------
loss = l2
lr = 1e-2
lr_decay_steps = 500_000
lr_decay_rate = 0.1
clipnorm = -1
clipvalue = -1
epochs = 200

# ------ Logging and Checkpointing ------
ckpt_period = 100
vali_period = 100
vali_batches = 4
vis_train_batches = 4
keep_recent_epochs = -1

# ------ IO ------
overwrite = False
# The following two determine the output directory
outroot = /output/train/hotdog_2163_shape_mvs
xname = lr{lr}


# ====== Custom ======
# These parameters are whatever your custom dataset and model require

# ------ Data ------
mvs_root = /output/surf_mvs/hotdog_2163
imh = 512
light_h = 16
near = 2
far = 6
ndc = False
white_bg = True

# ------ Model ------
xyz_jitter_std = 0.01
smooth_use_l1 = True
# DTU scenes have huge XYZs
xyz_scale = 1e-3
# De facto batch size: number of random rays per gradient step
n_rays_per_step = 1024
normal_loss_weight = 1
lvis_loss_weight = 1
# Positional encoding
pos_enc = True
n_freqs_xyz = 10
n_freqs_ldir = 4
n_freqs_vdir = 4

# ------ Network ------
mlp_chunk = 65536
mlp_width = 128
mlp_depth = 4
mlp_skip_at = 2

viewer_prefix = http://vision38.csail.mit.edu
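
The optimization fields imply an exponential decay schedule: the learning rate
falls by a factor of lr_decay_rate every lr_decay_steps steps, i.e.
lr(step) = 1e-2 * 0.1 ** (step / 500_000). A minimal sketch of one consistent
reading, assuming TensorFlow's built-in ExponentialDecay; whether the pipeline
uses this exact class, or Adam, is not shown in this diff:

import tensorflow as tf

# Exponential decay matching lr, lr_decay_steps, and lr_decay_rate above
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,  # lr
    decay_steps=500_000,         # lr_decay_steps
    decay_rate=0.1)              # lr_decay_rate
optimizer = tf.keras.optimizers.Adam(learning_rate=schedule)  # optimizer choice is illustrative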
121 changes: 121 additions & 0 deletions nerfactor/datasets/mvs_shape.py
@@ -0,0 +1,121 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=invalid-unary-operand-type

from os.path import join
import numpy as np

from third_party.xiuminglib import xiuminglib as xm
from nerfactor.util import logging as logutil, io as ioutil, tensor as tutil
from nerfactor.datasets.nerf_shape import Dataset as BaseDataset


logger = logutil.Logger(loggee="datasets/mvs_shape")


class Dataset(BaseDataset):
    def _glob(self):
        mvs_root = self.config.get('DEFAULT', 'mvs_root')
        # Glob metadata paths
        mode_str = 'val' if self.mode == 'vali' else self.mode
        if self.debug:
            logger.warn("Globbing a single data file for faster debugging")
            metadata_dir = join(mvs_root, '%s_000' % mode_str)
        else:
            metadata_dir = join(mvs_root, '%s_???' % mode_str)
        # Include only cameras with all required buffers (depending on mode)
        metadata_paths, incomplete_paths = [], []
        for metadata_path in xm.os.sortglob(metadata_dir, 'metadata.json'):
            id_ = self._parse_id(metadata_path)
            view_dir = join(mvs_root, id_)
            lvis_path = join(view_dir, 'lvis.npy')
            normal_path = join(view_dir, 'normal.npy')
            xyz_path = join(view_dir, 'xyz.npy')
            alpha_path = join(view_dir, 'alpha.png')
            paths = {
                'xyz': xyz_path, 'normal': normal_path, 'lvis': lvis_path,
                'alpha': alpha_path}
            if self.mode != 'test':
                rgba_path = join(view_dir, 'rgba.png')
                paths['rgba'] = rgba_path
            if ioutil.all_exist(paths):
                metadata_paths.append(metadata_path)
                self.meta2buf[metadata_path] = paths
            else:
                incomplete_paths.append(metadata_path)
        if incomplete_paths:
            logger.warn((
                "Skipping\n\t%s\nbecause at least one of their paired "
                "buffers doesn't exist"), incomplete_paths)
        logger.info("Number of '%s' views: %d", self.mode, len(metadata_paths))
        return metadata_paths

    # pylint: disable=arguments-differ
    def _load_data(self, metadata_path):
        imh = self.config.getint('DEFAULT', 'imh')
        use_nerf_alpha = self.config.getboolean('DEFAULT', 'use_nerf_alpha')
        metadata_path = tutil.eager_tensor_to_str(metadata_path)
        id_ = self._parse_id(metadata_path)
        # Rays
        metadata = ioutil.read_json(metadata_path)
        h, w = metadata['imh'], metadata['imw']
        cam_loc = np.array(metadata['cam_loc'])
        rayo = np.tile(cam_loc[None, None, :], (h, w, 1))
        rayo = rayo.astype(np.float32)
        rayd = np.zeros_like(rayo)  # dummy
        # Load precomputed shape properties
        paths = self.meta2buf[metadata_path]
        xyz = ioutil.load_np(paths['xyz'])
        normal = ioutil.load_np(paths['normal'])
        if self.debug:
            logger.warn("Faking light visibility for faster debugging")
            lvis = 0.5 * np.ones(normal.shape[:2] + (512,), dtype=np.float32)
        else:
            lvis = ioutil.load_np(paths['lvis'])
        # RGB and alpha, depending on the mode
        if self.mode == 'test':
            # No RGBA, so estimated alpha and placeholder RGB
            alpha = xm.io.img.load(paths['alpha'])
            alpha = xm.img.normalize_uint(alpha)
            rgb = np.zeros_like(xyz)
        else:
            # Training or validation, where each camera has a paired image
            rgba = xm.io.img.load(paths['rgba'])
            assert rgba.ndim == 3 and rgba.shape[2] == 4, \
                "Input image is not RGBA"
            rgba = xm.img.normalize_uint(rgba)
            rgb = rgba[:, :, :3]
            if use_nerf_alpha:  # useful for real scenes
                alpha = xm.io.img.load(paths['alpha'])
                alpha = xm.img.normalize_uint(alpha)
            else:
                alpha = rgba[:, :, 3]  # ground-truth alpha
        # Resize
        if imh != xyz.shape[0]:
            xyz = xm.img.resize(xyz, new_h=imh)
            normal = xm.img.resize(normal, new_h=imh)
            lvis = xm.img.resize(lvis, new_h=imh)
            alpha = xm.img.resize(alpha, new_h=imh)
            rgb = xm.img.resize(rgb, new_h=imh)
        # Make sure there's no XYZ coinciding with camera (caused by occupancy
        # accumulating to 0)
        assert not np.isclose(xyz, rayo).all(axis=2).any(), \
            "Found XYZs coinciding with the camera"
        # Re-normalize normals and clip light visibility before returning
        normal = xm.linalg.normalize(normal, axis=2)
        assert np.isclose(np.linalg.norm(normal, axis=2), 1).all(), \
            "Found normals with a norm far away from 1"
        lvis = np.clip(lvis, 0, 1)
        return id_, rayo, rayd, rgb, alpha, xyz, normal, lvis
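
Pieced together from _glob() and _load_data(), the dataset expects a per-view
layout like the following under mvs_root (a sketch, not part of this diff; the
three-digit indices are inferred from the debug branch's '%s_000' pattern):

/output/surf_mvs/hotdog_2163/
    train_000/
        metadata.json   # provides imh, imw, cam_loc
        xyz.npy         # per-pixel surface points
        normal.npy      # per-pixel normals, re-normalized on load
        lvis.npy        # per-pixel light visibility; last dim is
                        # light_h * 2 * light_h = 16 * 32 = 512
        alpha.png       # estimated alpha matte
        rgba.png        # paired image; required except in 'test' mode
    train_001/
    ...
    val_000/
    ...
    test_000/
    ...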
25 changes: 13 additions & 12 deletions nerfactor/models/nerfactor.py
@@ -21,7 +21,6 @@
 import tensorflow as tf
 
 from third_party.xiuminglib import xiuminglib as xm
-from brdf.renderer import gen_light_xyz
 from nerfactor.models.shape import Model as ShapeModel
 from nerfactor.models.brdf import Model as BRDFModel
 from nerfactor.networks import mlp
@@ -63,9 +62,8 @@ def __init__(self, config, debug=False):
         self._light = None  # see the light property
         light_h = self.config.getint('DEFAULT', 'light_h')
         self.light_res = (light_h, 2 * light_h)
-        lxyz, lareas = gen_light_xyz(*self.light_res)
-        self.lxyz = tf.convert_to_tensor(lxyz, dtype=tf.float32)
-        self.lareas = tf.convert_to_tensor(lareas, dtype=tf.float32)
+        lxyz, lareas = self._gen_lights()
+        self.lxyz, self.lareas = lxyz, lareas
         # Novel lighting conditions for relighting at test time:
         olat_inten = self.config.getfloat('DEFAULT', 'olat_inten', fallback=200)
         ambi_inten = self.config.getfloat(
@@ -143,7 +141,7 @@ def _init_net(self):
             [mlp_width] * mlp_depth, act=['relu'] * mlp_depth,
             skip_at=[mlp_skip_at])
         net['brdf_z_out'] = mlp.Network([self.z_dim], act=None)
-        # Training from scratch, finetuning, or just using NeRF geometry?
+        # Training from scratch, finetuning, or just using initial geometry?
         if self.shape_mode == 'scratch':
             net['normal_mlp'] = mlp.Network(
                 [mlp_width] * mlp_depth, act=['relu'] * mlp_depth,
@@ -385,13 +383,14 @@ def _pred_albedo_at(self, pts):
         mlp_layers = self.net['albedo_mlp']
         out_layer = self.net['albedo_out']  # output in [0, 1]
         embedder = self.embedder['xyz']
+        pts_scaled = self.xyz_scale * pts  # transparent to the user
 
         def chunk_func(surf):
             surf_embed = embedder(surf)
             albedo = out_layer(mlp_layers(surf_embed))
             return albedo
 
-        albedo = self.chunk_apply(chunk_func, pts, 3, self.mlp_chunk)
+        albedo = self.chunk_apply(chunk_func, pts_scaled, 3, self.mlp_chunk)
         albedo = albedo_scale * albedo + albedo_bias  # [bias, scale + bias]
         albedo = tf.debugging.check_numerics(albedo, "Albedo")
         return albedo  # Nx3
@@ -400,13 +399,15 @@ def _pred_brdf_at(self, pts):
         mlp_layers = self.net['brdf_z_mlp']
         out_layer = self.net['brdf_z_out']
         embedder = self.embedder['xyz']
+        pts_scaled = self.xyz_scale * pts  # transparent to the user
 
         def chunk_func(surf):
             surf_embed = embedder(surf)
             brdf_z = out_layer(mlp_layers(surf_embed))
             return brdf_z
 
-        brdf_z = self.chunk_apply(chunk_func, pts, self.z_dim, self.mlp_chunk)
+        brdf_z = self.chunk_apply(
+            chunk_func, pts_scaled, self.z_dim, self.mlp_chunk)
         return brdf_z  # NxZ
 
     def _eval_brdf_at(self, pts2l, pts2c, normal, albedo, brdf_prop):
@@ -502,7 +503,7 @@ def compute_loss(self, pred, gt, **kwargs):
             return loss
         # If we modify the geometry
         if self.shape_mode in ('scratch', 'finetune'):
-            # Predicted values should be close to NeRF values
+            # Predicted values should be close to initial values
             normal_loss = tf.keras.losses.MSE(normal_gt, normal_pred)  # N
             lvis_loss = tf.keras.losses.MSE(lvis_gt, lvis_pred)  # N
             loss += normal_loss_weight * normal_loss
@@ -725,13 +726,13 @@ def composite_on_avg_light(render, light_uint):
             (im1, im2), outpath=join(outdir, 'pred-vs-gt_rgb.apng'))
         if self.shape_mode != 'nerf':
             im1 = xm.vis.text.put_text(
-                img_dict['gt_normal'], "NeRF", **put_text_kwargs)
+                img_dict['gt_normal'], "Initial", **put_text_kwargs)
             im2 = xm.vis.text.put_text(
                 img_dict['pred_normal'], "Prediction", **put_text_kwargs)
             xm.vis.anim.make_anim(
                 (im1, im2), outpath=join(outdir, 'pred-vs-gt_normal.apng'))
             im1 = xm.vis.text.put_text(
-                img_dict['gt_lvis'], "NeRF", **put_text_kwargs)
+                img_dict['gt_lvis'], "Initial", **put_text_kwargs)
             im2 = xm.vis.text.put_text(
                 img_dict['pred_lvis'], "Prediction", **put_text_kwargs)
             xm.vis.anim.make_anim(
@@ -776,7 +777,7 @@ def _compile_into_webpage(self, batch_dirs, out_html):
             rowtypes = ['text', 'image', 'image', 'image', 'image']
             if self.shape_mode == 'nerf':
                 row.append(join(batch_dir, 'gt_normal.png'))
-                rowcaps.append("Normal (NeRF)")
+                rowcaps.append("Normal (initial)")
                 rowtypes.append('image')
             else:
                 row.append(join(batch_dir, 'pred-vs-gt_normal.apng'))
@@ -787,7 +788,7 @@ def _compile_into_webpage(self, batch_dirs, out_html):
                 rowtypes.append('image')
             if self.shape_mode == 'nerf':
                 row.append(join(batch_dir, 'gt_lvis.png'))
-                rowcaps.append("Light Visibility (NeRF)")
+                rowcaps.append("Light Visibility (initial)")
                 rowtypes.append('image')
             else:
                 row.append(join(batch_dir, 'pred-vs-gt_lvis.apng'))
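
The pts_scaled lines above pair with the new xyz_scale entry in
nerfactor_mvs.ini ("DTU scenes have huge XYZs"): positional encoding applies
sin and cos at frequencies up to 2^(n_freqs_xyz - 1), so coordinates in the
hundreds would oscillate wildly between neighboring points, while pre-scaling
by 1e-3 brings them near unit range. A minimal sketch of a NeRF-style embedder
to illustrate the effect (the repo's actual embedder is defined elsewhere and
may differ):

import numpy as np

def pos_enc(x, n_freqs=10):
    # NeRF-style positional encoding: concat of x, sin(2^k x), cos(2^k x)
    feats = [x]
    for k in range(n_freqs):
        feats += [np.sin(2.0 ** k * x), np.cos(2.0 ** k * x)]
    return np.concatenate(feats, axis=-1)

xyz_scale = 1e-3
raw = np.array([[512.3, -101.7, 634.9]])  # hypothetical DTU-scale point
emb = pos_enc(xyz_scale * raw)  # top frequency sees ~2^9 * 0.6, not ~2^9 * 600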
