Commit 0831990

for DTU
xiuming committed Sep 5, 2021
1 parent c1ba8a4 commit 0831990
Showing 6 changed files with 391 additions and 23 deletions.
96 changes: 96 additions & 0 deletions nerfactor/config/nerfactor_mvs.ini
@@ -0,0 +1,96 @@
[DEFAULT]

# ====== Must-Have ======
# These parameters are required by the pipeline, regardless of your custom code

# ------ Data ------
dataset = mvs_shape
no_batch = True
# bs = 4
cache = True

# ------ Model ------
model = nerfactor

# ------ Optimization ------
loss = l2
lr = 5e-3
lr_decay_steps = 500_000
lr_decay_rate = 0.1
clipnorm = -1
clipvalue = -1
epochs = 100

# ------ Logging and Checkpointing ------
ckpt_period = 10
vali_period = 10
vali_batches = 4
vis_train_batches = 4
keep_recent_epochs = -1

# ------ IO ------
overwrite = False
# The following two determine the output directory
outroot = /output/train/hotdog_2163_nerfactor_mvs/
xname = lr{lr}


# ====== Custom ======
# These parameters are whatever your custom dataset and model require

# ------ Data ------
mvs_root = /output/surf_mvs/hotdog_2163/
use_nerf_alpha = False
imh = 512
light_h = 16
near = 2
far = 6
ndc = False
white_bg = True

# ------ Model ------
xyz_jitter_std = 0.01
smooth_use_l1 = True
# DTU scenes have huge XYZs
xyz_scale = 1e-3
# Shape
shape_mode = finetune
shape_model_ckpt = /output/train/hotdog_2163_shape_mvs/lr1e-2/checkpoints/ckpt-2
nerf_shape_respect = 0.1
normal_loss_weight = 0.1
lvis_loss_weight = 0.1
normal_smooth_weight = 0.05
lvis_smooth_weight = 0.05
# BRDF
albedo_slope = 0.77
albedo_bias = 0.03
pred_brdf = True
default_z = 0.1
brdf_model_ckpt = /output/train/merl/lr1e-2/checkpoints/ckpt-50
albedo_smooth_weight = 0.05
brdf_smooth_weight = 0.01
learned_brdf_scale = 1
# Lighting
light_init_max = 1
light_tv_weight = 5e-6
light_achro_weight = 0
# Rendering
linear2srgb = True
test_envmap_dir = /data/envmaps/for-render_h16/test/

# ------ Network ------
mlp_chunk = 65536
mlp_width = 128
mlp_depth = 4
mlp_skip_at = 2
# Positional encoding
pos_enc = True
n_freqs_xyz = 10
n_freqs_ldir = 4
n_freqs_vdir = 4

# ------ Misc. ------
# De facto training batch size: number of random rays per gradient step
n_rays_per_step = 1024
# File viewer prefix, if any
viewer_prefix = http://vision38.csail.mit.edu
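
The values above follow Python INI conventions and can be loaded with the
standard-library configparser (the dataset code below reads them via
config.get/getint/getboolean on the 'DEFAULT' section). A minimal sketch of how
the IO fields might be consumed; the join of outroot and the formatted xname
into a run directory is an assumption suggested by the "determine the output
directory" comment, not something this diff shows:

from configparser import ConfigParser
from os.path import join

config = ConfigParser()
config.read('nerfactor/config/nerfactor_mvs.ini')
lr = config.get('DEFAULT', 'lr')  # '5e-3'
outroot = config.get('DEFAULT', 'outroot')
xname = config.get('DEFAULT', 'xname').format(lr=lr)  # 'lr5e-3'
outdir = join(outroot, xname)  # hypothetical combination rule
print(outdir)  # /output/train/hotdog_2163_nerfactor_mvs/lr5e-3

Note also that albedo_slope = 0.77 and albedo_bias = 0.03 squash the predicted
albedo into [0.03, 0.8], matching the "[bias, scale + bias]" comment in
_pred_albedo_at further down.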
71 changes: 71 additions & 0 deletions nerfactor/config/shape_mvs.ini
@@ -0,0 +1,71 @@
[DEFAULT]

# ====== Must-Have ======
# These parameters are required by the pipeline, regardless of your custom code

# ------ Data ------
dataset = mvs_shape
no_batch = True
# bs = 4
cache = True

# ------ Model ------
model = shape

# ------ Optimization ------
loss = l2
lr = 1e-2
lr_decay_steps = 500_000
lr_decay_rate = 0.1
clipnorm = -1
clipvalue = -1
epochs = 200

# ------ Logging and Checkpointing ------
ckpt_period = 100
vali_period = 100
vali_batches = 4
vis_train_batches = 4
keep_recent_epochs = -1

# ------ IO ------
overwrite = False
# The following two determine the output directory
outroot = /output/train/hotdog_2163_shape_mvs
xname = lr{lr}


# ====== Custom ======
# These parameters are whatever your custom dataset and model require

# ------ Data ------
mvs_root = /output/surf_mvs/hotdog_2163
imh = 512
light_h = 16
near = 2
far = 6
ndc = False
white_bg = True

# ------ Model ------
xyz_jitter_std = 0.01
smooth_use_l1 = True
# DTU scenes have huge XYZs
xyz_scale = 1e-3
# De facto batch size: number of random rays per gradient step
n_rays_per_step = 1024
normal_loss_weight = 1
lvis_loss_weight = 1
# Positional encoding
pos_enc = True
n_freqs_xyz = 10
n_freqs_ldir = 4
n_freqs_vdir = 4

# ------ Network ------
mlp_chunk = 65536
mlp_width = 128
mlp_depth = 4
mlp_skip_at = 2

viewer_prefix = http://vision38.csail.mit.edu
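
The optimization fields imply an exponential decay schedule: the learning rate
falls by a factor of lr_decay_rate every lr_decay_steps steps, i.e.
lr(step) = 1e-2 * 0.1 ** (step / 500_000). A minimal sketch of one consistent
reading, assuming TensorFlow's built-in ExponentialDecay; whether the pipeline
uses this exact class, or Adam, is not shown in this diff:

import tensorflow as tf

# Exponential decay matching lr, lr_decay_steps, and lr_decay_rate above
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,  # lr
    decay_steps=500_000,         # lr_decay_steps
    decay_rate=0.1)              # lr_decay_rate
optimizer = tf.keras.optimizers.Adam(learning_rate=schedule)  # optimizer choice is illustrative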
121 changes: 121 additions & 0 deletions nerfactor/datasets/mvs_shape.py
@@ -0,0 +1,121 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=invalid-unary-operand-type

from os.path import join
import numpy as np

from third_party.xiuminglib import xiuminglib as xm
from nerfactor.util import logging as logutil, io as ioutil, tensor as tutil
from nerfactor.datasets.nerf_shape import Dataset as BaseDataset


logger = logutil.Logger(loggee="datasets/mvs_shape")


class Dataset(BaseDataset):
    def _glob(self):
        mvs_root = self.config.get('DEFAULT', 'mvs_root')
        # Glob metadata paths
        mode_str = 'val' if self.mode == 'vali' else self.mode
        if self.debug:
            logger.warn("Globbing a single data file for faster debugging")
            metadata_dir = join(mvs_root, '%s_000' % mode_str)
        else:
            metadata_dir = join(mvs_root, '%s_???' % mode_str)
        # Include only cameras with all required buffers (depending on mode)
        metadata_paths, incomplete_paths = [], []
        for metadata_path in xm.os.sortglob(metadata_dir, 'metadata.json'):
            id_ = self._parse_id(metadata_path)
            view_dir = join(mvs_root, id_)
            lvis_path = join(view_dir, 'lvis.npy')
            normal_path = join(view_dir, 'normal.npy')
            xyz_path = join(view_dir, 'xyz.npy')
            alpha_path = join(view_dir, 'alpha.png')
            paths = {
                'xyz': xyz_path, 'normal': normal_path, 'lvis': lvis_path,
                'alpha': alpha_path}
            if self.mode != 'test':
                rgba_path = join(view_dir, 'rgba.png')
                paths['rgba'] = rgba_path
            if ioutil.all_exist(paths):
                metadata_paths.append(metadata_path)
                self.meta2buf[metadata_path] = paths
            else:
                incomplete_paths.append(metadata_path)
        if incomplete_paths:
            logger.warn((
                "Skipping\n\t%s\nbecause at least one of their paired "
                "buffers doesn't exist"), incomplete_paths)
        logger.info("Number of '%s' views: %d", self.mode, len(metadata_paths))
        return metadata_paths

    # pylint: disable=arguments-differ
    def _load_data(self, metadata_path):
        imh = self.config.getint('DEFAULT', 'imh')
        use_nerf_alpha = self.config.getboolean('DEFAULT', 'use_nerf_alpha')
        metadata_path = tutil.eager_tensor_to_str(metadata_path)
        id_ = self._parse_id(metadata_path)
        # Rays
        metadata = ioutil.read_json(metadata_path)
        h, w = metadata['imh'], metadata['imw']
        cam_loc = np.array(metadata['cam_loc'])
        rayo = np.tile(cam_loc[None, None, :], (h, w, 1))
        rayo = rayo.astype(np.float32)
        rayd = np.zeros_like(rayo)  # dummy
        # Load precomputed shape properties
        paths = self.meta2buf[metadata_path]
        xyz = ioutil.load_np(paths['xyz'])
        normal = ioutil.load_np(paths['normal'])
        if self.debug:
            logger.warn("Faking light visibility for faster debugging")
            lvis = 0.5 * np.ones(normal.shape[:2] + (512,), dtype=np.float32)
        else:
            lvis = ioutil.load_np(paths['lvis'])
        # RGB and alpha, depending on the mode
        if self.mode == 'test':
            # No RGBA, so estimated alpha and placeholder RGB
            alpha = xm.io.img.load(paths['alpha'])
            alpha = xm.img.normalize_uint(alpha)
            rgb = np.zeros_like(xyz)
        else:
            # Training or validation, where each camera has a paired image
            rgba = xm.io.img.load(paths['rgba'])
            assert rgba.ndim == 3 and rgba.shape[2] == 4, \
                "Input image is not RGBA"
            rgba = xm.img.normalize_uint(rgba)
            rgb = rgba[:, :, :3]
            if use_nerf_alpha:  # useful for real scenes
                alpha = xm.io.img.load(paths['alpha'])
                alpha = xm.img.normalize_uint(alpha)
            else:
                alpha = rgba[:, :, 3]  # ground-truth alpha
        # Resize
        if imh != xyz.shape[0]:
            xyz = xm.img.resize(xyz, new_h=imh)
            normal = xm.img.resize(normal, new_h=imh)
            lvis = xm.img.resize(lvis, new_h=imh)
            alpha = xm.img.resize(alpha, new_h=imh)
            rgb = xm.img.resize(rgb, new_h=imh)
        # Make sure there's no XYZ coinciding with camera (caused by occupancy
        # accumulating to 0)
        assert not np.isclose(xyz, rayo).all(axis=2).any(), \
            "Found XYZs coinciding with the camera"
        # Re-normalize normals and clip light visibility before returning
        normal = xm.linalg.normalize(normal, axis=2)
        assert np.isclose(np.linalg.norm(normal, axis=2), 1).all(), \
            "Found normals with a norm far away from 1"
        lvis = np.clip(lvis, 0, 1)
        return id_, rayo, rayd, rgb, alpha, xyz, normal, lvis
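
Pieced together from _glob() and _load_data(), the dataset expects a per-view
layout like the following under mvs_root (a sketch, not part of this diff; the
three-digit indices are inferred from the debug branch's '%s_000' pattern):

/output/surf_mvs/hotdog_2163/
    train_000/
        metadata.json   # provides imh, imw, cam_loc
        xyz.npy         # per-pixel surface points
        normal.npy      # per-pixel normals, re-normalized on load
        lvis.npy        # per-pixel light visibility; last dim is
                        # light_h * 2 * light_h = 16 * 32 = 512
        alpha.png       # estimated alpha matte
        rgba.png        # paired image; required except in 'test' mode
    train_001/
    ...
    val_000/
    ...
    test_000/
    ...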
25 changes: 13 additions & 12 deletions nerfactor/models/nerfactor.py
@@ -21,7 +21,6 @@
 import tensorflow as tf
 
 from third_party.xiuminglib import xiuminglib as xm
-from brdf.renderer import gen_light_xyz
 from nerfactor.models.shape import Model as ShapeModel
 from nerfactor.models.brdf import Model as BRDFModel
 from nerfactor.networks import mlp
@@ -63,9 +62,8 @@ def __init__(self, config, debug=False):
         self._light = None  # see the light property
         light_h = self.config.getint('DEFAULT', 'light_h')
         self.light_res = (light_h, 2 * light_h)
-        lxyz, lareas = gen_light_xyz(*self.light_res)
-        self.lxyz = tf.convert_to_tensor(lxyz, dtype=tf.float32)
-        self.lareas = tf.convert_to_tensor(lareas, dtype=tf.float32)
+        lxyz, lareas = self._gen_lights()
+        self.lxyz, self.lareas = lxyz, lareas
         # Novel lighting conditions for relighting at test time:
         olat_inten = self.config.getfloat('DEFAULT', 'olat_inten', fallback=200)
         ambi_inten = self.config.getfloat(
@@ -143,7 +141,7 @@ def _init_net(self):
             [mlp_width] * mlp_depth, act=['relu'] * mlp_depth,
             skip_at=[mlp_skip_at])
         net['brdf_z_out'] = mlp.Network([self.z_dim], act=None)
-        # Training from scratch, finetuning, or just using NeRF geometry?
+        # Training from scratch, finetuning, or just using initial geometry?
         if self.shape_mode == 'scratch':
             net['normal_mlp'] = mlp.Network(
                 [mlp_width] * mlp_depth, act=['relu'] * mlp_depth,
@@ -385,13 +383,14 @@ def _pred_albedo_at(self, pts):
         mlp_layers = self.net['albedo_mlp']
         out_layer = self.net['albedo_out']  # output in [0, 1]
         embedder = self.embedder['xyz']
+        pts_scaled = self.xyz_scale * pts  # transparent to the user
 
         def chunk_func(surf):
             surf_embed = embedder(surf)
             albedo = out_layer(mlp_layers(surf_embed))
             return albedo
 
-        albedo = self.chunk_apply(chunk_func, pts, 3, self.mlp_chunk)
+        albedo = self.chunk_apply(chunk_func, pts_scaled, 3, self.mlp_chunk)
         albedo = albedo_scale * albedo + albedo_bias  # [bias, scale + bias]
         albedo = tf.debugging.check_numerics(albedo, "Albedo")
         return albedo  # Nx3
@@ -400,13 +399,15 @@ def _pred_brdf_at(self, pts):
         mlp_layers = self.net['brdf_z_mlp']
         out_layer = self.net['brdf_z_out']
         embedder = self.embedder['xyz']
+        pts_scaled = self.xyz_scale * pts  # transparent to the user
 
         def chunk_func(surf):
             surf_embed = embedder(surf)
             brdf_z = out_layer(mlp_layers(surf_embed))
             return brdf_z
 
-        brdf_z = self.chunk_apply(chunk_func, pts, self.z_dim, self.mlp_chunk)
+        brdf_z = self.chunk_apply(
+            chunk_func, pts_scaled, self.z_dim, self.mlp_chunk)
         return brdf_z  # NxZ
 
     def _eval_brdf_at(self, pts2l, pts2c, normal, albedo, brdf_prop):
@@ -502,7 +503,7 @@ def compute_loss(self, pred, gt, **kwargs):
             return loss
         # If we modify the geometry
         if self.shape_mode in ('scratch', 'finetune'):
-            # Predicted values should be close to NeRF values
+            # Predicted values should be close to initial values
             normal_loss = tf.keras.losses.MSE(normal_gt, normal_pred)  # N
             lvis_loss = tf.keras.losses.MSE(lvis_gt, lvis_pred)  # N
             loss += normal_loss_weight * normal_loss
@@ -725,13 +726,13 @@ def composite_on_avg_light(render, light_uint):
             (im1, im2), outpath=join(outdir, 'pred-vs-gt_rgb.apng'))
         if self.shape_mode != 'nerf':
             im1 = xm.vis.text.put_text(
-                img_dict['gt_normal'], "NeRF", **put_text_kwargs)
+                img_dict['gt_normal'], "Initial", **put_text_kwargs)
             im2 = xm.vis.text.put_text(
                 img_dict['pred_normal'], "Prediction", **put_text_kwargs)
             xm.vis.anim.make_anim(
                 (im1, im2), outpath=join(outdir, 'pred-vs-gt_normal.apng'))
             im1 = xm.vis.text.put_text(
-                img_dict['gt_lvis'], "NeRF", **put_text_kwargs)
+                img_dict['gt_lvis'], "Initial", **put_text_kwargs)
             im2 = xm.vis.text.put_text(
                 img_dict['pred_lvis'], "Prediction", **put_text_kwargs)
             xm.vis.anim.make_anim(
@@ -776,7 +777,7 @@ def _compile_into_webpage(self, batch_dirs, out_html):
             rowtypes = ['text', 'image', 'image', 'image', 'image']
             if self.shape_mode == 'nerf':
                 row.append(join(batch_dir, 'gt_normal.png'))
-                rowcaps.append("Normal (NeRF)")
+                rowcaps.append("Normal (initial)")
                 rowtypes.append('image')
             else:
                 row.append(join(batch_dir, 'pred-vs-gt_normal.apng'))
@@ -787,7 +788,7 @@ def _compile_into_webpage(self, batch_dirs, out_html):
                 rowtypes.append('image')
             if self.shape_mode == 'nerf':
                 row.append(join(batch_dir, 'gt_lvis.png'))
-                rowcaps.append("Light Visibility (NeRF)")
+                rowcaps.append("Light Visibility (initial)")
                 rowtypes.append('image')
             else:
                 row.append(join(batch_dir, 'pred-vs-gt_lvis.apng'))
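
The pts_scaled lines above pair with the new xyz_scale entry in
nerfactor_mvs.ini ("DTU scenes have huge XYZs"): positional encoding applies
sin and cos at frequencies up to 2^(n_freqs_xyz - 1), so coordinates in the
hundreds would oscillate wildly between neighboring points, while pre-scaling
by 1e-3 brings them near unit range. A minimal sketch of a NeRF-style embedder
to illustrate the effect (the repo's actual embedder is defined elsewhere and
may differ):

import numpy as np

def pos_enc(x, n_freqs=10):
    # NeRF-style positional encoding: concat of x, sin(2^k x), cos(2^k x)
    feats = [x]
    for k in range(n_freqs):
        feats += [np.sin(2.0 ** k * x), np.cos(2.0 ** k * x)]
    return np.concatenate(feats, axis=-1)

xyz_scale = 1e-3
raw = np.array([[512.3, -101.7, 634.9]])  # hypothetical DTU-scale point
emb = pos_enc(xyz_scale * raw)  # top frequency sees ~2^9 * 0.6, not ~2^9 * 600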
