In [1]:
# Reload edited project modules automatically before each cell runs, so changes
# to the imported .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# %matplotlib notebook

In [2]:
# general setup as defined in gen_video.py
import os.path
import torch
import torch.nn.functional as F
import numpy as np
import imageio
import util
import warnings
from data import get_split_dataset
from render import NeRFRenderer
from model import make_model
from scipy.interpolate import CubicSpline
# import tqdm


# Parameters

In [3]:
# CUDA device index handed to util.get_cuda() when the device is selected.
gpu_id = 0

In [4]:
# Settings for the rendered camera orbit.
num_views = 40  # number of poses generated around the object
elevation = -10.0  # forwarded to util.pose_spherical; presumably degrees — TODO confirm
scale = 1.0  # multiplier applied to focal and to the image center c when rays are generated

In [5]:
# Dataset selection.
dataset_format = 'srn'  # first argument to get_split_dataset
want_split = 'train'  # passed to get_split_dataset as want_split=
# Root of the dataset. The original machine-specific location stays as the
# default; set PIXELNERF_DATADIR to point somewhere else without editing
# the notebook.
datadir = os.environ.get(
    'PIXELNERF_DATADIR', '/home/ubuntu/projects/pixel-nerf/data/cars'
)
subset = 0  # index of the dataset item used as the object instance

# Algo Settings

In [6]:
# Device handle for the chosen GPU index; used by every `.to(device=device)` call below.
device = util.get_cuda(gpu_id)

In [7]:
# Build the dataset object for the requested split; dset[subset], dset.z_near,
# dset.z_far and dset.lindisp are read from it in later cells.
dset = get_split_dataset(dataset_format, datadir, want_split=want_split)

Loading SRN dataset /home/ubuntu/projects/pixel-nerf/data/cars_train name: cars
Loading SRN dataset /home/ubuntu/projects/pixel-nerf/data/cars_val name: cars
Loading SRN dataset /home/ubuntu/projects/pixel-nerf/data/cars_test name: cars


In [8]:
# Pull one object instance out of the dataset and unpack what later cells need.
data = dset[subset]
images, poses = data["images"], data["poses"]  # (NV, 3, H, W) and (NV, 4, 4)
focal = data["focal"][None]  # indexed with None to add a leading axis

In [9]:
# Depth bounds supplied by the dataset; shown as a tuple for a quick check.
z_near, z_far = dset.z_near, dset.z_far
(z_near, z_far)

(0.8, 1.8)

In [10]:
# Optional image center (e.g. [64, 64] for a 128 x 128 image). When the dataset
# provides it, move it to the device and prepend a batch axis; otherwise keep None.
c = data.get("c")
c = None if c is None else c.to(device=device).unsqueeze(0)

In [11]:
# Number of views and image height/width, unpacked from the (NV, 3, H, W) image tensor.
NV, _, H, W = images.shape
NV, H, W

(10, 128, 128)

# Conf File

In [12]:
from pyhocon import ConfigFactory

In [13]:
# Keep the path and the parsed tree under separate names so `conf` always
# refers to the parsed configuration, never to a plain string.
conf_path = 'conf/resnet_fine_mv.conf'
conf = ConfigFactory.parse_file(conf_path)

In [14]:
# Scratch lookup: the output equals the fallback passed here, which suggests
# "type" is not set at the root of this config — TODO confirm against the conf file.
conf.get_string("type", "pixelnerf")

'pixelnerf'

In [15]:
# Scratch lookup: the output equals the fallback passed here, which suggests
# "type" is not set at the root of this config — TODO confirm against the conf file.
conf.get_string("type", "spatial")

'spatial'

# Ray Generation

In [16]:
# Radius passed to util.pose_spherical: midpoint between the near and far depth bounds.
radius = (z_near + z_far) * 0.5

In [17]:
# 360 pose sequence as in NeRF: one camera pose per azimuth, evenly spaced
# over [-180, 180) so the first pose is not repeated at the end.
azimuths = np.linspace(-180, 180, num_views, endpoint=False)
render_poses = torch.stack(
    [util.pose_spherical(azimuth, elevation, radius) for azimuth in azimuths],
    dim=0,
)  # (num_views, 4, 4)
render_poses.shape

torch.Size([40, 4, 4])

In [18]:
# Generate one ray bundle per pose. focal and the optional image center are
# both multiplied by `scale` before being handed to util.gen_rays.
scaled_center = None if c is None else c * scale
render_rays = util.gen_rays(
    render_poses, W, H, focal * scale, z_near, z_far, c=scaled_center
)
render_rays = render_rays.to(device=device)
render_rays.shape

torch.Size([40, 128, 128, 8])

# Build Net Model

## Load Model

In [19]:
# Inspect the "model" sub-tree of the config; the same sub-tree is passed to make_model below.
conf["model"]

ConfigTree([('use_encoder', True),
            ('use_global_encoder', False),
            ('use_xyz', True),
            ('canon_xyz', False),
            ('use_code', True),
            ('code',
             ConfigTree([('num_freqs', 6),
                         ('freq_factor', 1.5),
                         ('include_input', True)])),
            ('use_code_viewdirs', False),
            ('mlp_coarse',
             ConfigTree([('type', 'resnet'),
                         ('n_blocks', 5),
                         ('d_hidden', 512),
                         ('combine_layer', 3),
                         ('combine_type', 'average')])),
            ('mlp_fine',
             ConfigTree([('type', 'resnet'),
                         ('n_blocks', 5),
                         ('d_hidden', 512),
                         ('combine_layer', 3),
                         ('combine_type', 'average')])),
            ('encoder',
             ConfigTree([('backbone', 'resnet34'),
                      

In [20]:
# Build PixelNeRFNet from model/models.py
# NOTE(review): no checkpoint/weights are loaded in the cells visible here — confirm
# that is intended before interpreting any outputs of the network.
net = make_model(conf["model"]).to(device=device)
# net.encoder

Using torchvision resnet34 encoder


In [21]:
# Build the renderer (render/nerf.py) from the "renderer" config sub-tree.
black = True  # black background, i.e. white_bkgd is disabled
ray_batch_size = 50000  # passed as eval_batch_size
renderer = NeRFRenderer.from_conf(
    conf["renderer"],
    lindisp=dset.lindisp,
    eval_batch_size=ray_batch_size,
    white_bkgd=not black,
)
renderer = renderer.to(device=device)

In [22]:
# Scratch: sample one random view index in [0, NV). The result is only displayed,
# not assigned, and no seed is set in the cells shown, so it can differ between runs.
torch.randint(0, NV, (1,))

tensor([4])

## Split up Model

In [23]:
# Sanity check: shape of the image tensor for the selected instance.
data["images"].shape

torch.Size([10, 3, 128, 128])

# Make Model Splitting

# Flow

In [24]:
# 1. net = make_model(conf["model"]).to(device=device)
#    this call builds the encoder as well
# 2. renderer = NeRFRenderer.from_conf()
# 3. call net.encode()
#
#
#
#

# net.encode()

In [31]:
# Indices of the dataset views used as source (conditioning) views.
src_view = [0, 2]
# The encoder keeps the computed feature map on itself (self.latent). This cell
# appears to be inference only, so autograd is disabled: otherwise the graph
# would stay attached to the cached features and hold on to memory.
with torch.no_grad():
    net.encode(
        images[src_view].unsqueeze(0),  # add a leading batch axis
        poses[src_view].unsqueeze(0).to(device=device),
        focal,
        c=c,
    )


> /home/ubuntu/projects/pixel-nerf/model/encoder.py(164)forward()
-> self.latent_scaling[0] = self.latent.shape[-1]
(Pdb) l
159  	                    mode=self.upsample_interp,
160  	                    align_corners=align_corners,
161  	                )
162  	            self.latent = torch.cat(latents, dim=1)
163  	        pdb.set_trace()
164  ->	        self.latent_scaling[0] = self.latent.shape[-1]
165  	        self.latent_scaling[1] = self.latent.shape[-2]
166  	        self.latent_scaling = self.latent_scaling / (self.latent_scaling - 1) * 2.0
167  	        return self.latent
168  	
169  	    @classmethod
(Pdb) self.latent.shape
torch.Size([2, 512, 64, 64])
(Pdb) 4*64
256
(Pdb) latents[0].shape
torch.Size([2, 64, 64, 64])
(Pdb) latents[1].shape
torch.Size([2, 64, 64, 64])
(Pdb) latents[2].shape
torch.Size([2, 128, 64, 64])
(Pdb) latents[2].shape
torch.Size([2, 128, 64, 64])
(Pdb) 2*64+2*128
384
(Pdb) l
170  	    def from_conf(cls, conf):
171  	        return cls(
172  	        

BdbQuit: 

# Encoder