# make dataset

In [1]:
def get_opt():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', default='./data/photoshapes/shape09096_rank00',type=str)
    parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --preload")
    parser.add_argument('--test', action='store_true', help="test mode")
    parser.add_argument('--workspace', type=str, default='workspace')
    parser.add_argument('--seed', type=int, default=0)
    ### training options
    parser.add_argument('--iters', type=int, default=15000, help="training iters")
    parser.add_argument('--lr', type=float, default=1e-2, help="initial learning rate")
    parser.add_argument('--ckpt', type=str, default='best')
    parser.add_argument('--num_rays', type=int, default=4096, help="num rays sampled per image for each training step")
    parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
    parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
    parser.add_argument('--num_steps', type=int, default=512, help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
    parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)")
    parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
    parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)")
    parser.add_argument('--patch_size', type=int, default=1, help="[experimental] render patches in training, so as to apply LPIPS loss. 1 means disabled, use [64, 32, 16] to enable")
    ### network backbone options
    parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
    parser.add_argument('--ff', action='store_true', help="use fully-fused MLP")
    parser.add_argument('--tcnn', action='store_true', help="use TCNN backend")
    ### dataset options
    parser.add_argument('--color_space', type=str, default='srgb', help="Color space, supports (linear, srgb)")
    parser.add_argument('--preload', action='store_true', help="preload all data into GPU, accelerate training but use more GPU memory")
    # (the default value is for the fox dataset)
    parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
    parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
    parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
    parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
    parser.add_argument('--min_near', type=float, default=0.2, help="minimum near distance for camera")
    parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
    parser.add_argument('--bg_radius', type=float, default=-1, help="if positive, use a background model at sphere(bg_radius)")
    ### GUI options
    parser.add_argument('--gui', action='store_true', help="start a GUI")
    parser.add_argument('--W', type=int, default=1920, help="GUI width")
    parser.add_argument('--H', type=int, default=1080, help="GUI height")
    parser.add_argument('--radius', type=float, default=5, help="default GUI camera radius from center")
    parser.add_argument('--fovy', type=float, default=50, help="default GUI camera fovy")
    parser.add_argument('--max_spp', type=int, default=64, help="GUI rendering max sample per pixel")
    ### experimental
    parser.add_argument('--error_map', action='store_true', help="use error map to sample rays")
    parser.add_argument('--clip_text', type=str, default='', help="text input for CLIP guidance")
    parser.add_argument('--rand_pose', type=int, default=-1, help="<0 uses no rand pose, =0 only uses rand pose, >0 sample one rand pose every $ known poses")



    return parser.parse_args(args=[])
opt=get_opt()

opt.fp16 = True
opt.cuda_ray = True
opt.preload = True

opt.test=True

if opt.patch_size > 1:
    opt.error_map = False # do not use error_map if use patch-based training
    # assert opt.patch_size > 16, "patch_size should > 16 to run LPIPS loss."
    assert opt.num_rays % (opt.patch_size ** 2) == 0, "patch_size ** 2 should be dividable by num_rays."


# Checking the number of parameter

In [6]:
param=0
for name,child in model.named_children():
    print(f'module_name={name}, parameter={sum(p.numel() for p in child.parameters() if p.requires_grad)}')
    param+= sum(p.numel() for p in child.parameters() if p.requires_grad)

print(param)

module_name=encoder, parameter=494592
module_name=sigma_net, parameter=3072
module_name=encoder_dir, parameter=0
module_name=color_net, parameter=7168
504832


In [4]:

from nerf.provider import *
import os
import glob
import json
import tqdm
import numpy as np
from scipy.spatial.transform import Slerp, Rotation


import torch
from torch.utils.data import DataLoader

from nerf.network_tcnn import NeRFNetwork
import matplotlib.pyplot as plt
from nerf.utils import *
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

model=NeRFNetwork(
        encoding="hashgrid",
        bound=opt.bound,
        cuda_ray=opt.cuda_ray,
        density_scale=1,
        min_near=opt.min_near,
        density_thresh=opt.density_thresh,
        bg_radius=opt.bg_radius,
    ).to(device)


In [39]:
for name,child in model.named_children():
    print(f'module_name={name}, parameter={sum(p.numel() for p in child.parameters() if p.requires_grad)}')

module_name=encoder, parameter=496240
module_name=sigma_net, parameter=3072
module_name=encoder_dir, parameter=0
module_name=color_net, parameter=7168


# Random generation

In [2]:

from nerf.provider import *
import os
import glob
import json
import tqdm
import numpy as np
from scipy.spatial.transform import Slerp, Rotation


import torch
from torch.utils.data import DataLoader

from nerf.network_tcnn import NeRFNetwork
import matplotlib.pyplot as plt
from nerf.utils import *

device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
opt.path='./data/photoshapes/shape09096_rank01'
opt.cuda_ray=True
model = NeRFNetwork(
        encoding="hashgrid",
        bound=opt.bound,
        cuda_ray=opt.cuda_ray,
        density_scale=1,
        min_near=opt.min_near,
        density_thresh=opt.density_thresh,
        bg_radius=opt.bg_radius,
    ).to(device)

In [3]:
torch.cuda.empty_cache()
ckpt=torch.load('./data/photoshape_weight/shape09096_rank01/checkpoints/ngp.pth')

model.load_state_dict(ckpt['model'],strict=False)
model.to(device)
model.eval()


NeRFNetwork(
  (encoder): Encoding(n_input_dims=3, n_output_dims=32, seed=1337, dtype=torch.float16, hyperparams={'base_resolution': 16, 'hash': 'CoherentPrime', 'interpolation': 'Linear', 'log2_hashmap_size': 14, 'n_features_per_level': 2, 'n_levels': 16, 'otype': 'Grid', 'per_level_scale': 1.4472692012786865, 'type': 'Hash'})
  (sigma_net): Network(n_input_dims=32, n_output_dims=16, seed=1337, dtype=torch.float16, hyperparams={'encoding': {'offset': 0.0, 'otype': 'Identity', 'scale': 1.0}, 'network': {'activation': 'ReLU', 'n_hidden_layers': 1, 'n_neurons': 64, 'otype': 'FullyFusedMLP', 'output_activation': 'None'}, 'otype': 'NetworkWithInputEncoding'})
  (encoder_dir): Encoding(n_input_dims=3, n_output_dims=16, seed=1337, dtype=torch.float16, hyperparams={'degree': 4, 'otype': 'SphericalHarmonics'})
  (color_net): Network(n_input_dims=31, n_output_dims=3, seed=1337, dtype=torch.float16, hyperparams={'encoding': {'offset': 0.0, 'otype': 'Identity', 'scale': 1.0}, 'network': {'activat

- if you have "transforms_test.json" file in path, this code make video of your dataset

- This code is making random pose

In [4]:
from torch.cuda.amp import autocast
from torch.utils.data import DataLoader

poses=rand_poses(5,device,theta_range=[np.pi/3,np.pi/3],radius=0.5)
# make intrinsic
focal=245/(2*np.tan(np.radians(60)/2))
intrinsics=np.array([focal,focal,256//2,256//2])
data=get_rays(poses,intrinsics,256,256)

with autocast():
    with torch.no_grad():
        result=model.render(data['rays_o'].float(),data['rays_d'].float())

RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [41]:
pred=result['image'].reshape(-1,256,256,3)
pred=srgb_to_linear(pred).detach().cpu().numpy()
pred=(pred*255).astype(np.uint8)

cv2.imwrite(os.path.join("/home/poong/hyperdiff",'sample1.png'), cv2.cvtColor(pred[3], cv2.COLOR_RGB2BGR))


True

In [5]:


data=get_rays(poses,intrinsics,256,256)
with autocast():
    with torch.no_grad():
        result=model.render(data['rays_o'].float(),data['rays_d'].float())

pred=result['image'].reshape(-1,256,256,3)
pred=srgb_to_linear(pred).detach().cpu().numpy()
pred=(pred*255).astype(np.uint8)

cv2.imwrite(os.path.join("/home/poong/hyperdiff",'sample.png'), cv2.cvtColor(pred[0], cv2.COLOR_RGB2BGR))

RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemmStridedBatched( handle, opa, opb, m, n, k, &alpha, a, lda, stridea, b, ldb, strideb, &beta, c, ldc, stridec, num_batches)`

In [4]:
poses=line_poses(10,device,radius=0.5)
data=get_rays(poses,intrinsics,256,256)
with autocast():
    with torch.no_grad():
        result=model.render(data['rays_o'].float(),data['rays_d'].float())

pred=result['image'].reshape(-1,256,256,3)
pred=srgb_to_linear(pred).detach().cpu().numpy()
pred=(pred*255).astype(np.uint8)

#cv2.imwrite(os.path.join("/home/poong/hyperdiff",'sample0.png'), cv2.cvtColor(pred[3], cv2.COLOR_RGB2BGR))
imageio.mimwrite(os.path.join("/home/poong/hyperdiff",'sample.mp4'),pred, fps=2, quality=8, macro_block_size=1)


NameError: name 'intrinsics' is not defined

In [None]:
metrics = [PSNRMeter(), LPIPSMeter(device=device)]
test_loader = NeRFDataset(opt, device=device, type='test').dataloader()
trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, fp16=opt.fp16, metrics=metrics, use_checkpoint=opt.ckpt)
trainer.test(test_loader, write_video=True)

### Seed Checking

In [2]:
from nerf.provider import *
import os
import glob
import json
import tqdm
import numpy as np
from scipy.spatial.transform import Slerp, Rotation


import torch
from torch.utils.data import DataLoader

from nerf.network_tcnn import NeRFNetwork
import matplotlib.pyplot as plt

opt=get_opt()
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')

In [3]:
seed_everything(opt.seed)
model = NeRFNetwork(
        encoding="hashgrid",
        bound=opt.bound,
        cuda_ray=opt.cuda_ray,
        density_scale=1,
        min_near=opt.min_near,
        density_thresh=opt.density_thresh,
        bg_radius=opt.bg_radius,
    )

NameError: name 'seed_everything' is not defined

In [24]:
model.encoder.params

Parameter containing:
tensor([-7.0460e-05, -3.4031e-05,  3.8013e-05,  ..., -1.4546e-05,
         2.6831e-05, -4.1242e-05], device='cuda:0', requires_grad=True)

In [22]:
import tinycudann as tcnn
encoder = tcnn.Encoding(
    n_input_dims=3,
    seed=1337,
    encoding_config={
        "otype": "HashGrid",
        "n_levels": 16,
        "n_features_per_level": 2,
        "log2_hashmap_size": 14,
        "base_resolution": 16,
        "per_level_scale": np.exp2(np.log2(2048 * 1/ 16) / (16 - 1)),
    },
)

In [25]:
ckpt=torch.load('./result/shape09096_rank00/checkpoints/ngp.pth')

In [29]:
ckpt['mean_density']

0.7593666911125183

## Transformer 

In [1]:

from nerf.provider import *
import os
import glob
import json
import tqdm
import numpy as np
from scipy.spatial.transform import Slerp, Rotation
import math


import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset

from nerf.network_tcnn import NeRFNetwork
import matplotlib.pyplot as plt
from nerf.utils import *
from weight_encoder import *
ckpt=torch.load('./data/photoshape_weight/shape09096_rank00/checkpoints/ngp.pth')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [11]:
ckpt['model'].keys()

odict_keys(['aabb_train', 'aabb_infer', 'density_bitfield', 'step_counter', 'encoder.params', 'sigma_net.params', 'encoder_dir.params', 'color_net.params'])

In [2]:
param=torch.concat((ckpt['model']['encoder.params'],ckpt['model']['sigma_net.params'],ckpt['model']['color_net.params']),-1)

In [3]:
encoder_layer=TransformerEncoderLayer(d_model=512,nhead=8).to(device)
#transformer_encoder=torch.nn.TransformerEncoder(encoder_layer,num_layers=6).to(device)

In [4]:
encoded=Transformer_Encoder(param.size(),param.size(), chunk_size=512)(param.unsqueeze(0)).cpu()                                   
#positioned=PositionalEncoding(512)(encoded)
#out=transformer_encoder(positioned.to(device))

986
986
[512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 5

In [43]:
decoder_layer=torch.nn.TransformerDecoderLayer(d_model=512,nhead=8)
decoder=torch.nn.TransformerDecoder(decoder_layer,num_layers=6).to(device)
src=torch.rand_like(out)
result=decoder(src,out)

In [78]:
downsampling=nn.Linear(512,256).to(device)
downsampling(out).shape

torch.Size([1, 986, 256])

In [41]:
ckpt['model']['encoder.params'].shape

torch.Size([494592])

In [58]:
result.flatten()

tensor([ 0.5999,  1.3323,  0.3148,  ..., -1.6161,  1.4250, -1.2095],
       device='cuda:5', grad_fn=<ReshapeAliasBackward0>)

In [73]:
index1=len(ckpt['model']['encoder.params'])
index2=index1+len(ckpt['model']['sigma_net.params'])
index3=index2+len(ckpt['model']['color_net.params'])
ckpt['model']['encoder.params']=result.flatten()[:index1]
ckpt['model']['sigma_net.params']=result.flatten()[index1:index2]
ckpt['model']['color_net.params']=result.flatten()[index2:]

In [74]:
torch.save(ckpt,'hi.pth')

In [16]:
encoded.shape

torch.Size([1, 986, 512])

In [1]:
from weight_encoding.transformer.encoder import *
from weight_encoding.transformer.block import Weight_Split,DecoderLayer
from nerf.provider import *
import os
import glob
import json
import tqdm
import numpy as np
import math


import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.utils.data import dataset

from nerf.network_tcnn import NeRFNetwork
import matplotlib.pyplot as plt
from nerf.utils import *


ckpt=torch.load('./data/photoshape_weight/shape09096_rank00/checkpoints/ngp.pth')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
param=torch.concat((ckpt['model']['encoder.params'],ckpt['model']['sigma_net.params'],ckpt['model']['color_net.params']),-1)
splited_param=Weight_Split(param.size(),param.size(), chunk_size=512)(param.unsqueeze(0)).to(device)                               

encoder = Encoder(986,512,2048,8,8,0.1,device).to(device)

986
986


In [2]:
encoded=encoder(splited_param)

In [3]:
decoder = DecoderLayer(32,64,8,0.1,out=64).to(device)
decoded = decoder(encoded,encoded)