Part2

In [8]:
# Load the scene archive; every array used below is read from it by key.
data = np.load("/kaggle/input/load-scene/lego_200x200.npz")

# Training images: [100, 200, 200, 3]
# Dividing by 255 rescales the stored values (presumably 8-bit — TODO confirm)
# to floats in [0, 1].
images_train = data["images_train"] / 255.0

# Cameras for the training images
# (camera-to-world transformation matrix): [100, 4, 4]
c2ws_train = data["c2ws_train"]

# Validation images: [10, 200, 200, 3]
images_val = data["images_val"] / 255.0

# Cameras for the validation images
# (camera-to-world transformation matrix): [10, 4, 4]
c2ws_val = data["c2ws_val"]

# Test cameras for novel-view video rendering:
# (camera-to-world transformation matrix): [60, 4, 4]
c2ws_test = data["c2ws_test"]

# Camera focal length
# NOTE(review): indexing an .npz yields a NumPy array, so this is presumably
# a 0-d array rather than a Python float — confirm before relying on the type.
focal = data["focal"]  # float

In [9]:
def transform(c2w, x_c):
  """Apply a 4x4 homogeneous transform to a batch of 3-D points.

  Args:
    c2w: 4x4 matrix (used in this file as a camera-to-world transform).
    x_c: array of shape (N, 3), one point per row.

  Returns:
    Array of shape (N, 3): the transformed points, divided by their
    homogeneous coordinate.
  """
  # Append w = 1 to every point so the 4x4 matrix can act on it.
  ones_column = np.ones((x_c.shape[0], 1))
  points_h = np.hstack((x_c, ones_column))
  # Points are rows, so transpose around the matrix product.
  mapped_h = (c2w @ points_h.T).T
  xyz = mapped_h[:, :3]
  w = mapped_h[:, 3:]
  return xyz / w

In [10]:
def pixel_to_camera(K, uv, s):
  """Back-project pixel coordinates into camera space at depth ``s``.

  Computes ``s * K^-1 @ [u, v, 1]^T`` for every pixel.

  Args:
    K: 3x3 camera intrinsics matrix.
    uv: array of shape (N, 2) with pixel coordinates (u, v); a single
      pixel of shape (2,) is also accepted.
    s: depth along the camera z axis. Either a scalar shared by all
      pixels or an array with one depth per pixel.

  Returns:
    Array of shape (N, 3) with the camera-space points, one per row.
  """
  uv = np.atleast_2d(np.asarray(uv, dtype=float))
  # Pad with the row count of `uv` itself (the original read an unrelated
  # name, `x_c`, which is not defined in this function).
  uv_padded = np.hstack((uv, np.ones((uv.shape[0], 1))))
  K_inv = np.linalg.inv(K)
  unit_depth_points = (K_inv @ uv_padded.T).T
  depth = np.asarray(s, dtype=float)
  if depth.ndim > 0:
    # One depth per pixel: make it a column so it scales each row.
    depth = depth.reshape(-1, 1)
  return unit_depth_points * depth

In [12]:
def pixel_to_ray(K, c2w, uv):
    """Turn pixel coordinates into world-space rays.

    Assumes ``c2w`` is an affine camera-to-world matrix whose last row is
    ``[0, 0, 0, 1]``. Under that assumption the camera centre is simply the
    translation column and the ray direction is the rotated back-projected
    pixel, so no matrix inversion of ``c2w`` is needed (the original
    inverted it twice to recover the same translation).

    Args:
        K: 3x3 camera intrinsics matrix.
        c2w: 4x4 camera-to-world matrix.
        uv: array-like of shape (N, 2) with pixel coordinates (u, v); a
            single pixel of shape (2,) is also accepted.

    Returns:
        Tuple ``(ray_o, ray_d)`` of arrays with shape (N, 3): the ray
        origin repeated once per pixel, and the unit-length ray directions.
    """
    K = np.asarray(K, dtype=float)
    c2w = np.asarray(c2w, dtype=float)
    uv = np.atleast_2d(np.asarray(uv, dtype=float))

    rotation = c2w[:3, :3]
    origin = c2w[:3, 3]

    # Back-project each pixel to the z = 1 plane of the camera frame.
    uv_padded = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xyz_c = uv_padded @ np.linalg.inv(K).T

    # Rotating the camera-space point gives (world point - origin) directly,
    # which avoids adding and then subtracting the translation.
    ray_d = xyz_c @ rotation.T
    ray_d /= np.linalg.norm(ray_d, axis=1, keepdims=True)

    ray_o = np.tile(origin, (uv.shape[0], 1))
    return ray_o, ray_d

In [24]:
def sampleRays(N, images_train, c2ws_train):
    """Sample random pixels from every image and return their rays and colours.

    ``N // num_images`` pixels are drawn (with replacement) from each image,
    so fewer than ``N`` rays are returned when ``N`` is not a multiple of the
    number of images. The intrinsics are built from the module-level
    ``focal`` with the principal point at the image centre.

    Args:
        N: total number of rays requested.
        images_train: array indexed as [image, row, column, channel].
        c2ws_train: one 4x4 camera-to-world matrix per image.

    Returns:
        Tuple ``(ray_origins, ray_directions, pixel_colors)``; each array has
        one row per sampled pixel, in image order.
    """
    num_images, image_height, image_width = images_train.shape[:3]
    num_per_image = N // num_images
    K = np.array([
        [focal, 0, image_width / 2],
        [0, focal, image_height / 2],
        [0, 0, 1],
    ])

    origins, directions, colors = [], [], []
    for i, image in enumerate(images_train):
        # u is the column (bounded by the width), v is the row (bounded by
        # the height). The original drew both from the height.
        sampled_u = np.random.randint(image_width, size=num_per_image)
        sampled_v = np.random.randint(image_height, size=num_per_image)
        # +0.5 aims the ray at the pixel centre.
        sampled_coords = np.column_stack((sampled_u + 0.5, sampled_v + 0.5))
        ray_o, ray_d = pixel_to_ray(K, c2ws_train[i], sampled_coords)
        origins.append(ray_o)
        directions.append(ray_d)
        # Images are indexed [row, column] = [v, u], so the colour matches
        # the pixel the ray was cast through.
        colors.append(image[sampled_v, sampled_u])
    return np.concatenate(origins), np.concatenate(directions), np.concatenate(colors)

In [15]:
def sample_along_rays(rays_o, rays_d, perturb=True, n_samples=42,
                      near=2.0, far=6.0, t_width=0.5):
    """Sample 3-D points along each ray between ``near`` and ``far``.

    The original version returned the tiled ray origins instead of the
    sampled points, and its broadcasting only worked for a single ray.

    Args:
        rays_o: array of shape (N, 3) with ray origins (a single ray of
            shape (3,) is also accepted).
        rays_d: array of shape (N, 3) with ray directions.
        perturb: when true, jitter the sample depths by a uniform offset in
            ``[-t_width / 2, t_width / 2)``. The same jittered depths are
            shared by every ray in the call.
        n_samples: number of samples per ray.
        near: depth of the first unperturbed sample.
        far: depth of the last unperturbed sample.
        t_width: total width of the jitter interval.

    Returns:
        Array of shape (N * n_samples, 3). Points are grouped by ray: the
        first ``n_samples`` rows belong to the first ray, and so on.
    """
    rays_o = np.atleast_2d(np.asarray(rays_o, dtype=float))
    rays_d = np.atleast_2d(np.asarray(rays_d, dtype=float))

    t = np.linspace(near, far, n_samples)
    if perturb:
        t = t + (np.random.rand(n_samples) - 0.5) * t_width

    # (N, 1, 3) + (N, 1, 3) * (1, n_samples, 1) -> (N, n_samples, 3)
    points = rays_o[:, np.newaxis, :] + rays_d[:, np.newaxis, :] * t[np.newaxis, :, np.newaxis]
    return points.reshape(rays_o.shape[0] * n_samples, rays_o.shape[1])

In [16]:
def flatten_images(images):
  """Convert ``images`` to a tensor and merge its first three axes.

  Args:
    images: array-like accepted by ``torch.tensor`` with at least three
      dimensions.

  Returns:
    A 2-D tensor whose first axis has length ``size(0) * size(1) * size(2)``
    and whose second axis holds whatever elements remain.
  """
  as_tensor = torch.tensor(images)
  # Multiply out the sizes of the three leading axes.
  num_rows = 1
  for axis in range(3):
    num_rows *= as_tensor.size(axis)
  return as_tensor.view(num_rows, -1)

In [17]:
def sample_along_rays(rays_o, rays_d, perturb=True, n_samples=48,
                      near=2.0, far=6.0, t_width=0.5):
    """Sample 3-D points along each ray between ``near`` and ``far``.

    Vectorised replacement for the per-ray Python loop; it also honours
    ``perturb``, which the original accepted but never read.

    Args:
        rays_o: array of shape (N, 3) with ray origins (a single ray of
            shape (3,) is also accepted).
        rays_d: array of shape (N, 3) with ray directions.
        perturb: when true, jitter the sample depths by a uniform offset in
            ``[-t_width / 2, t_width / 2)``. As in the original, one set of
            jittered depths is drawn per call and shared by all rays.
        n_samples: number of samples per ray.
        near: depth of the first unperturbed sample.
        far: depth of the last unperturbed sample.
        t_width: total width of the jitter interval.

    Returns:
        Array of shape (N * n_samples, 3). Points are grouped by ray: the
        first ``n_samples`` rows belong to the first ray, and so on.
    """
    rays_o = np.atleast_2d(np.asarray(rays_o, dtype=float))
    rays_d = np.atleast_2d(np.asarray(rays_d, dtype=float))

    t = np.linspace(near, far, n_samples)
    if perturb:
        t = t + (np.random.rand(n_samples) - 0.5) * t_width

    # (N, 1, 3) + (N, 1, 3) * (1, n_samples, 1) -> (N, n_samples, 3)
    points = rays_o[:, np.newaxis, :] + rays_d[:, np.newaxis, :] * t[np.newaxis, :, np.newaxis]
    return points.reshape(rays_o.shape[0] * n_samples, rays_o.shape[1])

In [23]:
import torch
from torch.utils.data import Dataset, DataLoader
class RaysData(Dataset):
    """Dataset of per-pixel rays and colours for a set of posed images.

    Pixel coordinates follow the (u, v) = (column, row) convention, so the
    colour of pixel ``(u, v)`` in image ``i`` is ``images[i, v, u]``.

    Attributes:
        images: the training images passed to the constructor.
        K: the 3x3 intrinsics matrix passed to the constructor.
        camera_parameters: the camera-to-world matrices.
        uvs: integer array of shape (num_images * H * W, 2) listing every
            pixel of every image, image by image in row-major order.
        pixels: colours matching ``uvs`` row for row.
        rays_o, rays_d: ray origins and unit directions matching ``uvs``
            row for row (rays go through pixel centres, i.e. ``uv + 0.5``).
    """

    def __init__(self, images_train, K, c2ws_train):
        """Precompute pixel coordinates, colours and rays for every image.

        Args:
            images_train: array indexed as [image, row, column, channel].
            K: 3x3 camera intrinsics matrix.
            c2ws_train: one 4x4 camera-to-world matrix per image.
        """
        self.images = images_train
        self.K = K
        self.camera_parameters = c2ws_train
        self.num_samples_per_ray = 48
        # Retained from the original class; nothing in this class fills them.
        self.ray_origins = []
        self.ray_directions = []
        self.pixel_colors = []

        num_images = images_train.shape[0]
        height, width = images_train[0].shape[:2]

        # meshgrid's default "xy" indexing yields (height, width) grids with
        # u varying fastest, so ravel() walks the image in row-major order.
        u_grid, v_grid = np.meshgrid(np.arange(width), np.arange(height))
        pixel_positions = np.column_stack((u_grid.ravel(), v_grid.ravel()))
        us, vs = pixel_positions[:, 0], pixel_positions[:, 1]

        # Repeat the grid along the row axis: one copy per image.
        self.uvs = np.tile(pixel_positions, (num_images, 1))
        self.pixels = np.concatenate(
            [image[vs, us] for image in images_train], axis=0
        )

        pixel_centers = pixel_positions + 0.5
        per_image_rays = [
            self.pixel_to_ray(K, c2ws_train[i], pixel_centers)
            for i in range(num_images)
        ]
        self.rays_o = np.concatenate([o for o, _ in per_image_rays], axis=0)
        self.rays_d = np.concatenate([d for _, d in per_image_rays], axis=0)

    def __len__(self):
        """Return the total number of pixels (and therefore rays)."""
        return self.pixels.shape[0]

    def __getitem__(self, index):
        """Return ``(ray_origin, ray_direction, colour)`` for one pixel."""
        return self.rays_o[index], self.rays_d[index], self.pixels[index]

    def sample_rays(self, N = 100):
        """Draw random pixels from every image and return rays and colours.

        ``N // num_images`` pixels are drawn (with replacement) from each
        image, so fewer than ``N`` rays are returned when ``N`` is not a
        multiple of the number of images.

        Args:
            N: total number of rays requested.

        Returns:
            Tuple ``(ray_origins, ray_directions, pixel_colors)`` with one
            row per sampled pixel, in image order.
        """
        images_train = self.images
        c2ws_train = self.camera_parameters
        num_images, image_height, image_width = images_train.shape[:3]
        num_per_image = N // num_images

        origins, directions, colors = [], [], []
        for i in range(num_images):
            sampled_u = np.random.randint(image_width, size=num_per_image)
            sampled_v = np.random.randint(image_height, size=num_per_image)
            # Cast through pixel centres.
            sampled_centers = np.column_stack((sampled_u + 0.5, sampled_v + 0.5))
            ray_o, ray_d = self.pixel_to_ray(self.K, c2ws_train[i], sampled_centers)
            origins.append(ray_o)
            directions.append(ray_d)
            # Index [row, column] = [v, u]; indexing with the stacked (n, 2)
            # array would select whole image rows instead of pixels.
            colors.append(images_train[i][sampled_v, sampled_u])
        return np.concatenate(origins), np.concatenate(directions), np.concatenate(colors)

    def pixel_to_ray(self, K, c2w, uv):
        """Turn pixel coordinates into world-space rays.

        Assumes ``c2w`` is an affine camera-to-world matrix whose last row
        is ``[0, 0, 0, 1]``: the origin is its translation column and the
        direction is the rotated back-projected pixel.

        Args:
            K: 3x3 camera intrinsics matrix.
            c2w: 4x4 camera-to-world matrix.
            uv: array-like of shape (N, 2) with pixel coordinates (u, v).

        Returns:
            Tuple ``(ray_o, ray_d)`` of arrays with shape (N, 3): the origin
            repeated per pixel and the unit-length directions.
        """
        K = np.asarray(K, dtype=float)
        c2w = np.asarray(c2w, dtype=float)
        uv = np.atleast_2d(np.asarray(uv, dtype=float))
        uv_padded = np.hstack((uv, np.ones((uv.shape[0], 1))))
        xyz_c = uv_padded @ np.linalg.inv(K).T
        ray_d = xyz_c @ c2w[:3, :3].T
        ray_d /= np.linalg.norm(ray_d, axis=1, keepdims=True)
        ray_o = np.tile(c2w[:3, 3], (uv.shape[0], 1))
        return ray_o, ray_d


  # def _sample_points_along_all_rays(self):
  #     origins, directions, colors = self.sample_rays()
  #     for ray_o, ray_d, color in zip(origins, directions, colors):
  #         originsAlongRay = self.sample_points_along_ray(ray_o, ray_d)
  #         length = len(originsAlongRay)
  #         directionsAlongRay = np.tile(ray_d, (length, 1))
  #         colorsAlongRay = np.tile(color, (length, 1))
  #         self.ray_origins.extend(originsAlongRay)
  #         self.ray_directions.extend( directionsAlongRay)
  # #         self.pixel_colors.extend(colorsAlongRay)

  # def _sample_along_rays_(self, rays_o, rays_d, perturb=True)
  #     n_samples = self.num_samples_per_ray
  #     near, far = 2.0, 6.0
  #     t = np.linspace(near, far, n_samples)
  #     t_width = 0.5
  #     t = t + (np.random.rand(n_samples) - 0.5) * t_width
  #     ray_os = np.tile(rays_o, (n_samples, 1))
  #     ray_dirs = np.tile(rays_dir, (n_samples, 1))
  #     points = ray_os + ray_dirs * t[:, np.newaxis]
  #     return np.array(points)


In [19]:
!pip install viser

Collecting viser
  Obtaining dependency information for viser from https://files.pythonhosted.org/packages/3c/1a/de6f7537c10e3c944f9e53a31b1a6551004fe0a0cc90243e3d805a768bec/viser-0.1.10-py3-none-any.whl.metadata
  Downloading viser-0.1.10-py3-none-any.whl.metadata (4.4 kB)
Collecting pyliblzfse>=0.4.1 (from viser)
  Downloading pyliblzfse-0.4.1.tar.gz (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting tyro>=0.2.0 (from viser)
  Obtaining dependency information for tyro>=0.2.0 from https://files.pythonhosted.org/packages/19/c3/35e23412b4c9b38841ec79f7a69fc57967c8545057ac2ce31647e918b3a2/tyro-0.5.14-py3-none-any.whl.metadata
  Downloading tyro-0.5.14-py3-none-any.whl.metadata (7.5 kB)
Collecting gdown>=4.6.6 (from viser)
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Collecting trimesh>=3.21.7 (from viser)
  Obtaining dependency information for

In [None]:
# Visualize the training cameras, 100 randomly sampled rays and the points
# sampled along those rays in a viser scene.
import viser, time  # pip install viser
import numpy as np

# NOTE(review): K is not assigned in this cell; it presumably comes from an
# earlier cell that builds the 3x3 intrinsics — confirm it exists before running.
dataset = RaysData(images_train, K, c2ws_train)
rays_o, rays_d, pixels = dataset.sample_rays(100)
points = sample_along_rays(rays_o, rays_d, perturb=True)
# Image height and width, used for the frustum field of view and aspect ratio.
H, W = images_train.shape[1:3] 

server = viser.ViserServer(share=True)
# One camera frustum per training image, textured with that image.
for i, (image, c2w) in enumerate(zip(images_train, c2ws_train)):
    server.add_camera_frustum(
        f"/cameras/{i}",
        # Field of view derived from the image height and K[0, 0].
        fov=2 * np.arctan2(H / 2, K[0, 0]),
        aspect=W / H,
        scale=0.15,
        wxyz=viser.transforms.SO3.from_matrix(c2w[:3, :3]).wxyz,
        position=c2w[:3, 3],
        image=image
    )
# Draw each ray as a segment from its origin to 6.0 units along its direction.
for i, (o, d) in enumerate(zip(rays_o, rays_d)):
    server.add_spline_catmull_rom(
        f"/rays/{i}", positions=np.stack((o, o + d * 6.0)),
    )
# Sampled points, all coloured black (zeros).
server.add_point_cloud(
    f"/samples",
    colors=np.zeros_like(points).reshape(-1, 3),
    points=points.reshape(-1, 3),
    point_size=0.02,
)
# Block this cell for 1000 seconds, presumably to keep the server reachable — confirm.
time.sleep(1000) 

In [None]:
# Visualize Cameras, Rays and Samples
import viser, time
import numpy as np

dataset = RaysData(images_train, K, c2ws_train)
# Image height and width, needed below for the frustum field of view and
# aspect ratio (previously only defined in another cell).
H, W = images_train.shape[1:3]

# Sanity check on the first image: dataset.pixels must hold the colour found
# at [row, column] = [v, u] for every (u, v) in dataset.uvs.
uvs_start = 0
uvs_end = 40_000
sample_uvs = dataset.uvs[uvs_start:uvs_end]
assert np.all(images_train[0, sample_uvs[:,1], sample_uvs[:,0]] == dataset.pixels[uvs_start:uvs_end])

# Uncomment this to display random rays from the first image
indices = np.random.randint(low=0, high=40_000, size=100)

# # Uncomment this to display random rays from the top left corner of the image
# indices_x = np.random.randint(low=100, high=200, size=100)
# indices_y = np.random.randint(low=0, high=100, size=100)
# indices = indices_x + (indices_y * 200)

# NOTE(review): this rebinds `data`, which earlier held the loaded .npz
# archive; re-run the loading cell before reading the archive again.
data = {"rays_o": dataset.rays_o[indices], "rays_d": dataset.rays_d[indices]}
# sample_along_rays takes `perturb`, not `random`; passing `random=True`
# raised a TypeError.
points = sample_along_rays(data["rays_o"], data["rays_d"], perturb=True)

server = viser.ViserServer(share=True)
# One camera frustum per training image, textured with that image.
for i, (image, c2w) in enumerate(zip(images_train, c2ws_train)):
  server.add_camera_frustum(
    f"/cameras/{i}",
    fov=2 * np.arctan2(H / 2, K[0, 0]),
    aspect=W / H,
    scale=0.15,
    wxyz=viser.transforms.SO3.from_matrix(c2w[:3, :3]).wxyz,
    position=c2w[:3, 3],
    image=image
  )
# Draw each ray as a segment from its origin to 6.0 units along its direction.
for i, (o, d) in enumerate(zip(data["rays_o"], data["rays_d"])):
  positions = np.stack((o, o + d * 6.0))
  server.add_spline_catmull_rom(
      f"/rays/{i}", positions=positions,
  )
# Sampled points, all coloured black (zeros).
server.add_point_cloud(
    f"/samples",
    colors=np.zeros_like(points).reshape(-1, 3),
    points=points.reshape(-1, 3),
    point_size=0.03,
)
time.sleep(1000)

In [None]:
def volume_render(sigmas, colors, delta):
    """Composite per-sample colours along each ray into one colour per ray.

    For sample ``i`` with density ``sigma_i`` the weight is
    ``T_i * (1 - exp(-sigma_i * delta))`` where
    ``T_i = exp(-sum_{j < i} sigma_j * delta)`` is the transmittance
    accumulated over the samples in front of it.

    Args:
        sigmas: tensor of shape (num_rays, num_samples, 1) with densities.
        colors: tensor of shape (num_rays, num_samples, C) with colours.
        delta: step size between consecutive samples.

    Returns:
        Tensor of shape (num_rays, C) with the rendered colour of each ray.
    """
    # Shift densities one step along the sample axis so the cumulative sum at
    # sample i only covers samples j < i. zeros_like keeps the padding on the
    # same device and dtype as `sigmas`; a plain torch.zeros lives on the CPU
    # and cannot be concatenated with CUDA tensors.
    leading_zero = torch.zeros_like(sigmas[:, :1])
    sigmas_shifted = torch.cat((leading_zero, sigmas[:, :-1]), dim=1)
    transmittance = torch.exp(-torch.cumsum(sigmas_shifted * delta, dim=1))
    alphas = 1.0 - torch.exp(-sigmas * delta)
    return torch.sum(transmittance * alphas * colors, dim=1)

In [84]:
import torch
from torch.utils.data import Dataset, DataLoader
class RaysData(Dataset):
    """Dataset of per-pixel rays and colours for a set of posed images.

    Pixel coordinates follow the (u, v) = (column, row) convention, so the
    colour of pixel ``(u, v)`` in image ``i`` is ``images[i, v, u]``.

    Attributes:
        images: the training images passed to the constructor.
        K: the intrinsics as a float32 torch tensor.
        camera_parameters: the camera-to-world matrices.
        uvs: integer array of shape (num_images * H * W, 2) listing every
            pixel of every image, image by image in row-major order.
        pixels: colours matching ``uvs`` row for row.
        rays_o, rays_d: ray origins and unit directions matching ``uvs``
            row for row (rays go through pixel centres, i.e. ``uv + 0.5``).
    """

    def __init__(self, images_train, K, c2ws_train):
        """Precompute pixel coordinates, colours and rays for every image.

        The original filled ``rays_o`` / ``rays_d`` from ``sample_rays``,
        i.e. from random pixels, so they did not correspond to ``uvs`` or
        ``pixels``. Here every pixel gets its own ray, in the same order.

        Args:
            images_train: array indexed as [image, row, column, channel].
            K: 3x3 camera intrinsics matrix.
            c2ws_train: one 4x4 camera-to-world matrix per image.
        """
        self.images = images_train
        self.K = torch.tensor(K, dtype=torch.float32)
        # NumPy copy used for the ray maths, which is done in NumPy.
        self._K_np = np.asarray(K, dtype=np.float64)
        self.camera_parameters = c2ws_train
        self.num_samples_per_ray = 48
        # Retained from the original class; nothing in this class fills them.
        self.ray_origins = []
        self.ray_directions = []
        self.pixel_colors = []

        num_images = images_train.shape[0]
        height, width = images_train[0].shape[:2]

        # meshgrid's default "xy" indexing yields (height, width) grids with
        # u varying fastest, so ravel() walks the image in row-major order.
        u_grid, v_grid = np.meshgrid(np.arange(width), np.arange(height))
        pixel_positions = np.column_stack((u_grid.ravel(), v_grid.ravel()))
        us, vs = pixel_positions[:, 0], pixel_positions[:, 1]

        # Repeat the grid along the row axis: one copy per image.
        self.uvs = np.tile(pixel_positions, (num_images, 1))
        self.pixels = np.concatenate(
            [image[vs, us] for image in images_train], axis=0
        )

        pixel_centers = pixel_positions + 0.5
        per_image_rays = [
            self.pixel_to_ray(self._K_np, c2ws_train[i], pixel_centers)
            for i in range(num_images)
        ]
        self.rays_o = np.concatenate([o for o, _ in per_image_rays], axis=0)
        self.rays_d = np.concatenate([d for _, d in per_image_rays], axis=0)

    def __len__(self):
        """Return the total number of pixels (and therefore rays)."""
        return self.pixels.shape[0]

    def __getitem__(self, index):
        """Return ``(ray_origin, ray_direction, colour)`` for one pixel."""
        return self.rays_o[index], self.rays_d[index], self.pixels[index]

    def sample_rays(self, N = 100):
        """Draw random pixels from every image and return rays and colours.

        ``N // num_images`` pixels are drawn (with replacement) from each
        image, so fewer than ``N`` rays are returned when ``N`` is not a
        multiple of the number of images.

        Args:
            N: total number of rays requested.

        Returns:
            Tuple ``(ray_origins, ray_directions, pixel_colors)`` with one
            row per sampled pixel, in image order.
        """
        images_train = self.images
        c2ws_train = self.camera_parameters
        num_images, image_height, image_width = images_train.shape[:3]
        num_per_image = N // num_images

        origins, directions, colors = [], [], []
        for i in range(num_images):
            sampled_u = np.random.randint(image_width, size=num_per_image)
            sampled_v = np.random.randint(image_height, size=num_per_image)
            # Cast through pixel centres.
            sampled_centers = np.column_stack((sampled_u + 0.5, sampled_v + 0.5))
            ray_o, ray_d = self.pixel_to_ray(self._K_np, c2ws_train[i], sampled_centers)
            origins.append(ray_o)
            directions.append(ray_d)
            # Index [row, column] = [v, u]; indexing with the stacked (n, 2)
            # array would select whole image rows instead of pixels.
            colors.append(images_train[i][sampled_v, sampled_u])
        return np.concatenate(origins), np.concatenate(directions), np.concatenate(colors)

    def pixel_to_ray(self, K, c2w, uv):
        """Turn pixel coordinates into world-space rays.

        Assumes ``c2w`` is an affine camera-to-world matrix whose last row
        is ``[0, 0, 0, 1]``: the origin is its translation column and the
        direction is the rotated back-projected pixel.

        Args:
            K: 3x3 camera intrinsics matrix.
            c2w: 4x4 camera-to-world matrix.
            uv: array-like of shape (N, 2) with pixel coordinates (u, v).

        Returns:
            Tuple ``(ray_o, ray_d)`` of arrays with shape (N, 3): the origin
            repeated per pixel and the unit-length directions.
        """
        K = np.asarray(K, dtype=float)
        c2w = np.asarray(c2w, dtype=float)
        uv = np.atleast_2d(np.asarray(uv, dtype=float))
        uv_padded = np.hstack((uv, np.ones((uv.shape[0], 1))))
        xyz_c = uv_padded @ np.linalg.inv(K).T
        ray_d = xyz_c @ c2w[:3, :3].T
        ray_d /= np.linalg.norm(ray_d, axis=1, keepdims=True)
        ray_o = np.tile(c2w[:3, 3], (uv.shape[0], 1))
        return ray_o, ray_d

In [None]:
# Training loop: each iteration samples a batch of rays, samples points along
# them, queries the model, volume-renders the result and takes one Adam step
# on the MSE against the ray colours.
# NOTE(review): Nerf3D, device, nn, sample_points_from_rays and volrend are
# not defined in the visible part of this notebook — confirm they exist.
dataset = RaysData(images_train, K, c2ws_train)
# NOTE(review): .cuda() followed by .to(device) looks redundant — confirm
# which device is intended.
model = Nerf3D().cuda().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
loss_fn = nn.MSELoss()
for epoch in range(1000):
    optimizer.zero_grad()
    # NOTE(review): the RaysData.sample_rays defined in this notebook takes a
    # single argument N; the max_idx / ray_per_image keywords and the
    # .to(device) calls below presumably target a different, tensor-returning
    # version — verify before running.
    rays_o, rays_d, rays_rgb = dataset.sample_rays(max_idx = 100, ray_per_image=32)
    points = sample_points_from_rays(rays_o, rays_d, near=2.0, far=6.0, num_samples_per_ray=32, train=True)
    pred_sigmas, pred_rgbs = model(points.to(device), rays_o.to(device), rays_d.to(device))
    # Third argument is presumably the step size between samples — TODO confirm.
    rend_img = volrend(pred_sigmas, pred_rgbs, 0.1)
    loss = loss_fn(rend_img.to(device), rays_rgb.to(device))
    loss.backward() 
    optimizer.step() 
    print(f"Epoch {epoch + 1} Loss: {loss.item():.4f}") 

In [None]:
# Evaluation: render one batch of sampled rays without tracking gradients and
# display the result.
with torch.no_grad():
    model.eval()
    # NOTE(review): same sample_rays keyword mismatch as in the training cell
    # (the visible RaysData.sample_rays only accepts N) — verify.
    rays_o, rays_d, rays_rgb = dataset.sample_rays(max_idx = 100, ray_per_image=32)
    # NOTE(review): train=True is passed during evaluation; if it enables
    # random perturbation of the samples, train=False is presumably intended.
    points = sample_points_from_rays(rays_o, rays_d, near=2.0, far=6.0, num_samples_per_ray=32, train=True)
    pred_sigmas, pred_rgbs = model(points.to(device), rays_o.to(device), rays_d.to(device))
    # NOTE(review): the training cell passes 0.1 here, this one passes 0.5 —
    # confirm which value matches the sample spacing.
    rend_img = volrend(pred_sigmas, pred_rgbs, 0.5)
# NOTE(review): rend_img presumably has one row per sampled ray, not an
# H x W x 3 image, so imshow would not show a picture of the scene — confirm.
plt.imshow(rend_img.detach().cpu().numpy()) 