# Create Nerfstudio datasets

This notebook loads a textured mesh from an OBJ file, renders it from a set of camera poses, and saves the result as a Nerfstudio dataset with 3D-consistent masks.

In [None]:
%load_ext autoreload
%autoreload 2

from pytorch3d.io import load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import Textures

from nerfiller.nerf.dataset_utils import create_nerfstudio_frame
from nerfiller.utils.mesh_utils import project_mesh_into_perspective_image, dilate, erode, get_mesh_from_perspective_images, get_cube
from nerfiller.utils.camera_utils import c2wh_from_c2w, rot_y, rot_x, rot_z, get_focal_len_from_fov
from nerfiller.utils.depth_utils import depth_to_distance
from nerfstudio.cameras.camera_utils import viewmatrix
from nerfiller.utils.image_utils import image_tensor_to_npy, get_inpainted_image_row
from nerfiller.utils.typing import *
from nerfiller.utils.io_utils import save_mesh
from nerfstudio.utils.colormaps import ColormapOptions, apply_colormap

import torch
import numpy as np
import mediapy
import json
import trimesh

In [None]:
# Prefer the originally configured GPU ("cuda:7"), but fall back to the first
# GPU or to the CPU so the notebook still runs on machines with fewer devices.
if torch.cuda.is_available():
    device = "cuda:7" if torch.cuda.device_count() > 7 else "cuda:0"
else:
    device = "cpu"
dataset_name = "billiards"  # selects the entry of dataset_configs to use
image_size = 512  # rendered images are square: width == height == image_size
width = image_size
height = image_size
display_height = 100  # preview height passed to mediapy.show_images
theta_num = 16  # number of theta samples (angle passed to rot_z) per phi value
phi_num = 4  # number of phi samples (angle passed to rot_x)
keep_edge_size = 50  # border width in pixels forced to 1.0 when keep_edges is set

# Per-dataset settings, keyed by dataset name. Keys as used by the cells below:
#   use_occlusion_file   (optional, default False) load the occluder from
#                        occlusion.obj instead of building a cube.
#   cube_center / cube_scale / cube_rotation
#                        arguments passed to get_cube() to build the occluder;
#                        only read when use_occlusion_file is not set.
#   rotation             [x, y, z] angles fed to rot_x, rot_y, rot_z and applied
#                        to the mesh vertices (and to the occlusion mesh).
#   radius               scales the rotated camera offset from `center`.
#   fov                  field of view in degrees (passed as fov_in_degrees).
#   theta_min / theta_max
#                        range of the angle passed to rot_z; theta_num samples.
#   phi_min / phi_max    range of the angle passed to rot_x; phi_num samples,
#                        generated from phi_max down to phi_min.
#   offset               base camera offset vector before rotation and scaling.
#   center               point added to the camera offset; cameras look at it.
#   inpaint_missing_depth
#                        when True, the mask additionally keeps only pixels where
#                        the back-face-culled depth equals the unculled depth.
#   keep_edges           when True, a keep_edge_size border of image and mask is
#                        set to 1.0.
#   dilate_iters         (optional, default 0) number of 3x3 dilations applied
#                        to the inverted mask.
#   background_color     (optional, default "white") "white" or "black".
dataset_configs = {
    "billiards": {
        "cube_center": [-0.455698 + .15, 0.145552 + .5, -0.220525],
        "cube_scale": [0.17, 0.17, 0.17],
        "cube_rotation": [0.0, 0.0, -torch.pi / 8],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 0.35,
        "fov": 80,
        "theta_min": 0 - torch.pi,
        "theta_max": torch.pi/2 - torch.pi - torch.pi/8,
        "offset": [0, 1, 0],
        "phi_min": 0,
        "phi_max": torch.pi/4,
        "center": [-0.2, 0.5, -0.2],
        "inpaint_missing_depth": False,
        "keep_edges": False
    },
    "dumptruck": {
        "cube_center": [0.0, -0.45, -0.05],
        "cube_scale": [0.5, 0.5, 0.5],
        "cube_rotation": [0.0, 0.0, 0.0],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 2,
        "fov": 60,
        "theta_min": -3*torch.pi/4,
        "theta_max": 3*torch.pi/4,
        "offset": [0, 1, 0],
        "phi_min": -torch.pi/8,
        "phi_max": torch.pi/4,
        "center": [0.0, 0.0, 0.0],
        "inpaint_missing_depth": False,
        "keep_edges": False
    },
    "office": {
        "cube_center": [0.0, 0.0, 0.0],
        # A 1e-6 cube is effectively no occluder for this scene.
        "cube_scale": [1e-6, 1e-6, 1e-6],
        "cube_rotation": [0.0, 0.0, -torch.pi / 8],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 0.2,
        "fov": 90,
        "theta_min": -torch.pi,
        # Stops one step short of +pi (depends on theta_num defined above).
        "theta_max": torch.pi - 2 * torch.pi/theta_num,
        "offset": [0, 1, 0],
        "phi_min": -torch.pi/4,
        "phi_max": torch.pi/4,
        "center": [0.0, 0.0, 0.0],
        # NOTE(review): the render loop applies the cube mask unconditionally;
        # the cube is negligible here only because cube_scale is 1e-6 — confirm
        # the intent of the inline comment below.
        "inpaint_missing_depth": True, # cube occlusions are ignored if this is True
        "keep_edges": False
    },
    "drawing": {
        "cube_center": [0.8, 0.1, -0.3],
        "cube_scale": [0.2, 0.15, 0.15],
        "cube_rotation": [0.0, 0.0, torch.pi / 4],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 0.4,
        "fov": 90,
        "theta_min": 0 + torch.pi/8,
        "theta_max": torch.pi/2 + torch.pi/8,
        "offset": [0, 1, 0],
        "phi_min": torch.pi/16,
        "phi_max": 3*torch.pi/16,
        "center": [0.8, 0.1, -0.3],
        "inpaint_missing_depth": False,
        "keep_edges": False
    },
    "boot": {
        "cube_center": [0.8, 0.0, -0.1],
        "cube_scale": [0.7, 0.55, 0.7],
        "cube_rotation": [0.0, 0.0, 0.0],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 3,
        "fov": 60,
        "theta_min": -3*torch.pi/4 + torch.pi/2,
        "theta_max": 3*torch.pi/4 + torch.pi/2,
        "offset": [0, 1, 0],
        "phi_min": -torch.pi/4,
        "phi_max": torch.pi/4,
        "center": [0.0, 0.0, 0.0],
        "inpaint_missing_depth": False,
        "keep_edges": False
    },
    "norway": {
        "cube_center": [0.0, -0.8, -0.3],
        "cube_scale": [0.2, 0.15, 0.15],
        "cube_rotation": [0.0, 0.0, torch.pi / 4],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 0.7,
        "fov": 50,
        "theta_min": 0 + torch.pi/8 - torch.pi/4,
        "theta_max": torch.pi/2 - torch.pi/4,
        "offset": [0, 1, 0],
        "phi_min": torch.pi/16,
        "phi_max": 3*torch.pi/16,
        "center": [0.0, -0.8, -0.3],
        "inpaint_missing_depth": False,
        "keep_edges": False
    },
    "bear": {
        # Occluder comes from occlusion.obj; the cube_* entries are not read.
        "use_occlusion_file": True,
        "cube_center": [0.0, 0.0, 0.0],
        "cube_scale": [1e-6, 1e-6, 1e-6],
        "cube_rotation": [0.0, 0.0, -torch.pi / 8],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 2,
        "fov": 50,
        "theta_min": 7*torch.pi/4 - 3*torch.pi/8,
        "theta_max": 7*torch.pi/4 + 3*torch.pi/8,
        "offset": [0, 1, 0],
        "phi_min": -torch.pi/8,
        "phi_max": torch.pi/4,
        "center": [0.0, 0.0, 0.0],
        "inpaint_missing_depth": False,
        "keep_edges": False,
        "dilate_iters": 10
    },
    "cat": {
        # Occluder comes from occlusion.obj, so no cube_* entries are needed.
        "use_occlusion_file": True,
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 2,
        "fov": 60,
        "theta_min": -7*torch.pi/4,
        "theta_max": 7*torch.pi/4 - 2 * torch.pi/theta_num,
        "offset": [0, 1, 0],
        "phi_min": 0,
        "phi_max": torch.pi/4,
        "center": [0.0, 0.0, 0.0],
        "inpaint_missing_depth": False,
        "keep_edges": False,
        "dilate_iters": 10,
        "background_color": "black"
    },
    "turtle": {
        # Occluder comes from occlusion.obj; the cube_* entries are not read.
        "use_occlusion_file": True,
        "cube_center": [0.0, 0.0, 0.0],
        "cube_scale": [1e-6, 1e-6, 1e-6],
        "cube_rotation": [0.0, 0.0, -torch.pi / 8],
        "rotation": [torch.pi / 2, 0.0, 0.0],
        "radius": 2,
        "fov": 50,
        "theta_min": 2*torch.pi/4 - 5*torch.pi/8,
        "theta_max": 2*torch.pi/4 + 5*torch.pi/8,
        "offset": [0, 1, 0],
        "phi_min": -torch.pi/8,
        "phi_max": torch.pi/3,
        "center": [0.0, 0.0, 0.0],
        "inpaint_missing_depth": False,
        "keep_edges": False,
        "dilate_iters": 5,
        "background_color": "black"
    },
}

### Load a mesh

This cell uses PyTorch3D to load a textured mesh stored in the OBJ format, normalizes it, and builds the occluder geometry used to create the masks.

In [None]:
# Load the textured scene mesh for the selected dataset.
filename = f"../data/meshes/{dataset_name}/model.obj"
verts, faces_pytorch3d, aux = load_obj(filename)

# Compose the configured X, Y and Z rotations into one matrix and apply it to
# every vertex (the configs use this to make Z the up axis).
angle_x, angle_y, angle_z = dataset_configs[dataset_name]["rotation"]
rot_yz = torch.tensor(rot_y(angle_y)) @ torch.tensor(rot_z(angle_z))
rot = torch.tensor(rot_x(angle_x)) @ rot_yz
verts = (rot @ verts.T).T

# Center the mesh on its vertex mean, then normalize its size.
offset = verts.mean(dim=0)
verts -= offset
bbox_min = verts.min(dim=0).values
bbox_max = verts.max(dim=0).values
# NOTE(review): (min - max) is negative on every axis, so max() selects the
# axis with the smallest extent — confirm this is the intended normalization.
scale = (bbox_min - bbox_max).max().abs()
verts /= scale

# UV data with a leading batch dimension.
tex_maps = aux.texture_images
verts_uvs = aux.verts_uvs[None]  # (1, V, 2)
faces_uvs = faces_pytorch3d.textures_idx[None]  # (1, F, 3)

# tex_maps maps material name -> texture image; only the first one is used.
texture_image = list(tex_maps.values())[0]
texture_image = texture_image[None]  # (1, H, W, 3)

# Create a textures object
tex = Textures(verts_uvs=verts_uvs, faces_uvs=faces_uvs, maps=texture_image)

# Build the occluder: either a configured cube or a hand-made occlusion mesh.
if not dataset_configs[dataset_name].get("use_occlusion_file", False):
    cube_center = torch.tensor(dataset_configs[dataset_name]["cube_center"])
    cube_scale = torch.tensor(dataset_configs[dataset_name]["cube_scale"])
    cube_rotation = torch.tensor(dataset_configs[dataset_name]["cube_rotation"])
    cube_vertices, cube_faces = get_cube(cube_center, cube_scale, cube_rotation, device=device)
else:
    # use a mesh named occlusion.obj, which can be created in blender or anywhere
    mesh = trimesh.load(f"../data/meshes/{dataset_name}/occlusion.obj")
    # Apply the same rotation, centering and scaling as the scene mesh so the
    # occluder stays aligned with it.
    cube_vertices = torch.from_numpy(mesh.vertices).float()
    cube_vertices = (rot @ cube_vertices.T).T
    cube_vertices = ((cube_vertices - offset) / scale).to(device)
    cube_faces = torch.from_numpy(mesh.faces).to(device)

# Color the occluder solid red and save it for inspection.
red = torch.tensor([1.0, 0, 0]).to(cube_vertices)
cube_vertex_colors = torch.ones_like(cube_vertices) * red
save_mesh(cube_vertices, cube_vertex_colors, cube_faces, filename=f"{dataset_name}-cube.ply")

In [None]:
# Destination of the generated Nerfstudio dataset.
output_folder = Path(f"../data/nerfstudio/{dataset_name}")

# Move the scene geometry and textures to the rendering device.
vertices = verts.to(device)
faces = faces_pytorch3d.verts_idx.to(device)
textures = tex.to(device)
vertex_colors = None  # the scene is textured, so no per-vertex colors

# Save an all-white copy of the scene mesh for inspection.
save_mesh(vertices, torch.ones_like(vertices), faces, filename=f"{dataset_name}.ply")

# Pull the camera and masking settings for the selected dataset.
scene_cfg = dataset_configs[dataset_name]
up = torch.tensor([0, 0, 1]).float()
center = torch.tensor(scene_cfg["center"]).float()
offset = torch.tensor(scene_cfg["offset"]).float()
fov = scene_cfg["fov"]
radius = scene_cfg["radius"]
inpaint_missing_depth = scene_cfg["inpaint_missing_depth"]
keep_edges = scene_cfg["keep_edges"]
dilate_iters = scene_cfg.get("dilate_iters", 0)
background_color = scene_cfg.get("background_color", "white")

# Camera grid: for each phi (from phi_max down to phi_min) sweep every theta.
theta_samples = torch.linspace(scene_cfg["theta_min"], scene_cfg["theta_max"], theta_num)
phi_samples = torch.linspace(scene_cfg["phi_max"], scene_cfg["phi_min"], phi_num)
thetas = theta_samples.repeat(phi_num)
phis = phi_samples.repeat_interleave(theta_num)
radiuses = torch.tensor([radius] * (theta_num * phi_num))

In [None]:
# Render every camera pose and collect the image, depth map, mask and pose.
images = []
masks = []
depths = []
poses = []

# Not used in this cell; kept so any other cell that references it still works.
ref_p2f = None

# Validate once, up front, so a bad config fails before any rendering happens.
if background_color not in ("white", "black"):
    raise ValueError(
        f"Unsupported background_color {background_color!r}; expected 'white' or 'black'."
    )

# The loop variable is named cam_radius so it does not overwrite the configured `radius`.
for theta, cam_radius, phi in zip(thetas, radiuses, phis):
    # Camera position: rotate the base offset by phi (rot_x) and then theta
    # (rot_z), scale it by the radius, and translate it to the orbit center.
    pos = center + (torch.tensor(rot_z(float(theta))) @ (torch.tensor(rot_x(float(phi))) @ offset)) * cam_radius
    lookat = pos - center
    c2w = viewmatrix(
        lookat=lookat,
        up=up,
        pos=pos,
    )
    # Occluder render: only its depth is used below.
    cube_image, cube_depth, cube_p2f = project_mesh_into_perspective_image(cube_vertices, cube_vertex_colors, cube_faces, fov=fov, image_size=image_size, c2w=c2w, textures=None, device=device)
    # Scene render with back-face culling.
    image, depth, p2f = project_mesh_into_perspective_image(vertices, vertex_colors, faces, fov=fov, image_size=image_size, c2w=c2w, textures=textures, device=device, cull_backfaces=True)
    # next line is so we don't include when we pass through a backface, e.g., this issue https://github.com/facebookresearch/pytorch3d/issues/945
    image_firstface, depth_firstface, p2f_firstface = project_mesh_into_perspective_image(vertices, vertex_colors, faces, fov=fov, image_size=image_size, c2w=c2w, textures=textures, device=device, cull_backfaces=False)

    # Fill pixels with no geometry at all (depth of -1) with the background color.
    background = depth_firstface[..., None] == -1
    fill = torch.ones_like(image) if background_color == "white" else torch.zeros_like(image)
    image = background * fill + (~background) * image

    # mask == 1 keeps a pixel; mask == 0 zeroes it in the image and depth below.
    mask = torch.ones_like(depth)
    if inpaint_missing_depth:
        # Keep only pixels where the culled and unculled renders agree on a real hit.
        mask *= ((depth == depth_firstface) & (depth_firstface != -1)).float()
    # Drop pixels where the occluder is visible: either nothing is behind it, or
    # it is closer to the camera than the first scene surface.
    scene_hit = depth_firstface != -1
    cube_hit = cube_depth != -1
    cube_visible = (~scene_hit & cube_hit) | (scene_hit & cube_hit & (cube_depth < depth_firstface))
    mask *= 1 - cube_visible.float()

    if dilate_iters != 0:
        # Grow the removed region by dilating the inverted mask.
        masktemp = 1 - mask
        for _ in range(dilate_iters):
            masktemp = dilate(masktemp[None,None], kernel_size=3)[0,0]
        mask = 1 - masktemp

    if keep_edges:
        # Force a keep_edge_size border of both image and mask to 1.0.
        image[:keep_edge_size] = 1.0
        image[:, :keep_edge_size] = 1.0
        image[-keep_edge_size:] = 1.0
        image[:, -keep_edge_size:] = 1.0
        mask[:keep_edge_size] = 1.0
        mask[:, :keep_edge_size] = 1.0
        mask[-keep_edge_size:] = 1.0
        mask[:, -keep_edge_size:] = 1.0

    # TODO: debug this in case the downsampling with mask causes issues
    image *= mask[..., None].float()
    depth *= mask.float()

    # Replace the "no hit" depth marker with 0.
    depth[depth==-1] = 0.0

    images.append(image.cpu())
    depths.append(depth.cpu())
    masks.append(mask.cpu())
    poses.append(c2wh_from_c2w(c2w).detach().cpu())

# Preview the results, one row per phi value.
mediapy.show_images(images, height=display_height, columns=theta_num)
mediapy.show_images(depths, height=display_height, columns=theta_num)
mediapy.show_images(masks, height=display_height, columns=theta_num)

### Save the dataset in Nerfstudio format

In [None]:
# Create the dataset folder and its per-modality sub-folders.
output_folder.mkdir(parents=True, exist_ok=True)
for subfolder in ("images", "masks", "depth"):
    (output_folder / subfolder).mkdir(parents=True, exist_ok=True)

# Shared intrinsics: principal point at the image center, focal lengths from the FOV.
cx, cy = width / 2.0, height / 2.0
fx = get_focal_len_from_fov(width, fov_in_degrees=fov)
fy = get_focal_len_from_fov(height, fov_in_degrees=fov)

frames = []
for i, (image, mask, depth, pose) in enumerate(zip(images, masks, depths, poses)):
    # Paths are stored relative to the dataset folder in transforms.json.
    file_path = f"images/image_{i:06d}.png"
    mask_file_path = f"masks/mask_{i:06d}.png"
    depth_file_path = f"depth/depth_{i:06d}.npy"

    mediapy.write_image(output_folder / file_path, image.cpu())
    mediapy.write_image(output_folder / mask_file_path, mask.cpu())
    np.save(output_folder / depth_file_path, depth.cpu().numpy())

    # Also write a color-mapped depth preview next to the raw .npy file.
    depth_with_colormap = apply_colormap(depth[..., None], ColormapOptions(normalize=True))
    mediapy.write_image(output_folder / f"depth/depth_{i:06d}.png", depth_with_colormap.cpu())

    frames.append(
        create_nerfstudio_frame(
            fl_x=fx,
            fl_y=fy,
            cx=cx,
            cy=cy,
            w=width,
            h=height,
            pose=pose,
            file_path=file_path,
            mask_file_path=mask_file_path,
            depth_file_path=depth_file_path,
        )
    )

# Assemble and write the Nerfstudio transforms file.
template = {
    "camera_model": "OPENCV",
    "orientation_override": "none",
    "frames": frames,
}
with open(output_folder / "transforms.json", "w") as f:
    json.dump(template, f, indent=4)