In [1]:
from time import time
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import torch
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from argparse import ArgumentParser
from tqdm import tqdm
from arguments import ModelParams, PipelineParams, ModelHiddenParams
from scene import Scene, GaussianModel
from gaussian_renderer import render, render_contrastive_feature, render_segmentation, render_mask
import imageio
from utils.segment_utils import *
# The timing cells call `time.time()`, so `time` has to name the module rather
# than the function imported on the first line. Bind it explicitly, and last,
# instead of relying on whatever the star import above happens to export.
import time

%load_ext autoreload
%autoreload 2

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
FEATURE_DIM = 32

DATA_ROOT = './data/dynerf/cut_roasted_beef'
# Model directory: must be the same value as the --model_path used for training.
# train_scene.py creates this folder, but under a randomly generated name.
MODEL_PATH = './output/dynerf/cut_roasted_beef'
GAUSSIAN_ITERATION = 14000


def _iteration_file(filename):
    """Return the path of `filename` inside MODEL_PATH/point_cloud/iteration_<GAUSSIAN_ITERATION>."""
    return os.path.join(MODEL_PATH, f'point_cloud/iteration_{GAUSSIAN_ITERATION}/{filename}')


# Artifacts stored next to the checkpoint of the selected iteration.
SAM_PROJ_PATH = _iteration_file('sam_proj.pt')
NEG_PROJ_PATH = _iteration_file('neg_proj.pt')
FEATURE_PCD_PATH = _iteration_file('feature_point_cloud.ply')
SCENE_PCD_PATH = _iteration_file('scene_point_cloud.ply')

# SAM_ARCH = 'vit_h'
# SAM_CKPT_PATH = '/data/sxj/dependencies/sam_ckpt/sam_vit_h_4b8939.pth'

In [3]:
# Build the argument parser for this notebook. Each *Params helper receives the
# parser on construction (presumably registering its own options on it —
# confirm in the `arguments` package) and is later used to `extract` its values.
parser = ArgumentParser(description="Render script parameters")
model = ModelParams(parser, sentinel=True)
# op = OptimizationParams(parser)
pipeline = PipelineParams(parser)
hp = ModelHiddenParams(parser)
parser.add_argument("--iteration", default=-1, type=int)
parser.add_argument('--mode', default='scene', choices=['scene', 'feature'])
parser.add_argument("--configs", type=str, default = "./arguments/dynerf/cut_roasted_beef.py")
# parser.add_argument('--precomputed_mask', default=None, type=str)
# Resolve the final arguments for MODEL_PATH in 'scene' mode; the cell output
# shows this step looking up MODEL_PATH/cfg_args.
args = get_combined_args(parser, MODEL_PATH, 'scene')
# When a config file is set, load it with mmcv and merge it into `args`.
if args.configs:
    import mmcv
    from utils.params_utils import merge_hparams
    config = mmcv.Config.fromfile(args.configs)
    args = merge_hparams(args, config)

dataset = model.extract(args)
hyperparam = hp.extract(args)
# Later cells read `view.objects` as the ground-truth mask; these flags are
# presumably what makes the loader provide it — confirm in the `scene` package.
dataset.object_masks = True
dataset.need_gt_masks = True

# Instantiate the Gaussian model and load the scene at the requested iteration
# (the default -1 resolved to iteration 14000 in the recorded output).
gaussians = GaussianModel(dataset.sh_degree, args.mode, hyperparam)
scene = Scene(dataset, gaussians, load_iteration=args.iteration, mode=args.mode)

Looking for config file in ./output/dynerf/cut_roasted_beef/cfg_args
Config file found at: ./output/dynerf/cut_roasted_beef/cfg_args
mode:  scene
Loading trained model at iteration 14000
meta data loaded, total image:5700
meta data loaded, total image:300
load finished. Train Dataset Length: 5700


100%|██████████| 5700/5700 [00:00<00:00, 184829.67it/s]
300it [00:00, 39452.29it/s]


origin points, 37243
after points, 37243
Loading Training Cameras
Loading Test Cameras
Loading Video Cameras
Deformation Net Set aabb [21.35526085 15.11961842 50.36367798] [-25.87877274 -14.45879078   5.38203239]
Voxel Plane: set aabb= Parameter containing:
tensor([[ 21.3553,  15.1196,  50.3637],
        [-25.8788, -14.4588,   5.3820]])
loading model from exists./output/dynerf/cut_roasted_beef/point_cloud/iteration_14000


In [4]:
# Renderer background colour: white when the dataset requests it, black otherwise.
bg_color = torch.tensor(
    [1, 1, 1] if dataset.white_background else [0, 0, 0],
    dtype=torch.float32,
    device="cuda",
)

# Camera sets used by the cells below, plus the dataset type passed to `render`.
video_cameras = scene.getVideoCameras()
train_cams = scene.getTrainCameras()
cam_type = scene.dataset_type
print(f"There are {len(train_cams)} views in the dataset.")

There are 5700 views in the dataset.


In [5]:
# Lift the 2D ground-truth mask of one training view onto the Gaussians: after
# this cell, `mask3d` is True for points that project inside the image AND land
# on a foreground pixel of that view.
with torch.no_grad():
    view = train_cams[5399]

    # Foreground flag per pixel: 1 where any entry along the last axis of
    # `view.objects` is non-zero.
    gt_mask = (view.objects != 0).int().cuda()
    gt_mask = gt_mask.any(dim=-1).int()

    render_pkg = render(view, gaussians, pipeline, bg_color, cam_type=scene.dataset_type)
    points2d = render_pkg["points2d"].round().long()

    # Column 0 is compared against the image width and column 1 against the
    # height; only points inside both ranges can be looked up in `gt_mask`.
    inside_cols = (points2d[:, 0] >= 0) & (points2d[:, 0] < view.image_width)
    inside_rows = (points2d[:, 1] >= 0) & (points2d[:, 1] < view.image_height)
    mask3d = inside_rows & inside_cols
    mask = mask3d  # alias: shares storage, so it also sees the in-place update below
    visible_points2d = points2d[mask3d]
    points2d_values = gt_mask[visible_points2d[:, 1], visible_points2d[:, 0]]

    # Overwrite the in-frame entries in place with their foreground flag; the
    # clone keeps the index mask stable while `mask3d` itself is being written.
    mask3d[mask3d.clone()] = points2d_values == 1

In [19]:
# Render every video camera with the complement of the single-view `mask3d`
# and save the frames as an mp4, reporting the achieved rendering FPS.
# NOTE(review): the "Inference" cell further down writes to the same
# video_seg_man.mp4 path, so whichever of the two cells runs last wins.
video_dir = os.path.join(MODEL_PATH, 'video', "ours_{}".format(GAUSSIAN_ITERATION))
# imageio refuses to write into a directory that does not exist yet.
os.makedirs(video_dir, exist_ok=True)

render_images = []
with torch.no_grad():
    # Start the clock unconditionally so `time1` is always defined, even when
    # `video_cameras` is empty (it used to be set inside the loop body).
    time1 = time.time()
    for view in tqdm(video_cameras, desc="Rendering progress"):
        rendering = render_segmentation(view, gaussians, pipeline, bg_color, ~mask3d.bool())["render"]
        # Convert to 8-bit and move the first axis last for the video writer.
        render_images.append(to8b(rendering.detach()).transpose(1,2,0))
    time2 = time.time()

print("FPS:", len(video_cameras) / (time2 - time1))
torch.cuda.empty_cache()

imageio.mimwrite(os.path.join(video_dir, 'video_seg_man.mp4'), render_images, fps=30)

Rendering progress:   0%|          | 0/300 [00:00<?, ?it/s]

Rendering progress: 100%|██████████| 300/300 [00:09<00:00, 32.34it/s]


FPS: 32.344379595861824




In [6]:
# Set up the Gaussian model's mask table, passing the number of training views.
num_train_views = len(train_cams)
gaussians.create_mask_table(num_train_views)

In [7]:
# Vote accumulation: for every training view, find the mask-table row whose
# time-map entry is closest to the view's timestamp and add 1 to each Gaussian
# that projects inside the image onto a foreground pixel of that view.
with torch.no_grad():
    time1 = time.time()
    for view in tqdm(train_cams):
        # Nearest-timestamp lookup into the model's time map.
        diff = torch.abs(gaussians._time_map - view.time)
        index = torch.argmin(diff)

        # Foreground flag per pixel: 1 where any entry along the last axis of
        # `view.objects` is non-zero.
        gt_mask = (view.objects != 0).int().cuda()
        gt_mask = torch.any(gt_mask, dim=-1).int()

        render_pkg = render(view, gaussians, pipeline, bg_color, cam_type=scene.dataset_type)
        points2d = render_pkg["points2d"].round().long()
        # Deliberately NOT named `mask3d`: that global holds the single-view
        # mask consumed by the single-view rendering cell, and rebinding it
        # here silently changed that cell's result when it was re-run later.
        in_frame = (
            (points2d[:, 1] >= 0) & (points2d[:, 1] < view.image_height)
            & (points2d[:, 0] >= 0) & (points2d[:, 0] < view.image_width)
        )
        visible_points2d = points2d[in_frame]
        points2d_values = gt_mask[visible_points2d[:, 1], visible_points2d[:, 0]]

        gaussians._mask_table[index][in_frame] += (points2d_values == 1).float()

    # Keep a handle on the raw vote counts; the thresholding cell below turns
    # them into the boolean table.
    mask_table = gaussians._mask_table

    time2 = time.time()
    print("time:", time2 - time1)

  0%|          | 0/5700 [00:00<?, ?it/s]

100%|██████████| 5700/5700 [08:46<00:00, 10.84it/s]

time: 526.0199625492096





In [16]:
# Turn the raw vote counts into a boolean table: divide by 19 (the maximum
# count reported by the next cell; presumably the number of cameras per
# timestamp — confirm) and keep entries with a vote fraction of at least 0.9.
vote_fraction = mask_table / 19
gaussians._mask_table = vote_fraction >= 0.9

In [17]:
# Sanity check: the largest vote count any Gaussian received at any timestamp.
torch.max(mask_table)

tensor(19., device='cuda:0')

## Inference

In [18]:
# Render every video camera with the per-timestamp mask from the mask table
# and save the frames as an mp4, reporting the achieved rendering FPS.
video_dir = os.path.join(MODEL_PATH, 'video', "ours_{}".format(GAUSSIAN_ITERATION))
# imageio refuses to write into a directory that does not exist yet.
os.makedirs(video_dir, exist_ok=True)

render_images = []
with torch.no_grad():
    # Start the clock unconditionally so `time1` is always defined, even when
    # `video_cameras` is empty (it used to be set inside the loop body).
    time1 = time.time()
    for view in tqdm(video_cameras, desc="Rendering progress"):
        # Nearest interpolation: use the mask-table row whose time-map entry
        # is closest to this camera's timestamp.
        diff = torch.abs(gaussians._time_map - view.time)
        index = torch.argmin(diff)
        mask = gaussians._mask_table[index]

        rendering = render_segmentation(view, gaussians, pipeline, bg_color, mask.bool())["render"]
        # Convert to 8-bit and move the first axis last for the video writer.
        render_images.append(to8b(rendering.detach()).transpose(1,2,0))
    time2 = time.time()

print("FPS:", len(video_cameras) / (time2 - time1))
torch.cuda.empty_cache()

imageio.mimwrite(os.path.join(video_dir, 'video_seg_man.mp4'), render_images, fps=30)

Rendering progress:   0%|          | 0/300 [00:00<?, ?it/s]

Rendering progress: 100%|██████████| 300/300 [00:06<00:00, 45.24it/s]


FPS: 45.26533123352721


