In [2]:
from dataclasses import asdict

from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
from nerfstudio.data.datamanagers.ad_datamanager import ADDataManagerConfig
from nerfstudio.data.dataparsers.pandaset_dataparser import PandaSetDataParserConfig
from nerfstudio.generative.diffusion_model import DiffusionModelConfig, DiffusionModelId, DiffusionModelType
from nerfstudio.models.neurad import NeuRADModel, NeuRADModelConfig
from nerfstudio.pipelines.diffusion_nerf_pipeline import DiffusionNerfConfig


In [6]:
from PIL import Image
import itertools as it
from pathlib import Path
import torch
import tqdm

def prettify_img(img: torch.Tensor, title: str = "") -> "Image.Image":
    """Convert a tensor into an 8-bit PIL image using min-max normalisation.

    The tensor is detached, moved to the CPU and rescaled so that its smallest
    value maps to 0 and its largest to 255. Scaled values are truncated (not
    rounded) by the cast to ``uint8``.

    Args:
        img: Image tensor. Its layout must be one ``Image.fromarray`` accepts
            (assumed H x W or H x W x C -- TODO confirm against callers).
        title: Currently unused; kept so existing call sites stay valid.

    Returns:
        The normalised image as a PIL image.

    Raises:
        ValueError: If ``img`` has no elements (raised by the NumPy min
            reduction).
    """
    img = img.detach().cpu().numpy()
    lowest = img.min()
    value_range = img.max() - lowest
    img = img - lowest
    # A constant image has zero range; dividing would yield NaNs whose cast to
    # uint8 is platform-dependent. Leave the zero-shifted values (all 0) as-is.
    if value_range > 0:
        img = img / value_range
    pil_image = Image.fromarray((img * 255).astype("uint8"))
    return pil_image
    

# Identifiers of the fine-tuned model variants to render, in processing order.
# Each identifier is used both in the checkpoint path and in output file names.
models = ["un4", "un4cn4", "un128", "un128cn128"]

# Training step of the checkpoint to load for each variant. All values are
# ints so they can be formatted or compared numerically without special cases.
steps = {"un4": 40000, "un4cn4": 40000, "un128": 25000, "un128cn128": 40000}

# Sequence id passed to the PandaSet data parser; also part of the checkpoint
# and output paths.
sequence = "001"

# Master switch for the rendering pass below; set to False to skip it when
# re-running the notebook.
GEN_TRAINED_DIFFUSION = True
if GEN_TRAINED_DIFFUSION:
    # Output layout:
    #   experiments/Images/<experiment_name>/<task_name>/<sequence>/<frame>_<model>.png
    imgs_dir = Path("experiments/Images")
    experiment_name = "Neurad_Finetuned_Diffusion"
    task_name = "trained_model"
    img_subdir = imgs_dir / experiment_name / task_name / sequence
    img_subdir.mkdir(parents=True, exist_ok=True)

    for model in models:
        print(f"Generating images for model {model}...")

        # Drop the reference to the previous variant's pipeline *before*
        # building the next one; otherwise both stay alive until the
        # assignment below completes. empty_cache() then returns the cached
        # blocks to the driver (it is a no-op if CUDA is not initialised).
        pipe = None
        torch.cuda.empty_cache()

        # int() accepts the step whether it is stored as an int or as a
        # numeric string.
        step = int(steps[model])
        pipe = DiffusionNerfConfig(
            max_steps=40001,
            calc_fid_steps=tuple(range(0, 40001, 5000)),
            ray_patch_size=(128, 128),
            # Checkpoint files are named with a 9-digit, zero-padded step.
            nerf_checkpoint=(
                f"models/diffusionnerf/finetuned-diffusion-{model}/{sequence}"
                f"/nerfstudio_models/step-{step:09d}.ckpt"
            ),
            datamanager=ADDataManagerConfig(
                dataparser=PandaSetDataParserConfig(
                    add_missing_points=True,
                    cameras=("front_left",),
                    sequence=sequence,
                ),
                train_num_rays_per_batch=16384,
                eval_num_rays_per_batch=16384,
                num_processes=0,
            ),
            model=NeuRADModelConfig(
                eval_num_rays_per_chunk=1 << 15,
                # Camera optimisation is disabled here; "SO3xR3" is the
                # alternative mode that was noted next to this setting.
                camera_optimizer=CameraOptimizerConfig(mode="off"),
                rgb_upsample_factor=4,
            ),
            diffusion_model=DiffusionModelConfig(
                dtype="fp16",
            ),
            augment_phase_step=0,
            augment_strategy="none",
        ).setup(device="cuda")
        pipe.eval()
        dataset = pipe.datamanager.eval_dataset

        for img_idx in tqdm.tqdm(range(len(dataset)), desc="Generating images..."):
            # Slice (rather than index) to get a single-camera batch.
            camera = dataset.cameras[img_idx:img_idx + 1].to(device="cuda")

            # Rendering only: no gradients are needed.
            with torch.no_grad():
                img_out = pipe.model.get_outputs_for_camera(camera)

            # NOTE(review): prettify_img min-max normalises each frame
            # independently, so saved intensities are not comparable across
            # frames or models -- confirm this is intended.
            img_out_pretty = prettify_img(img_out["rgb"])
            img_out_name = f"{img_idx}_{model}.png"
            img_out_path = img_subdir / img_out_name
            img_out_pretty.save(img_out_path)


Generating images for model un4...


Output()

Output()

Output()

Output()

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Generating images...: 100%|██████████| 40/40 [00:39<00:00,  1.00it/s]


Generating images for model un4cn4...


Output()

Output()

Output()

Output()

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Generating images...: 100%|██████████| 40/40 [00:39<00:00,  1.00it/s]


Generating images for model un128...


Output()

Output()

Output()

Output()

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Generating images...: 100%|██████████| 40/40 [00:43<00:00,  1.08s/it]


Generating images for model un128cn128...


Output()

Output()

Output()

Output()

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Generating images...: 100%|██████████| 40/40 [00:40<00:00,  1.02s/it]
