In [None]:
cd /private/home/ronghanghu/workspace/mmf_nr

## Load ours

In [None]:
import os
import numpy as np
import cv2
import torch
import torchvision
import argparse
import omegaconf
import matplotlib.pyplot as plt
import quaternion
from tqdm import tqdm
from skimage import img_as_float, img_as_ubyte
from PIL import Image

from mmf.utils.env import setup_imports
from mmf.utils.configuration import Configuration
from mmf.utils.build import build_config, build_model
from mmf.common.sample import SampleList, Sample


def get_config_from_opts(opts):
    """Build an MMF config object from a list of override strings.

    Args:
        opts: list of ``"key=value"`` strings, stored as the ``opts``
            attribute of the namespace handed to ``Configuration``.

    Returns:
        Whatever ``build_config`` returns for that configuration.
    """
    # setup_imports() is called before the configuration is constructed.
    setup_imports()

    cli_args = argparse.Namespace(config_override=None, opts=opts)
    return build_config(Configuration(cli_args))


def load_model(config, device, ckpt_file=None):
    """Build the model named by ``config.model`` and optionally load weights.

    Args:
        config: config object exposing ``model`` and ``model_config``.
        device: torch device the model (and the checkpoint tensors) go to.
        ckpt_file: optional checkpoint path; the weights are read from the
            ``"model"`` entry of the loaded object.

    Returns:
        The model, moved to ``device`` and switched to eval mode.
    """
    model_attrs = config.model_config[config.model]
    if isinstance(model_attrs, str):
        # A string entry is an alias that points at another model's config.
        model_attrs = config.model_config[model_attrs]

    with omegaconf.open_dict(model_attrs):
        model_attrs.model = config.model

    model = build_model(model_attrs).to(device)
    model.eval()

    if ckpt_file is None:
        return model

    weights = torch.load(ckpt_file, map_location=device)["model"]
    try:
        model.load_state_dict(weights, strict=True)
    except Exception as err:
        # Fall back to a lenient load, but show what did not match first.
        print(err)
        print('retry loading with `strict=False`')
        model.load_state_dict(weights, strict=False)
    return model


def build_sample_list(img_0, R_0, T_0, R_1, T_1, image_transform):
    """Pack one source image and two camera poses into a single-sample SampleList.

    Args:
        img_0: source image; converted with ``torch.tensor`` and permuted with
            ``(2, 0, 1)`` before ``image_transform`` is applied.
        R_0, T_0: pose of the source view.
        R_1, T_1: pose of the view to render.
        image_transform: callable applied to the permuted image tensor.

    Returns:
        A ``SampleList`` holding one sample, moved to the module-level ``device``.
    """
    sample = Sample()
    sample.orig_img_0 = torch.tensor(img_0)
    sample.trans_img_0 = image_transform(sample.orig_img_0.permute((2, 0, 1)))

    # Pose fields are stored as tensors under the same names as the arguments.
    for field, value in (("R_0", R_0), ("T_0", T_0), ("R_1", R_1), ("T_1", T_1)):
        setattr(sample, field, torch.tensor(value))

    return SampleList([sample]).to(device)


# Experiment name and checkpoint file; both are combined into `ckpt_file` below.
exp_name = "realestate10k_dscale2_stride4ft_lowerL1_200"
ckpt_name = "models/model_40000.ckpt"

# "key=value" overrides handed to `get_config_from_opts`.
opts = [
    f"config=projects/neural_rendering/configs/synsin_realestate10k/{exp_name}.yaml",
    f"datasets=synsin_realestate10k",
    f"model=mesh_renderer",
    f"training.batch_size=1",
    f"model_config.mesh_renderer.return_rendering_results_only=True",
]

# Use the first CUDA device and make it the current one.
device = torch.device("cuda:0")
torch.cuda.set_device(device)

# The checkpoint path is relative to the working directory; fail fast if it is missing.
ckpt_file = f"./save/synsin_realestate10k/{exp_name}/{ckpt_name}"
assert os.path.exists(ckpt_file)

config = get_config_from_opts(opts)
model = load_model(config, device, ckpt_file)


# Side length that frames are resized to; `load_data_line` also divides the intrinsics by it.
image_size = 256
# Directory containing one sub-directory of PNG frames per sequence (see `load_data_line`).
frame_dir = "/checkpoint/ronghanghu/neural_rendering_datasets/realestate10K/RealEstate10K/all_frames/test/"
# normalize with ResNet-50 preprocessing
image_transform = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
)


def load_image(image_path, W):
    """Read an image, resize it to ``W x W`` and return it as float32.

    Args:
        image_path: path of the image file to open with PIL.
        W: side length of the (square) output, in pixels.

    Returns:
        A float32 numpy array produced by ``img_as_float`` on the
        bilinearly resized image.
    """
    resized = Image.open(image_path).resize([W, W], Image.BILINEAR)
    pixels = np.array(resized)
    return img_as_float(pixels).astype(np.float32)


def build_realestate10k_RT_from_txt(intrinsics, extrinsics):
    """Turn RealEstate10K intrinsics/extrinsics into the (R, T) pair fed to the model.

    Args:
        intrinsics: 4 values ``(fx, fy, cx, cy)``.
        extrinsics: 12 values, reshaped row-major into a ``3 x 4`` matrix.

    Returns:
        ``(R, T)``: ``R`` is the transposed left ``3 x 3`` part and ``T`` the
        last column of ``offset @ K @ extrinsics`` after negating its X and Z
        rows. Both are float32. Note that ``R`` includes the intrinsics and is
        therefore not a pure rotation in general.
    """
    fx, fy, cx, cy = np.array(intrinsics, dtype=np.float32).reshape(4)
    pose = np.array(extrinsics, dtype=np.float32).reshape(3, 4)

    offset = np.array([[2, 0, -1], [0, -2, 1], [0, 0, -1]], dtype=np.float32)
    pinhole = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)

    # Same product order as (offset @ K) @ extrinsics.
    projection = offset @ pinhole @ pose

    # change from Habitat coordinates to PyTorch3D coordinates:
    # negate the X row (index 0) and the Z row (index 2)
    projection[[0, 2]] *= -1

    # transpose the 3x3 part ("to row major"); translation is the last column
    return projection[:, :3].T, projection[:, 3]


def take_pic_ours(img_init, intrinsics, extrinsics_init, extrinsics_new, use_inpainting=False, show=True):
    """Render ``img_init`` from a new camera with the module-level ``model``.

    Args:
        img_init: source image as accepted by ``build_sample_list``.
        intrinsics: camera intrinsics shared by both views.
        extrinsics_init: extrinsics of the source view.
        extrinsics_new: extrinsics of the view to render.
        use_inpainting: read ``'rgb_1_inpaint'`` from the model output instead
            of the second entry of ``'rgba_out_rec_list'``.
        show: display the result in a new matplotlib figure.

    Returns:
        The first (up to) three channels of the first batch element,
        as a numpy array clipped to ``[0, 1]``.
    """
    src_R, src_T = build_realestate10k_RT_from_txt(intrinsics, extrinsics_init)
    tgt_R, tgt_T = build_realestate10k_RT_from_txt(intrinsics, extrinsics_new)
    batch = build_sample_list(img_init, src_R, src_T, tgt_R, tgt_T, image_transform).to(device)

    with torch.no_grad():
        results = model.forward(batch)

    if use_inpainting:
        prediction = results['rgb_1_inpaint']
    else:
        prediction = results['rgba_out_rec_list'][1]

    # Keep the colour channels of batch element 0 and clamp to the displayable range.
    rgb = np.clip(prediction[0, ..., :3].cpu().numpy(), 0, 1)
    if show:
        plt.figure()
        plt.imshow(rgb)
    return rgb

## Load SynSin

In [None]:
import matplotlib.pyplot as plt

import quaternion
import numpy as np

import sys; sys.path.append("/private/home/ronghanghu/workspace/synsin/")
import os; os.environ['DEBUG'] = ''

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader

from skimage import img_as_float
from PIL import Image

from models.networks.sync_batchnorm import convert_model
from models.base_model import BaseModel
from options.options import get_model


torch.backends.cudnn.enabled = True

# REALESTATE
SYNSIN_MODEL_PATH = '/private/home/ronghanghu/workspace/synsin/modelcheckpoints/realestate/synsin.pth'
# Deserialize the checkpoint once: it carries both the training options
# ('opts') and the weights ('state_dict').
synsin_checkpoint = torch.load(SYNSIN_MODEL_PATH)
synsin_opts = synsin_checkpoint['opts']
synsin_model = get_model(synsin_opts)
torch_devices = [int(gpu_id.strip()) for gpu_id in synsin_opts.gpu_ids.split(",")]

# Only the batch-norm conversion depends on the norm type; the DataParallel
# wrapping (restricted to the first listed GPU) is the same in both cases.
if 'sync' in synsin_opts.norm_G:
    synsin_model = convert_model(synsin_model)
synsin_model = nn.DataParallel(synsin_model, torch_devices[0:1]).cuda()

#  Load the original model to be tested
synsin_model_to_test = BaseModel(synsin_model, synsin_opts)
synsin_model_to_test.load_state_dict(synsin_checkpoint['state_dict'])
synsin_model_to_test.eval()
# The weights now live in the model; drop the extra reference to the checkpoint.
del synsin_checkpoint

print("Loaded model")


# image_size = 256
# frame_dir = "/checkpoint/ronghanghu/neural_rendering_datasets/realestate10K/RealEstate10K/all_frames/test/"
# Maps each channel with (x - 0.5) / 0.5 before images are given to the SynSin-style models.
synsin_input_transform = torchvision.transforms.Normalize(
    (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
)


def load_image(image_path, W):
    """Read an image, resize it to ``W x W`` (bilinear) and return it as float32.

    NOTE(review): this redefines the ``load_image`` from the "Load ours" cell
    with the same behavior; whichever cell ran last provides the active
    definition.
    """
    image = img_as_float(
        np.array(Image.open(image_path).resize([W, W], Image.BILINEAR))
    ).astype(np.float32)
    return image


def build_synsin_realestate10k_batch(intrinsics, extrinsics_src, extrinsics_tgt, src_image, tgt_image):
    """Assemble the batch dict consumed by the SynSin-style models.

    Args:
        intrinsics: 4 values ``(fx, fy, cx, cy)`` shared by both views.
        extrinsics_src: 12 values (array or plain sequence) for the source
            view, reshaped to ``3 x 4``.
        extrinsics_tgt: same, for the target view.
        src_image: source image; converted with ``torch.tensor``, permuted
            with ``(2, 0, 1)`` and normalized by ``synsin_input_transform``.
        tgt_image: target image, processed the same way.

    Returns:
        ``{"images": images, "cameras": cameras}`` where ``images`` stacks the
        two normalized images (each with a leading singleton dimension) and
        ``cameras`` holds, per view, ``P`` (``offset @ K @ pose`` extended to
        ``4 x 4``), its inverse ``Pinv`` and identity ``K`` / ``Kinv``, all
        with a leading singleton dimension.
    """
    offset = np.array([[2, 0, -1], [0, -2, 1], [0, 0, -1]], dtype=np.float32)

    intrinsics = np.array(intrinsics, dtype=np.float32).reshape(4)
    # np.asarray accepts plain sequences as well as arrays and leaves the dtype
    # of array inputs untouched.
    src_pose = np.asarray(extrinsics_src).reshape(3, 4)
    tgt_pose = np.asarray(extrinsics_tgt).reshape(3, 4)

    src_image = synsin_input_transform(torch.tensor(src_image).permute((2, 0, 1)))
    tgt_image = synsin_input_transform(torch.tensor(tgt_image).permute((2, 0, 1)))

    # Both views share the same intrinsics, so K is built once outside the loop.
    origK = np.array(
        [
            [intrinsics[0], 0, intrinsics[2]],
            [0, intrinsics[1], intrinsics[3]],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )
    K = np.matmul(offset, origK)

    cameras = []
    for pose in (src_pose, tgt_pose):
        # Fold the intrinsics into the pose and extend to a 4x4 homogeneous
        # matrix (bottom row [0, 0, 0, 1]).
        P = np.matmul(K, pose)
        P = np.vstack((P, np.zeros((1, 4)))).astype(np.float32)
        P[3, 3] = 1

        Pinv = np.linalg.inv(P)

        # K is already part of P, so the separate K / Kinv entries are identity.
        cameras.append({
            "P": torch.tensor(P).unsqueeze(0),
            "Pinv": torch.tensor(Pinv).unsqueeze(0),
            "K": torch.eye(4, dtype=torch.float32).unsqueeze(0),
            "Kinv": torch.eye(4, dtype=torch.float32).unsqueeze(0)
        })

    images = torch.stack([
        src_image.unsqueeze(0),
        tgt_image.unsqueeze(0)
    ])

    return {"images": images, "cameras": cameras}


def take_synsin_pic(img_init, intrinsics, extrinsics_init, extrinsics_new, show=True):
    """Render ``img_init`` from a new camera with ``synsin_model_to_test``.

    Args:
        img_init: source image; it is also passed in the target-image slot of
            the batch.
        intrinsics: camera intrinsics shared by both views.
        extrinsics_init: extrinsics of the source view.
        extrinsics_new: extrinsics of the view to render.
        show: display the result in a new matplotlib figure.

    Returns:
        The first ``"PredImg"`` entry as a channels-last numpy array clipped
        to ``[0, 1]``.
    """
    synsin_batch = build_synsin_realestate10k_batch(
        intrinsics, extrinsics_init, extrinsics_new, img_init, img_init
    )

    # The model wrapper is called with an iterator over batches.
    with torch.no_grad():
        _losses, predictions, _returned_batch = synsin_model_to_test(
            iter([synsin_batch]), isval=True, return_batch=True
        )

    image_chw = predictions["PredImg"][0].detach().cpu().numpy()
    rgb_out = np.clip(image_chw.transpose(1, 2, 0), 0, 1)
    if show:
        plt.figure()
        plt.imshow(rgb_out)
    return rgb_out

## Load Im2Im

In [None]:
IM2IM_MODEL_PATH = '/private/home/ronghanghu/workspace/synsin/modelcheckpoints/realestate/viewappearance.pth'
# Deserialize the checkpoint once: it carries both the training options
# ('opts') and the weights ('state_dict').
im2im_checkpoint = torch.load(IM2IM_MODEL_PATH)
im2im_opts = im2im_checkpoint['opts']
im2im_model = get_model(im2im_opts)
torch_devices = [int(gpu_id.strip()) for gpu_id in im2im_opts.gpu_ids.split(",")]

# Only the batch-norm conversion depends on the norm type; the DataParallel
# wrapping (restricted to the first listed GPU) is the same in both cases.
if 'sync' in im2im_opts.norm_G:
    im2im_model = convert_model(im2im_model)
im2im_model = nn.DataParallel(im2im_model, torch_devices[0:1]).cuda()

#  Load the original model to be tested
im2im_model_to_test = BaseModel(im2im_model, im2im_opts)
im2im_model_to_test.load_state_dict(im2im_checkpoint['state_dict'])
im2im_model_to_test.eval()
# The weights now live in the model; drop the extra reference to the checkpoint.
del im2im_checkpoint


def take_im2im_pic(img_init, intrinsics, extrinsics_init, extrinsics_new, show=True):
    """Render ``img_init`` from a new camera with ``im2im_model_to_test``.

    Args:
        img_init: source image; it is also passed in the target-image slot of
            the batch.
        intrinsics: camera intrinsics shared by both views.
        extrinsics_init: extrinsics of the source view.
        extrinsics_new: extrinsics of the view to render.
        show: display the result in a new matplotlib figure.

    Returns:
        The first ``"PredImg"`` entry as a channels-last numpy array clipped
        to ``[0, 1]``.
    """
    im2im_batch = build_synsin_realestate10k_batch(
        intrinsics, extrinsics_init, extrinsics_new, img_init, img_init
    )

    # The model wrapper is called with an iterator over batches.
    with torch.no_grad():
        _losses, predictions, _returned_batch = im2im_model_to_test(
            iter([im2im_batch]), isval=True, return_batch=True
        )

    image_chw = predictions["PredImg"][0].detach().cpu().numpy()
    rgb_out = np.clip(image_chw.transpose(1, 2, 0), 0, 1)
    if show:
        plt.figure()
        plt.imshow(rgb_out)
    return rgb_out

## Camera transform functions

In [None]:
def extrinsics2RT_orig(extrinsics):
    """Split 12 extrinsics values into a rotation ``R`` and translation ``T``.

    Unlike ``build_realestate10k_RT_from_txt`` no intrinsics are folded in, so
    ``R`` is checked to be a proper rotation.

    Args:
        extrinsics: 12 values, reshaped row-major into a ``3 x 4`` matrix.

    Returns:
        ``(R_orig, T_orig)`` as float32: the transposed ``3 x 3`` part and the
        last column of the matrix after negating its X and Z rows.

    Raises:
        Exception: from ``_check_valid_rotation_matrix`` if ``R_orig`` is not
            a valid rotation.
    """
    # np.array copies, so the in-place flip below never touches the caller's data.
    pose = np.array(extrinsics, dtype=np.float32).reshape(3, 4)

    # change from Habitat coordinates to PyTorch3D coordinates:
    # negate the X row (index 0) and the Z row (index 2)
    pose[[0, 2]] *= -1

    R_orig = pose[:, :3].T  # to row major
    T_orig = pose[:, 3]

    _check_valid_rotation_matrix(R_orig)
    return R_orig, T_orig


def RT_orig2extrinsics(R_orig, T_orig):
    """Inverse of ``extrinsics2RT_orig``: pack ``(R, T)`` into 12 extrinsics values.

    Args:
        R_orig: ``3 x 3`` rotation matrix (validated first).
        T_orig: translation of length 3.

    Returns:
        Flat array of 12 values: ``[R_orig.T | T_orig]`` with the X and Z rows
        negated, flattened row-major.

    Raises:
        Exception: from ``_check_valid_rotation_matrix`` if ``R_orig`` is not
            a valid rotation.
    """
    _check_valid_rotation_matrix(R_orig)

    # hstack allocates a fresh 3x4 array, so flipping rows in place is safe.
    pose = np.hstack((R_orig.T, T_orig[..., None]))
    pose[[0, 2]] *= -1  # flip X and Z axes
    return pose.reshape(-1)


def _check_valid_rotation_matrix(R, tol: float = 1e-4):
    """
    Determine if R is a valid rotation matrix by checking it satisfies the
    following conditions:
    ``RR^T = I and det(R) = 1``
    Args:
        R: an (N, 3, 3) matrix
    Returns:
        None
    Emits a warning if R is an invalid rotation matrix.
    """
    eye = np.eye(3)
    orthogonal = np.all(np.abs(R @ R.T - eye) < tol)
    no_distortion = np.abs(np.linalg.det(R) - 1) < tol

    if not (orthogonal and no_distortion):
        raise Exception("R is not a valid rotation matrix")
    return


def _get_habitat_position_rotation(R, T):
    """Convert a PyTorch3D-style ``(R, T)`` pair into a position and a quaternion.

    Args:
        R: ``3 x 3`` matrix; its transpose becomes the rotation part of the pose.
        T: translation of length 3.

    Returns:
        ``(position, rotation)``: the translation column of the inverted
        ``4 x 4`` pose and its rotation part as a quaternion.
    """
    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.T
    pose[:3, 3] = T

    # Negate the X and Z rows. This is the same sign flip used to go from
    # Habitat to PyTorch3D coordinates; since it is its own inverse it also
    # converts back.
    pose[[0, 2]] *= -1

    pose_inv = np.linalg.inv(pose)
    position = pose_inv[:3, 3]
    rotation = quaternion.from_rotation_matrix(pose_inv[:3, :3])
    return position, rotation

    
def _get_pytorch3d_camera_RT(position, rotation):
    """Convert a position and a quaternion back into a PyTorch3D-style ``(R, T)``.

    Args:
        position: translation of length 3 (as returned by
            ``_get_habitat_position_rotation``).
        rotation: quaternion describing the orientation.

    Returns:
        ``(R, T)``: the transposed ``3 x 3`` part and the last column of the
        inverted pose after negating its X and Z rows.
    """
    pose_inv = np.eye(4, dtype=np.float32)
    pose_inv[:3, :3] = quaternion.as_rotation_matrix(rotation)
    pose_inv[:3, 3] = position

    pose = np.linalg.inv(pose_inv)

    # change from Habitat coordinates to PyTorch3D coordinates:
    # negate the X row (index 0) and the Z row (index 2)
    pose[[0, 2]] *= -1

    # transpose the rotation part ("to row major"); translation is the last column
    return pose[:3, :3].T, pose[:3, 3]


def _h_rotate_camera(R_in, T_in, degree_right):
    """Rotate the camera about the Y axis while keeping its position.

    Args:
        R_in, T_in: input camera pose.
        degree_right: rotation amount; its sign is inverted before use.

    Returns:
        ``(R_out, T_out)`` of the rotated camera.

    NOTE(review): a unit quaternion for a rotation by ``theta`` is
    ``(cos(theta / 2), sin(theta / 2) * axis)``. The components here use the
    full angle, so the effective rotation is ``2 * degree_right`` degrees.
    Confirm whether that is intended before changing it, since existing
    trajectories were presumably tuned against this behavior.
    """
    position, orientation = _get_habitat_position_rotation(R_in, T_in)
    angle = -degree_right * np.pi / 180

    # wxyz-format, rotation axis = Y
    yaw = quaternion.from_float_array([np.cos(angle), 0, np.sin(angle), 0])

    # The new rotation is applied on the left of the current orientation.
    return _get_pytorch3d_camera_RT(position, yaw * orientation)


def _v_rotate_camera(R_in, T_in, degree_up):
    """Rotate the camera about the X axis while keeping its position.

    Args:
        R_in, T_in: input camera pose.
        degree_up: rotation amount; its sign is inverted before use.

    Returns:
        ``(R_out, T_out)`` of the rotated camera.

    NOTE(review): a unit quaternion for a rotation by ``theta`` is
    ``(cos(theta / 2), sin(theta / 2) * axis)``. The components here use the
    full angle, so the effective rotation is ``2 * degree_up`` degrees.
    Confirm whether that is intended before changing it.
    """
    position, orientation = _get_habitat_position_rotation(R_in, T_in)
    angle = -degree_up * np.pi / 180

    # wxyz-format, rotation axis = X
    pitch = quaternion.from_float_array([np.cos(angle), np.sin(angle), 0, 0])

    # The new rotation is applied on the left of the current orientation.
    return _get_pytorch3d_camera_RT(position, pitch * orientation)


def _i_rotate_camera(R_in, T_in, degree_in):
    """Rotate the camera about the Z axis while keeping its position.

    Args:
        R_in, T_in: input camera pose.
        degree_in: rotation amount; its sign is inverted before use.

    Returns:
        ``(R_out, T_out)`` of the rotated camera.

    NOTE(review): a unit quaternion for a rotation by ``theta`` is
    ``(cos(theta / 2), sin(theta / 2) * axis)``. The components here use the
    full angle, so the effective rotation is ``2 * degree_in`` degrees.
    Confirm whether that is intended before changing it.
    """
    position, orientation = _get_habitat_position_rotation(R_in, T_in)
    angle = -degree_in * np.pi / 180

    # wxyz-format, rotation axis = Z
    roll = quaternion.from_float_array([np.cos(angle), 0, 0, np.sin(angle)])

    # Unlike _h_rotate_camera and _v_rotate_camera, the new rotation is
    # multiplied on the right of the current orientation.
    return _get_pytorch3d_camera_RT(position, orientation * roll)


def _move_camera(R_in, T_in, front, up, right, distance):
    """Translate the camera along a direction given in its own frame.

    Args:
        R_in, T_in: input camera pose.
        front, up, right: components of the direction in the camera frame;
            the vector is built as ``[right, up, -front]``.
        distance: multiplier applied to the direction vector.

    Returns:
        ``(R_out, T_out)`` of the moved camera; the orientation is unchanged.
    """
    position, orientation = _get_habitat_position_rotation(R_in, T_in)

    # Rotate the camera-frame direction into the world frame.
    step_camera = np.array([right, up, -front], np.float32)
    step_world = quaternion.as_rotation_matrix(orientation) @ step_camera

    return _get_pytorch3d_camera_RT(position + step_world * distance, orientation)


def _normalize(array):
    return array / np.sqrt(np.sum(array * array))


def _interpolate(ra, rb, theta, t):
    """Spherical interpolation between two quaternions given as float arrays.

    Args:
        ra, rb: the two quaternions as arrays.
        theta: angle between them (``arccos`` of their dot product).
        t: interpolation parameter; ``0`` gives ``ra`` and ``1`` gives ``rb``.

    Returns:
        The normalized blend of ``ra`` and ``rb``.
    """
    if abs(theta) < 1e-3:
        # the angle is too small, 1 / sin(theta) will be numerically unstable:
        # fall back to a plain linear blend
        blended = (1 - t) * ra + t * rb
    else:
        weight_a = np.sin((1 - t) * theta)
        weight_b = np.sin(t * theta)
        blended = (weight_a * ra + weight_b * rb) / np.sin(theta)
    return _normalize(blended)


def _interpolate_rotation_quaternion(rotation_a, rotation_b, num):
    """Return ``num`` quaternions evenly interpolated from ``rotation_a`` to ``rotation_b``.

    Both endpoints are included (``t`` runs over ``np.linspace(0, 1, num)``).
    """
    # https://stackoverflow.com/questions/4099369/interpolate-between-rotation-matrices
    qa = quaternion.as_float_array(rotation_a)
    qb = quaternion.as_float_array(rotation_b)

    cos_theta = np.dot(qa, qb)
    if cos_theta < 0:
        # q and -q encode the same rotation; negate one side so the
        # dot product is non-negative.
        cos_theta, qb = -cos_theta, -qb
    theta = np.arccos(np.clip(cos_theta, -1, 1))

    rotations = []
    for t in np.linspace(0, 1, num):
        rotations.append(quaternion.from_float_array(_interpolate(qa, qb, theta, t)))
    return rotations


def _extrapolate_rotation_quaternion(rotation_a, rotation_b, ratio, num):
    """Return ``num`` quaternions continuing the rotation from ``rotation_a`` past ``rotation_b``.

    ``t`` runs over ``np.linspace(1, 1 + ratio, num)``, so the first result
    corresponds to ``rotation_b`` and the last overshoots it by ``ratio``
    times the a-to-b rotation.
    """
    # https://stackoverflow.com/questions/4099369/interpolate-between-rotation-matrices
    qa = quaternion.as_float_array(rotation_a)
    qb = quaternion.as_float_array(rotation_b)

    cos_theta = np.dot(qa, qb)
    if cos_theta < 0:
        # q and -q encode the same rotation; negate one side so the
        # dot product is non-negative.
        cos_theta, qb = -cos_theta, -qb
    theta = np.arccos(np.clip(cos_theta, -1, 1))

    rotations = []
    for t in np.linspace(1, 1 + ratio, num):
        rotations.append(quaternion.from_float_array(_interpolate(qa, qb, theta, t)))
    return rotations


def h_rotate(R, T, angles, cameras, sampling=3):
    """Append a horizontal-rotation sweep of camera poses to ``cameras``.

    Every angle is measured from the input pose ``(R, T)``; the sweep goes
    from 0 to the first angle, then from each angle to the next.

    Args:
        R, T: starting camera pose.
        angles: a single angle or a list/tuple of angle waypoints, passed to
            ``_h_rotate_camera``.
        cameras: list that receives one ``(R, T)`` tuple per generated pose.
        sampling: poses generated per unit of angle (at least 2 per segment).

    Returns:
        The last generated ``(R, T)``, or the input ``(R, T)`` unchanged when
        ``angles`` is empty.
    """
    if not isinstance(angles, (list, tuple)):
        angles = [angles]

    # Default result for an empty waypoint list: the camera does not move.
    R_new, T_new = R, T
    start = 0
    for end in angles:
        n_steps = max(2, int(np.abs(end - start) * sampling))
        for a in np.linspace(start, end, n_steps):
            R_new, T_new = _h_rotate_camera(R, T, a)
            cameras.append((R_new, T_new))
        start = end

    return R_new, T_new


def v_rotate(R, T, angles, cameras, sampling=3):
    """Append a vertical-rotation sweep of camera poses to ``cameras``.

    Every angle is measured from the input pose ``(R, T)``; the sweep goes
    from 0 to the first angle, then from each angle to the next.

    Args:
        R, T: starting camera pose.
        angles: a single angle or a list/tuple of angle waypoints, passed to
            ``_v_rotate_camera``.
        cameras: list that receives one ``(R, T)`` tuple per generated pose.
        sampling: poses generated per unit of angle (at least 2 per segment).

    Returns:
        The last generated ``(R, T)``, or the input ``(R, T)`` unchanged when
        ``angles`` is empty.
    """
    if not isinstance(angles, (list, tuple)):
        angles = [angles]

    # Default result for an empty waypoint list: the camera does not move.
    R_new, T_new = R, T
    start = 0
    for end in angles:
        n_steps = max(2, int(np.abs(end - start) * sampling))
        for a in np.linspace(start, end, n_steps):
            R_new, T_new = _v_rotate_camera(R, T, a)
            cameras.append((R_new, T_new))
        start = end

    return R_new, T_new


def i_rotate(R, T, angles, cameras, sampling=3):
    """Append an in-plane (Z axis) rotation sweep of camera poses to ``cameras``.

    Every angle is measured from the input pose ``(R, T)``; the sweep goes
    from 0 to the first angle, then from each angle to the next.

    Args:
        R, T: starting camera pose.
        angles: a single angle or a list/tuple of angle waypoints, passed to
            ``_i_rotate_camera``.
        cameras: list that receives one ``(R, T)`` tuple per generated pose.
        sampling: poses generated per unit of angle (at least 2 per segment).

    Returns:
        The last generated ``(R, T)``, or the input ``(R, T)`` unchanged when
        ``angles`` is empty.
    """
    if not isinstance(angles, (list, tuple)):
        angles = [angles]

    # Default result for an empty waypoint list: the camera does not move.
    R_new, T_new = R, T
    start = 0
    for end in angles:
        n_steps = max(2, int(np.abs(end - start) * sampling))
        for a in np.linspace(start, end, n_steps):
            R_new, T_new = _i_rotate_camera(R, T, a)
            cameras.append((R_new, T_new))
        start = end

    return R_new, T_new


def move(R, T, distances, FUR_ratios, cameras, sampling=20):
    """Append a translation sweep of camera poses to ``cameras``.

    Every distance is measured from the input pose ``(R, T)``; the sweep goes
    from 0 to the first distance, then from each distance to the next.

    Args:
        R, T: starting camera pose.
        distances: a single distance or a list/tuple of distance waypoints.
        FUR_ratios: ``(front, up, right)`` components of the movement
            direction in the camera frame.
        cameras: list that receives one ``(R, T)`` tuple per generated pose.
        sampling: poses generated per unit of distance (at least 2 per segment).

    Returns:
        The last generated ``(R, T)``, or the input ``(R, T)`` unchanged when
        ``distances`` is empty.
    """
    fwd_ratio, up_ratio, right_ratio = FUR_ratios
    if not isinstance(distances, (list, tuple)):
        distances = [distances]

    # Default result for an empty waypoint list: the camera does not move.
    R_new, T_new = R, T
    start = 0
    for end in distances:
        n_steps = max(2, int(np.abs(end - start) * sampling))
        for d in np.linspace(start, end, n_steps):
            R_new, T_new = _move_camera(R, T, fwd_ratio, up_ratio, right_ratio, d)
            cameras.append((R_new, T_new))
        start = end

    return R_new, T_new


def interpolate_RTs(R_a, T_a, R_b, T_b, cameras, num):
    """Append ``num`` poses interpolated from camera a to camera b to ``cameras``.

    Positions are interpolated linearly and rotations with
    ``_interpolate_rotation_quaternion``; both endpoints are included.

    Returns:
        The last generated ``(R, T)``.
    """
    start_pos, start_rot = _get_habitat_position_rotation(R_a, T_a)
    end_pos, end_rot = _get_habitat_position_rotation(R_b, T_b)

    positions = np.linspace(start_pos, end_pos, num)
    rotations = _interpolate_rotation_quaternion(start_rot, end_rot, num)

    Rs, Ts = zip(*[_get_pytorch3d_camera_RT(p, q) for p, q in zip(positions, rotations)])
    cameras.extend(zip(Rs, Ts))
    return Rs[-1], Ts[-1]


def extrapolate_RTs(R_a, T_a, R_b, T_b, ratio, cameras, num):
    """Append ``num`` poses that continue the a-to-b motion beyond camera b.

    Positions run linearly from b to ``b + (b - a) * ratio``; rotations come
    from ``_extrapolate_rotation_quaternion`` with the same ``ratio``.

    Returns:
        The last generated ``(R, T)``.
    """
    start_pos, start_rot = _get_habitat_position_rotation(R_a, T_a)
    end_pos, end_rot = _get_habitat_position_rotation(R_b, T_b)

    overshoot_pos = end_pos + (end_pos - start_pos) * ratio
    positions = np.linspace(end_pos, overshoot_pos, num)
    rotations = _extrapolate_rotation_quaternion(start_rot, end_rot, ratio, num)

    Rs, Ts = zip(*[_get_pytorch3d_camera_RT(p, q) for p, q in zip(positions, rotations)])
    cameras.extend(zip(Rs, Ts))
    return Rs[-1], Ts[-1]


def extrinsics_h_rotate(extrinsics, angles, extrinsics_cameras, sampling=3):
    """Extrinsics-level wrapper around ``h_rotate``.

    Converts ``extrinsics`` to ``(R, T)``, generates the sweep, appends every
    pose to ``extrinsics_cameras`` as flat extrinsics and returns the last
    element of ``extrinsics_cameras``.
    """
    R, T = extrinsics2RT_orig(extrinsics)
    poses = []
    h_rotate(R, T, angles, poses, sampling)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]


def extrinsics_v_rotate(extrinsics, angles, extrinsics_cameras, sampling=3):
    """Extrinsics-level wrapper around ``v_rotate``.

    Converts ``extrinsics`` to ``(R, T)``, generates the sweep, appends every
    pose to ``extrinsics_cameras`` as flat extrinsics and returns the last
    element of ``extrinsics_cameras``.
    """
    R, T = extrinsics2RT_orig(extrinsics)
    poses = []
    v_rotate(R, T, angles, poses, sampling)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]


def extrinsics_i_rotate(extrinsics, angles, extrinsics_cameras, sampling=3):
    """Extrinsics-level wrapper around ``i_rotate``.

    Converts ``extrinsics`` to ``(R, T)``, generates the sweep, appends every
    pose to ``extrinsics_cameras`` as flat extrinsics and returns the last
    element of ``extrinsics_cameras``.
    """
    R, T = extrinsics2RT_orig(extrinsics)
    poses = []
    i_rotate(R, T, angles, poses, sampling)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]


def extrinsics_move(extrinsics, distances, FUR_ratios, extrinsics_cameras, sampling=20):
    """Extrinsics-level wrapper around ``move``.

    Converts ``extrinsics`` to ``(R, T)``, generates the translation sweep,
    appends every pose to ``extrinsics_cameras`` as flat extrinsics and
    returns the last element of ``extrinsics_cameras``.
    """
    R, T = extrinsics2RT_orig(extrinsics)
    poses = []
    move(R, T, distances, FUR_ratios, poses, sampling)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]


def extrinsics_interpolate_RTs(extrinsics_a, extrinsics_b, extrinsics_cameras, num):
    """Extrinsics-level wrapper around ``interpolate_RTs``.

    Converts both extrinsics to ``(R, T)``, generates ``num`` interpolated
    poses, appends them to ``extrinsics_cameras`` as flat extrinsics and
    returns the last element of ``extrinsics_cameras``.
    """
    start_R, start_T = extrinsics2RT_orig(extrinsics_a)
    end_R, end_T = extrinsics2RT_orig(extrinsics_b)
    poses = []
    interpolate_RTs(start_R, start_T, end_R, end_T, poses, num)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]


def extrinsics_extrapolate_RTs(extrinsics_a, extrinsics_b, ratio, extrinsics_cameras, num):
    """Extrinsics-level wrapper around ``extrapolate_RTs``.

    Converts both extrinsics to ``(R, T)``, generates ``num`` extrapolated
    poses, appends them to ``extrinsics_cameras`` as flat extrinsics and
    returns the last element of ``extrinsics_cameras``.
    """
    start_R, start_T = extrinsics2RT_orig(extrinsics_a)
    end_R, end_T = extrinsics2RT_orig(extrinsics_b)
    poses = []
    extrapolate_RTs(start_R, start_T, end_R, end_T, ratio, poses, num)
    for pose_R, pose_T in poses:
        extrinsics_cameras.append(RT_orig2extrinsics(pose_R, pose_T))
    return extrinsics_cameras[-1]

## Select a scene

In [None]:
def load_data_line(data_line):
    """Parse one whitespace-separated data line and load its two frames.

    Field layout: ``[0]`` sequence directory, ``[1]``/``[2]`` source/target
    frame names, ``[3:7]`` intrinsics, ``[7:19]`` source extrinsics and
    ``[19:31]`` target extrinsics.

    Returns:
        ``(intrinsics, extrinsics_src, extrinsics_tgt, img_src_file,
        img_tgt_file, img_src, img_tgt)``. The intrinsics are divided by the
        module-level ``image_size``; the images are loaded at that size from
        ``frame_dir``.
    """
    fields = data_line.strip().split()

    def _floats(begin, end):
        return np.array(fields[begin:end], dtype=np.float32)

    intrinsics = _floats(3, 7) / image_size
    extrinsics_src = _floats(7, 19)
    extrinsics_tgt = _floats(19, 31)

    img_src_file = f"{frame_dir}/{fields[0]}/{fields[1]}.png"
    img_tgt_file = f"{frame_dir}/{fields[0]}/{fields[2]}.png"
    img_src = load_image(img_src_file, image_size)
    img_tgt = load_image(img_tgt_file, image_size)

    return intrinsics, extrinsics_src, extrinsics_tgt, img_src_file, img_tgt_file, img_src, img_tgt

def _snap_ours(show=True):
    """Render the current global camera (``extrinsics_new``) with our model.

    Reads the globals ``img_src``, ``intrinsics``, ``extrinsics_src``,
    ``extrinsics_new`` and ``use_inpainting``.

    Returns:
        The rendered image when ``show`` is False (so the result is not
        thrown away); ``None`` when the image is displayed, which keeps the
        array repr out of the cell output, like ``_snap``.
    """
    rendered = take_pic_ours(img_src, intrinsics, extrinsics_src, extrinsics_new, use_inpainting=use_inpainting, show=show)
    if not show:
        return rendered
    return None

def _snap_synsin(show=True):
    """Render the current global camera (``extrinsics_new``) with SynSin.

    Reads the globals ``img_src``, ``intrinsics``, ``extrinsics_src`` and
    ``extrinsics_new``.

    Returns:
        The rendered image when ``show`` is False (so the result is not
        thrown away); ``None`` when the image is displayed, which keeps the
        array repr out of the cell output, like ``_snap``.
    """
    rendered = take_synsin_pic(img_src, intrinsics, extrinsics_src, extrinsics_new, show=show)
    if not show:
        return rendered
    return None

def _snap_im2im(show=True):
    """Render the current global camera (``extrinsics_new``) with the Im2Im baseline.

    Reads the globals ``img_src``, ``intrinsics``, ``extrinsics_src`` and
    ``extrinsics_new``.

    Returns:
        The rendered image when ``show`` is False (so the result is not
        thrown away); ``None`` when the image is displayed, which keeps the
        array repr out of the cell output, like ``_snap``.
    """
    rendered = take_im2im_pic(img_src, intrinsics, extrinsics_src, extrinsics_new, show=show)
    if not show:
        return rendered
    return None

def _snap(show=True):
    """Render the current global camera with all models and show or return the results.

    Reads the globals ``img_src``, ``intrinsics``, ``extrinsics_src``,
    ``extrinsics_new``, ``use_inpainting`` and ``run_im2im``. When
    ``run_im2im`` is False the Im2Im image is replaced by an all-ones
    placeholder with the shape of our output.

    Returns:
        ``(ours_out, synsin_out, im2im_out)`` when ``show`` is False;
        ``None`` when the three images are displayed side by side.
    """
    ours_out = take_pic_ours(img_src, intrinsics, extrinsics_src, extrinsics_new, use_inpainting=use_inpainting, show=False)
    synsin_out = take_synsin_pic(img_src, intrinsics, extrinsics_src, extrinsics_new, show=False)
    if run_im2im:
        im2im_out = take_im2im_pic(img_src, intrinsics, extrinsics_src, extrinsics_new, show=False)
    else:
        im2im_out = np.ones_like(ours_out)

    if not show:
        return ours_out, synsin_out, im2im_out

    # One row, three panels, in the order Im2Im / SynSin / ours.
    panels = (("Im2Im", im2im_out), ("SynSin", synsin_out), ("ours", ours_out))
    plt.figure(figsize=(12, 4))
    for column, (title, image) in enumerate(panels, start=1):
        plt.subplot(1, 3, column)
        plt.imshow(image)
        plt.title(title)
        plt.axis("off")

In [None]:
# Scene definition: sequence id, source/target frame names, 4 intrinsics values,
# then 12 source and 12 target extrinsics values (see `load_data_line`).
intrinsics, extrinsics_src, extrinsics_tgt, img_src_file, img_tgt_file, img_src, img_tgt = load_data_line("""
024152256b6bcac7 90156733 90757333 121.18203 215.434708 128.0 128.0 0.56202 0.144292 -0.81444 -1.998381 -0.055799 0.989037 0.13672 -0.007332 0.825239 -0.031395 0.56391 -2.192661 0.600657 0.139602 -0.787224 -2.398725 -0.053141 0.989431 0.134914 -0.040777 0.797738 -0.039203 0.601728 -2.26991
""")
# Globals read by the `_snap*` helpers.
use_inpainting = False
run_im2im = False

# `cameras` collects one flat extrinsics array per video frame.
cameras = []
# "<sequence>_<frame>" key, reused for the output video file name.
image_key = '_'.join(img_src_file.split('/')[-2:]).replace('.png', ''); print(image_key)
extrinsics_new = extrinsics_src

# Hold the source view for 90 frames.
for _ in range(90):
    cameras.append(extrinsics_new)

# Each step below appends poses to `cameras`, moves `extrinsics_new` to the last
# appended pose and previews it with `_snap()`.
_snap();
# source view -> target view
extrinsics_new = extrinsics_interpolate_RTs(extrinsics_new, extrinsics_tgt, cameras, num=60); _snap();
# continue the source -> target motion past the target view
extrinsics_new = extrinsics_extrapolate_RTs(extrinsics_src, extrinsics_tgt, 1.5, cameras, num=90); _snap();
# translate along the front axis with ratio -1 (distance 3, then distance -2)
extrinsics_new = extrinsics_move(extrinsics_new, 3, [-1, 0, 0], cameras, sampling=120); _snap();
extrinsics_new = extrinsics_move(extrinsics_new, -2, [-1, 0, 0], cameras, sampling=120); _snap();
# horizontal rotation sweep: one way, back through the start to the other side, then back
extrinsics_new = extrinsics_h_rotate(extrinsics_new, 15, cameras, sampling=6); _snap();
extrinsics_new = extrinsics_h_rotate(extrinsics_new, -30, cameras, sampling=6); _snap();
extrinsics_new = extrinsics_h_rotate(extrinsics_new, 15, cameras, sampling=6); _snap();
# return to the source view
extrinsics_new = extrinsics_interpolate_RTs(extrinsics_new, extrinsics_src, cameras, num=120); _snap();

# Hold the final view for 60 frames.
for _ in range(60):
    cameras.append(extrinsics_new)

In [None]:
# Globals read by `_snap`; `run_im2im` also selects the video layout and file name.
use_inpainting = False
run_im2im = False
video_file = f"./save/realestate10k_videos_project_page/{image_key}{'_with_im2im' if run_im2im else ''}.mp4"
os.makedirs(os.path.dirname(video_file), exist_ok=True)

import skimage.io
from skimage import img_as_float32
# Background canvas that every composed frame is pasted onto.
background_frame = img_as_float32(skimage.io.imread('./tools/scripts/neural_rendering/video_making/data/realestate10k_background_new.png'))

# Composed frames plus the individual per-model frames.
frames = []
frames_im2im = []
frames_synsin = []
frames_ours = []

# All-ones (white) vertical spacer, image_size tall and image_size // 16 wide.
pad = np.ones((image_size, image_size // 16, 3), dtype=np.float32)
# `_snap` reads the global `extrinsics_new`, so the loop variable itself selects the camera.
for extrinsics_new in tqdm(cameras):
    ours_out, synsin_out, im2im_out = _snap(show=False)
    if run_im2im:
        concat_out = np.hstack((pad, im2im_out, pad, pad, synsin_out, pad, pad, ours_out, pad))
        frames_im2im.append(im2im_out)
    else:
        concat_out = np.hstack((pad, synsin_out, pad, pad, ours_out, pad))
    frames_synsin.append(synsin_out)
    frames_ours.append(ours_out)

    # Paste the strip onto a copy of the background, image_size // 8 rows from the top.
    # NOTE(review): assumes the background is exactly as wide as the strip and has 3 channels — confirm.
    combined = background_frame.copy()
    combined[image_size // 8:concat_out.shape[0] + image_size // 8] = concat_out
    frames.append(combined)


def write_video(frames, file, fps=60):
    """Encode a sequence of RGB float frames into an MP4 file.

    Args:
        frames: non-empty sequence of ``H x W x 3`` float RGB images; the
            video size is taken from the last frame.
        file: output path handed to ``cv2.VideoWriter``.
        fps: frame rate of the output video (defaults to 60).

    Raises:
        ValueError: if ``frames`` is empty.
    """
    if len(frames) == 0:
        raise ValueError("write_video needs at least one frame")

    # cv2 expects the frame size as (width, height).
    height, width = frames[-1].shape[:2]
    # Lower-case tag: OpenCV's FFMPEG backend reports "MP4V" as unsupported
    # for .mp4 containers and falls back to "mp4v" anyway.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(file, fourcc, fps, (width, height))
    try:
        for img in frames:
            # float32 -> uint8, RGB -> BGR
            writer.write(img_as_ubyte(img[..., ::-1]))
    finally:
        # Always finalize the file, even if a frame fails to convert.
        writer.release()
    

# Write the composed side-by-side video; the per-model videos below are disabled.
write_video(frames, video_file)
# if run_im2im:
#     write_video(frames_im2im, video_file.replace('.mp4', '_im2im.mp4'))
# write_video(frames_synsin, video_file.replace('.mp4', '_synsin.mp4'))
# write_video(frames_ours, video_file.replace('.mp4', '_ours.mp4'))