### Imports

In [1]:
import os
from pathlib import Path
import itertools
from enum import Enum
import hashlib
import math
import pickle
import json
import asyncio
import aiohttp
import random
import progressbar

from matplotlib import pyplot as plt
import open3d as o3d
from open3d.visualization import draw_plotly
from mpl_toolkits.mplot3d import Axes3D

import einops
import einx
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

import torch
import torch.nn as nn
import torch.amp as amp
import torch.nn.utils as utils
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, Sampler, RandomSampler, SubsetRandomSampler, BatchSampler
import torchvision
from torchvision.io import read_image, ImageReadMode
from torchvision.utils import save_image
from torchinfo import summary
from torchcodec.decoders import VideoDecoder
import lightning as L
import lightning.pytorch as pl
import lightning.pytorch.callbacks as callbacks

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
from src.datasets.raw_panoptic_dataset import RawPanopticDataset
from src.datasets.raw_plenoptic_dataset import RawPlenopticDataset
from src.datasets.full_dataset import FullDataset

from src.model.pose_encoder import compute_pad, compute_octaves, compute_view_rays

from src.config import load_config

from src.model import PoseEncoder, DVST, latent_aggregators

from src.draw import get_camera_geometry

from src.utils import print_num_params


In [3]:
# Show the installed PyTorch version string (it includes the CUDA build tag).
torch.__version__

'2.7.0+cu126'

In [4]:
# Choose the compute backend in order of preference: CUDA, then Apple MPS,
# and finally the CPU when no accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device


'cuda'

### DVST Config

In [5]:
# Load the experiment configuration from YAML into a single config object,
# which makes it easier to pass around and validate.
config = load_config('res/config.yaml')

# Peek at a few setup values: DDP rank, AMP dtype and the configured device.
config.setup.ddp.rank, config.setup.amp.dtype, config.setup.device

(0, torch.bfloat16, device(type='cuda', index=0))

### Loading dataset

In [6]:
# Build the combined dataset from the dataset entries in the training config.
# NOTE(review): `.to(device)` presumably makes the dataset yield tensors on
# `device` (the sample printed further down has its tensors on cuda:0) - confirm
# against FullDataset.
dataset_full = FullDataset(config.train.data.datasets).to(device)
# Number of items the dataset exposes.
len(dataset_full)

87

### DVST

In [7]:
# Instantiate DVST from the model section of the config and move it to `device`.
model = DVST(config=config.model).to(device)


In [8]:
# Report the model's parameter counts (total and trainable).
# `print_num_params` is imported with the other project imports near the top of
# the notebook so that all dependencies are declared in one place.
print_num_params(model)


Total params: 7.87M; Trainable params: 7.87M


In [9]:
# Grab the first sample; it is cropped and fed to the model in the cells below.
# Its repr shows a 'sources' list whose entries hold a 'video' decoder, the
# matrices/vectors 'K', 'Kinv', 'R', 't' (presumably camera intrinsics and
# extrinsics - confirm against the dataset code), a 'time' tensor and the clip
# 'shape'.
s = dataset_full[0]
s

{'sources': [{'video': <torchcodec.decoders._video_decoder.VideoDecoder at 0x7fc10f8fbc80>,
   'K': tensor([[1.6547e+03, 0.0000e+00, 9.6000e+02],
           [0.0000e+00, 1.5388e+03, 5.4000e+02],
           [0.0000e+00, 0.0000e+00, 1.0000e+00]], device='cuda:0'),
   'Kinv': tensor([[ 6.0433e-04,  0.0000e+00, -5.8016e-01],
           [ 0.0000e+00,  6.4986e-04, -3.5093e-01],
           [ 0.0000e+00,  0.0000e+00,  1.0000e+00]], device='cuda:0'),
   'R': tensor([[[-0.5296, -0.0115,  0.8482],
            [ 0.6366,  0.6554,  0.4064],
            [-0.5606,  0.7552, -0.3397]]], device='cuda:0'),
   't': tensor([[ -5.6521,  81.6465, 378.2934]], device='cuda:0'),
   'time': tensor([0.0000e+00, 3.3367e-02, 6.6733e-02,  ..., 2.0254e+02, 2.0257e+02,
           2.0260e+02], device='cuda:0'),
   'shape': torch.Size([6073, 3, 1080, 1920])},
  {'video': <torchcodec.decoders._video_decoder.VideoDecoder at 0x7fc0e69e1df0>,
   'K': tensor([[1.4357e+03, 0.0000e+00, 9.6000e+02],
           [0.0000e+00, 1.343

In [10]:
# Shrink the sample to a tiny clip so a full forward pass stays cheap.
N_FRAMES = 10  # number of leading frames kept from every clip
CROP = 64      # side length (pixels) of the top-left spatial crop


def crop_clip(video, n_frames=N_FRAMES, size=CROP):
    """Return the first `n_frames` frames of `video`, cropped to the top-left
    `size` x `size` patch.

    Args:
        video: Object that can be sliced along its first (frame) axis and whose
            slice supports 4-D indexing as (frame, channel, height, width).
        n_frames: Number of leading frames to keep.
        size: Height and width of the spatial crop.

    Returns:
        The trimmed and cropped clip.

    Frames are selected first and cropped second, in two separate indexing
    steps, so `video` itself only needs to support slicing on the frame axis.
    NOTE(review): this presumably matters for `VideoDecoder` inputs, which
    appear to accept only int/slice indices - confirm against torchcodec.
    """
    return video[:n_frames][:, :, :size, :size]


s.n_frames = N_FRAMES

# Sources and queries are trimmed identically, so one loop handles both.
for view in [*s.sources, *s.queries]:
    view.video = crop_clip(view.video)
    # Take the shape from the cropped tensor itself so the metadata can never
    # disagree with the data (e.g. for short clips or a different channel count).
    view.shape = view.video.shape

for idx, target in enumerate(s.targets):
    s.targets[idx] = crop_clip(target)

s.sources[0].video.shape

torch.Size([10, 3, 64, 64])

In [11]:
# Smoke-test forward pass on the sample under autocast.
# The autocast dtype is read from the config (`config.setup.amp.dtype`) instead
# of being hard-coded, so this check runs at the precision the config prescribes.
# NOTE(review): no `torch.no_grad()` is used here, so autograd presumably still
# records the graph; wrap the call if this cell is meant for inference only.
with amp.autocast(device_type=device, dtype=config.setup.amp.dtype):
    out = model(s)
out[0].shape

torch.Size([10, 3, 64, 64])

In [12]:
# TODO:
# - Configure the transformer encoder and decoder layers.
# - Add optimizations (checkpointing, mixed precision, etc.).
# - Run a first test of the model with small parameters and check how much of it this PC can handle.
# - Create combinations of configs for small experiments.
