In [None]:

from occupancy.datasets.nuscenes import NuScenesDataset, NuScenesOccupancyDataset
from nuscenes.nuscenes import NuScenes
nusc = NuScenesOccupancyDataset(
    data_dir='/mnt/f/datasets/nuscenes/nuScenes-Occupancy-v0.1/', binary=False)


In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import Tensor

In [None]:
def occ_shuffle(occ: Tensor, cube_size: int = 32, shuffle_rato: float = 0.2):
    total_cubes = (occ.shape[-3] // cube_size) * (occ.shape[-2] // cube_size) * (occ.shape[-1] // cube_size)
    total_shuffle = int(total_cubes * shuffle_rato)
    cubes = torch.zeros(occ.shape[0], occ.shape[1], total_cubes, cube_size, cube_size, cube_size)
    for i in range(occ.shape[-3] // cube_size):
        for j in range(occ.shape[-2] // cube_size):
            for k in range(occ.shape[-1] // cube_size):
                ind = i * (occ.shape[-2] // cube_size) * (occ.shape[-1] // cube_size) + j * (occ.shape[-1] // cube_size) + k
                cubes[:, :, ind] = occ[:, :, i * cube_size:(i + 1) * cube_size, j * cube_size:(j + 1) * cube_size, k * cube_size:(k + 1) * cube_size] 
    ind_to_shuffle = torch.randperm(total_cubes)[:total_shuffle]
    shuffle_ind = torch.randperm(total_shuffle)
    cubes[:, :, ind_to_shuffle] = cubes[:, :, ind_to_shuffle][:, :, shuffle_ind]
    shuffled = torch.zeros_like(occ)
    for i in range(occ.shape[-3] // cube_size):
        for j in range(occ.shape[-2] // cube_size):
            for k in range(occ.shape[-1] // cube_size):
                ind = i * (occ.shape[-2] // cube_size) * (occ.shape[-1] // cube_size) + j * (occ.shape[-1] // cube_size) + k
                shuffled[:, :, i*cube_size:(i+1)*cube_size, j*cube_size:(j+1)*cube_size, k*cube_size:(k+1)*cube_size] = cubes[:, :, ind]
    return shuffled
                
    

In [None]:
occ = nusc[12]
occ = F.interpolate(occ.unsqueeze(0).float(), scale_factor=2, mode='trilinear', align_corners=True).squeeze(0).argmax(0)
occ = F.one_hot(occ, num_classes=18).permute(3, 0, 1, 2).argmax(0)
occ = occ_shuffle(occ[None, None, ...], 64,0.2)[0, 0]
i,j,k = torch.where(occ)
c = occ[i,j,k]
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(i,j,k, c=c, marker='s', s=1)
ax.set_xlim(0, occ.shape[-3])
ax.set_ylim(0, occ.shape[-2])
ax.set_zlim(0, occ.shape[-1])
ax.set_box_aspect((1,1,occ.shape[-1]/occ.shape[-2]))


In [None]:
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained('stabilityai/sdxl-vae')

In [None]:

from occupancy.datasets.nuscenes import NuScenesDataset, NuScenesOccupancyDataset
from nuscenes.nuscenes import NuScenes
nusc = NuScenesDataset(
    data_dir='/mnt/f/datasets/nuscenes/')

In [None]:
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torchvision.transforms.v2.functional as TF
import torch

data = nusc[0]

In [None]:
occ0 = data.lidar_top.occupancy
occ0.shape

In [None]:
def show_occ(occ):
    with plt.ioff():
        fig = plt.figure(figsize=(10,10))
        ax = fig.add_subplot(111, projection='3d')
        i,j,k = torch.where(occ[0, 0])
        ax.scatter(i,j,k, marker='s', s=1, c=k)
        ax.set_xlim(0, occ.shape[-3])
        ax.set_ylim(0, occ.shape[-2])
        ax.set_zlim(0, occ.shape[-1])
        ax.set_box_aspect((1,1,occ.shape[-1]/occ.shape[-2]))
        return fig

occ0_fig = show_occ(occ0)
occ0_fig

In [None]:

def build_kernel(size: int, sigma: float) -> torch.Tensor:
    grid = torch.arange(size, dtype=torch.float32)
    grid -= (size - 1) / 2
    grid = grid / sigma
    grid = grid ** 2
    grid = -0.5 * grid
    grid = torch.exp(grid)
    grid = grid / grid.sum()
    return grid

def occ_approx_roi(occ):
    #occ = occ.argmax(dim=1, keepdim=True)
    occ = occ > 0
    gaussian_blur_kernel3d = build_kernel(7, 1)
    gaussian_blur_kernel3d = gaussian_blur_kernel3d[None, :] * gaussian_blur_kernel3d[:, None] * gaussian_blur_kernel3d[:, None, None]
    gaussian_blur_kernel3d = gaussian_blur_kernel3d / gaussian_blur_kernel3d.sum()
    gaussian_blur_kernel3d = gaussian_blur_kernel3d[None, None, ...]
    mask = F.conv3d(occ.float(), gaussian_blur_kernel3d.to(device=occ.device), padding=3)
    mask = mask > 0
    mask = mask.any(dim=-1, keepdim=True).expand_as(mask)
    return mask

In [None]:
import torch
dinov2 = torch.hub.load("facebookresearch/dinov2", "dinov2_vitb14", trust_repo=True, skip_validation=True)

In [None]:
exportable = torch.export.export(dinov2, (torch.rand(1, 3, 224, 224),))

In [None]:
occ0.shape

In [None]:
occ1 = occ_approx_roi(occ0.cuda()).cpu()
#occ1 = occ1.any(dim=-1, keepdim=True).expand_as(occ1)

In [None]:
occ1.sum()

In [None]:
print(occ1.shape)

In [1]:
import torch
import torch.nn.functional as F
import os
from torch import nn, Tensor
torch.hub.set_dir(os.path.join(os.curdir, ".torch"))
image_feature = torch.hub.load(
    "facebookresearch/dinov2", "dinov2_vitb14", trust_repo=True, skip_validation=True
)
image_feature.eval()
image_feature = image_feature.cuda()

Using cache found in ./.torch/facebookresearch_dinov2_main
  _torch_pytree._register_pytree_node(


In [2]:
from occupancy.pipelines.panoramic2voxel import LinearCategoricalDeformation
from occupancy.models.transformer import ConditionalDecoderLayer, DecoderLayer

In [3]:
from collections import defaultdict
from functools import partial

class BEVLinearCategoricalDeformation(nn.Module):
    def __init__(self, feature_extrator):
        super().__init__()
        self.feature_extrator = feature_extrator
        for i, block in enumerate(self.feature_extrator.blocks):
            setattr(block, "_block_index", i)
            block.register_forward_hook(self._bev_linear_categorical_deformation_hook)
        hidden_size = 768
        df_size = 32 * 32 * 4
        self.num_layers = len(self.feature_extrator.blocks)
        self._bev_features = defaultdict(list)
        self._last_feats = [None for _ in range(self.num_layers)]
        self.deformations = nn.ModuleList([LinearCategoricalDeformation(hidden_size, hidden_size, deformative_size=df_size) for _ in range(self.num_layers)])
        self.attentions = nn.ModuleList([ConditionalDecoderLayer(hidden_size, hidden_size // 64, 64) for _ in range(self.num_layers - 1)])
        
    
    def _bev_linear_categorical_deformation_hook(self, module, input, output):
        block_index = getattr(module, "_block_index")
        df_feat = self.deformations[block_index](output)
        self._bev_features[block_index].append(df_feat)
        if len(self._bev_features[block_index]) == 6:
            bev_feat = sum(self._bev_features[block_index])
            self._last_feats[block_index] = bev_feat
            self._bev_features[block_index] = []
            if block_index > 0:
                self._last_feats[block_index] = self.attentions[block_index - 1](self._last_feats[block_index - 1], self._last_feats[block_index])
                self._last_feats[block_index - 1] = None
                
                
    def forward(self, x: Tensor) -> Tensor:
        for i in x.unbind(1):
            self.feature_extrator(i)
        return self._last_feats.pop()
    
image_feature.to('cuda')
image_feature.requires_grad_(False)
model = BEVLinearCategoricalDeformation(image_feature)
model.to('cuda').bfloat16()
x = torch.rand(1, 6, 3, 518, 518).cuda().bfloat16()
model(x)

tensor([[[ 22.7500, -29.7500,  20.0000,  ..., -24.7500,   3.9844,  28.6250],
         [-51.2500, -28.8750,  24.0000,  ...,  28.3750,  -4.9062,   3.3281],
         [-56.5000,  -4.9688,   7.5625,  ...,  17.0000, -15.8750,  -4.9375],
         ...,
         [ 33.2500, -30.3750, -19.5000,  ..., -30.5000,  16.8750,  -7.7500],
         [ 38.0000, -39.5000,   5.4688,  ..., -22.6250,   2.4219,   3.7969],
         [ 36.2500, -35.0000,  -3.8906,  ..., -11.3750, -36.2500,  51.7500]]],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<AddBackward0>)

In [4]:
model._last_feats

[None, None, None, None, None, None, None, None, None, None, None]

In [None]:
from occupancy import ops
def occ_approx_roi(occ: Tensor):
    
    occ = occ.argmax(0)
    occ = occ != 0
    F.gaussian_blur(occ.float(), kernel_size=3, sigma=2)


In [None]:
dinov2_vitl14.forward_features(img)

In [None]:
import torch
from occupancy.pipelines.autoencoderkl_3d import AutoEncoderKL3d
import torch_tensorrt
model = AutoEncoderKL3d(18, 18, 64, 64, 2, 3)


In [None]:
dinov2_vitl14

In [None]:
preds.shape

In [None]:
import numpy as np
from PIL import Image
import cv2
import torch
import os
os.chdir('/home/zc2309/workspace/occupancy/Depth_Anything')
from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
from torchvision.transforms import Compose
model = DepthAnything.from_pretrained("LiheYoung/depth_anything_vitl14")

transform = Compose([
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image = np.array(image) / 255.0
image = transform({'image': image})['image']
image = torch.from_numpy(image).unsqueeze(0).cuda()
model.cuda()

depth = model(image)


In [None]:
depth[0].max()

In [None]:
import torchvision.transforms.v2.functional as TF
import matplotlib.pyplot as plt
plt.imshow(TF.to_pil_image(depth[0].cpu(), mode='F'))

In [None]:
import torch
torch.export.export()

In [None]:
import torch
from occupancy.ops import view_as_cartesian, view_as_polar


x = torch.rand(1, 1, 64, 64, 64)
z = view_as_polar(x, (512, 512, 512), mode='bilinear')
x_ = view_as_cartesian(z, (64, 64, 64), mode='bilinear')

In [None]:
(x - x_).abs().max()