# DINOv2 2D–3D 对齐调试

本 notebook 用单帧 `scene0000_00 / 200` 检查 DINOv2 特征在 2D 图像与 3D 点云上的几何对齐情况。

本轮只实现 **部分 A：2D DINO 特征 (clip_pix)**：

1. **检查 1：2D PCA（patch → 480×640）**  
   - 读取离线 DINO 特征 `(1024, 35, 46)` 和 RGB 图 `(480,640)`；  
   - 使用 Concerto 风格 PCA 得到 35×46 伪彩图，再按 patch 大小 14 做 Kronecker 上采样到 490×644（35×14、46×14），取左上角裁剪回 480×640；  
   - 与原 RGB 并排显示，检查 35×46 patch 网格与 480×640 像素的对应关系是否合理。

2. **检查 2：DINO 点级特征（patch → 点云 → 3D → 2D）**  
   - 使用与模型完全一致的函数：  
     `project_points_to_uv(xyz_cam, feat_hw=(35,46), max_depth=20.0, standard_intrinsics=SCANET_INTRINSICS)`，  
     `sample_img_feat(clip_pix.unsqueeze(0), uv, valid, align_corners=False)`；  
   - 得到点级 DINO 特征 `(N_points, 1024)`，PCA 后在 3D 点云上上色；  
   - 再用 `project_points_to_uv(xyz_cam, feat_hw=(480,640))` + `splat_to_grid` 将彩色点云 rasterize 回 2D 图；  
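   - 与检查 1 的 2D PCA 结果对比，若颜色分布近似，则认为「2D DINO ↔ 3D 点云」投影几何是自洽的。注意：两次 PCA 分别在 patch 特征和点级特征上各自调用 `pca_color_from_nxc`，若该函数每次独立拟合主成分，两幅图的颜色可能出现整体偏移或通道翻转，此时应主要比较区域边界与空间结构，而不是绝对颜色。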

In [None]:
import sys, os
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import open3d as o3d

# Put the project root on sys.path so the oneformer3d / vis_demo helpers
# imported below can be resolved.
ROOT = Path('/home/nebula/xxy/3D_Reconstruction')
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

from oneformer3d.projection_utils import (
    SCANET_INTRINSICS,
    project_points_to_uv,
    sample_img_feat,
    splat_to_grid,
)
from vis_demo.vis_dino_2d_pca import pca_color_from_nxc

# Report where the notebook runs and which project root it resolves against.
for _label, _value in (('CWD =', os.getcwd()), ('Using ROOT =', ROOT)):
    print(_label, _value)

# Use the GPU when one is visible to torch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device =', device)

# Fixed debug sample: scene0000_00, frame 200.
SCENE = 'scene0000_00'
FRAME = 200

# Every input of this sample lives under the same dataset directory.
_data_dir = ROOT / 'data/scannet200-sv'
RGB_PATH = _data_dir / '2D' / SCENE / 'color' / f'{FRAME}.jpg'
DINO_PATH = _data_dir / 'clip_feat' / SCENE / f'{FRAME}.pt'
POINTS_PATH = _data_dir / 'points' / f'{SCENE}_{FRAME}.bin'
POSE_PATH = _data_dir / 'pose_centered' / SCENE / f'{FRAME}.npy'

# Echo the resolved paths so a wrong location is visible before any load fails.
for _label, _value in (
    ('RGB_PATH   =', RGB_PATH),
    ('DINO_PATH  =', DINO_PATH),
    ('POINTS_PATH=', POINTS_PATH),
    ('POSE_PATH  =', POSE_PATH),
):
    print(_label, _value)

In [None]:
# Check 1: 2D PCA (patch grid -> 480x640)
#
# Loads the RGB frame and the offline DINO feature map, colours every patch
# with pca_color_from_nxc, blows each patch up to PATCH_SIZE x PATCH_SIZE
# pixels and shows the result next to the RGB image.

# 1) Load the reference RGB frame.
img = Image.open(RGB_PATH).convert('RGB')
W_orig, H_orig = img.size  # PIL reports (width, height); expected (640, 480)
print(f'Original image size (W,H) = ({W_orig},{H_orig})')

# 2) Load the DINO feature map, expected layout (C, H_p, W_p).
dino_obj = torch.load(DINO_PATH, map_location='cpu')
if isinstance(dino_obj, dict):
    # The file may wrap the tensor in a dict; probe the usual key names in order.
    feat = None
    for k in ('pix', 'feat', 'clip_pix', 'dino_feat', 'x'):
        v = dino_obj.get(k)
        if torch.is_tensor(v):
            feat = v
            print(f'Using dict["{k}"] as DINO feature')
            break
    if feat is None:
        raise RuntimeError(f'No tensor feature found in dict keys={list(dino_obj.keys())}')
else:
    feat = dino_obj

# Fail with a readable message instead of an AttributeError / unpack error
# further down when the payload is not a (C,H,W) or (B,C,H,W) tensor.
if not torch.is_tensor(feat):
    raise RuntimeError(f'Expected a tensor in {DINO_PATH}, got {type(feat).__name__}')

feat = feat.float()
if feat.dim() == 4:
    feat = feat[0]  # keep the first batch entry only
if feat.dim() != 3:
    raise RuntimeError(
        f'Expected DINO feature of shape (C,H,W) or (B,C,H,W), got {tuple(feat.shape)}'
    )
C, H_p, W_p = feat.shape
print(f'DINO feat shape: C={C}, H_p={H_p}, W_p={W_p}')

if (H_p, W_p) != (35, 46):
    print('[WARN] Expected (35,46) patch grid, got ', (H_p, W_p))

# 3) Concerto-style PCA: one colour per patch.
x = feat.reshape(C, -1).t()  # (N, C) with N = H_p * W_p; feat is already float32
color_patch = pca_color_from_nxc(x)   # (N,3)
# reshape (not view) so a non-contiguous result from the helper is still accepted.
img_pca_patch = color_patch.reshape(H_p, W_p, 3).cpu().numpy()
img_pca_patch = np.clip(img_pca_patch, 0.0, 1.0)

# 4) Upsample every patch to PATCH_SIZE x PATCH_SIZE pixels, then crop the
#    top-left region back to the original image size.
PATCH_SIZE = 14  # DINOv2 ViT-14 patch size
img_pca_up = np.kron(
    img_pca_patch,
    np.ones((PATCH_SIZE, PATCH_SIZE, 1), dtype=img_pca_patch.dtype),
)
H_up, W_up, _ = img_pca_up.shape
print(f'Upsampled PCA shape: H_up={H_up}, W_up={W_up}')
if H_up < H_orig or W_up < W_orig:
    # Slicing below would silently return an image smaller than the RGB frame.
    print(
        f'[WARN] Upsampled PCA ({H_up}x{W_up}) is smaller than the image '
        f'({H_orig}x{W_orig}); the crop will not cover the full frame'
    )
img_pca_480x640 = img_pca_up[:H_orig, :W_orig, :]
print('Cropped PCA shape:', img_pca_480x640.shape)

# 5) Show the RGB frame and the 2D PCA image side by side.
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.imshow(img)
plt.title('RGB')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(img_pca_480x640)
plt.title('DINOv2 PCA (35x46 → 480x640)')
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Check 2: point-level DINO features (patch -> point cloud -> 3D -> 2D)
#
# Samples the DINO feature map at the projected location of every 3D point,
# colours the points with pca_color_from_nxc, saves the coloured cloud as a
# .ply file, then splats the coloured points back onto the image grid and
# shows the result next to the 2D PCA image produced by check 1.
#
# NOTE(review): pca_color_from_nxc is called separately here and in check 1.
# If it fits a fresh PCA basis per call, the two colourings are not guaranteed
# to share axes or signs — confirm before comparing absolute colours.

# 1) Load the point cloud and the camera pose.
# The .bin file is read as a flat float32 buffer with 6 values per point; only
# the first three columns are used below as xyz.
# presumably the remaining three columns are RGB — TODO confirm.
pts_arr = np.fromfile(POINTS_PATH, dtype=np.float32)
if pts_arr.size % 6 != 0:
    raise RuntimeError(f'Unexpected point file size: {POINTS_PATH}, size={pts_arr.size}')
pts = pts_arr.reshape(-1, 6)
xyz = torch.from_numpy(pts[:, :3]).to(device)
print(f'Loaded {pts.shape[0]} points')

# The stored matrix is treated as a camera-to-world pose and inverted, so that
# applying it to homogeneous points yields camera-frame coordinates.
# NOTE(review): assumes the points are expressed in the same world frame as the
# pose and that the pose is finite/invertible — TODO confirm.
pose_cam2world = torch.from_numpy(np.load(POSE_PATH)).float().to(device)
w2c = torch.linalg.inv(pose_cam2world)
xyz_h = torch.cat([xyz, torch.ones_like(xyz[:, :1])], dim=1)  # (N,4) homogeneous coordinates
xyz_cam = (w2c @ xyz_h.t()).t()[:, :3]  # (N,3) after dropping the homogeneous column

# 2) Project the 3D points onto the DINO patch grid (H_p x W_p, expected 35x46)
#    and sample one feature vector per point.
clip_pix = feat.to(device)  # (C, H_p, W_p)
uv_feat, valid = project_points_to_uv(
    xyz_cam.to(torch.float32),
    feat_hw=(H_p, W_p),
    max_depth=20.0,
    standard_intrinsics=SCANET_INTRINSICS,
    debug=True,
    debug_prefix='DINO_35x46',
)

clip_pix_bchw = clip_pix.unsqueeze(0)  # (1,C,H_p,W_p): add a batch dimension for the sampler
sampled_feat = sample_img_feat(clip_pix_bchw, uv_feat, valid, align_corners=False)  # (N,C)
print('sampled_feat shape:', tuple(sampled_feat.shape))
print('valid ratio: {:.3f}'.format(valid.float().mean().item()))

# 3) PCA over the point-level DINO features, used to colour the 3D point cloud.
x_pts = sampled_feat[valid].to(torch.float32)  # PCA over valid points only
color_pts = pca_color_from_nxc(x_pts)  # (N_valid,3)
# Points without a valid projection keep the initial colour (0, 0, 0).
color_full = np.zeros((pts.shape[0], 3), dtype=np.float32)
valid_np = valid.cpu().numpy().astype(bool)
color_full[valid_np] = np.clip(color_pts.cpu().numpy(), 0.0, 1.0)

# The cloud is built from the coordinates as loaded from the file (pts), not
# from the camera-frame coordinates (xyz_cam).
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pts[:, :3])
pcd.colors = o3d.utility.Vector3dVector(color_full)
print('Constructed colored point cloud for DINO PCA.')

# Save the cloud as .ply so it can be inspected in an external viewer.
out_ply = ROOT / 'vis_demo' / f'{SCENE}_{FRAME}_dino_pca_points_35x46.ply'
out_ply.parent.mkdir(parents=True, exist_ok=True)
o3d.io.write_point_cloud(str(out_ply), pcd)
print('Saved DINO PCA-colored point cloud to', out_ply)

# For an interactive view on a local machine, uncomment the next line:
# o3d.visualization.draw_geometries([pcd])

# 4) Rasterize the coloured point cloud back onto the 480x640 image grid and
#    compare it with the 2D PCA image.
uv_img, valid_img = project_points_to_uv(
    xyz_cam.to(torch.float32),
    feat_hw=(H_orig, W_orig),  # 480x640 pixel grid
    max_depth=20.0,
    standard_intrinsics=SCANET_INTRINSICS,
    debug=True,
    debug_prefix='DINO_480x640',
)

uv_np = uv_img.cpu().numpy()
z_np = xyz_cam[:, 2].cpu().numpy()
valid_full = (valid_img.cpu().numpy().astype(bool)) & valid_np  # valid on both the 35x46 and the 480x640 grid

# Only the jointly valid points are passed on, as CPU float32 tensors.
uv_valid = torch.from_numpy(uv_np[valid_full]).to(torch.float32)
z_valid = torch.from_numpy(z_np[valid_full]).to(torch.float32)
feat_valid = torch.from_numpy(color_full[valid_full]).to(torch.float32)

# The inputs were filtered above, so the mask handed to splat_to_grid is all True.
F2D, cover = splat_to_grid(
    uv=uv_valid,
    z=z_valid,
    feats=feat_valid,
    valid=torch.ones_like(z_valid, dtype=torch.bool),
    H=H_orig,
    W=W_orig,
    mode='bilinear',
)
# Move the leading axis of F2D last so matplotlib can show it as an image.
# presumably F2D is (3, H, W) — TODO confirm against splat_to_grid.
img_from_pts = F2D.permute(1, 2, 0).cpu().numpy()
img_from_pts = np.clip(img_from_pts, 0.0, 1.0)

# Side-by-side comparison: 2D PCA from check 1 vs. the image rebuilt from points.
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.imshow(img_pca_480x640)
plt.title('2D PCA (patch → 480x640)')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(img_from_pts)
plt.title('PCA from point cloud (3D→2D)')
plt.axis('off')
plt.tight_layout()
plt.show()