# DINOv2 特征提取（ScanNet200-SV）

本 notebook 通过调用 `tools/extract_dinov2_features.py` 脚本离线提取 DINOv2 特征。输出目录由 `--output-root` 参数指定：下方单元格实际写入 `/home/nebula/xxy/dataset/dinov2_feat`（而非 `data/scannet200-sv/clip_feat`）。每个特征文件的形状应为 `(1024, 35, 46)`。

In [14]:
# Make the project root importable from this notebook session.
# NOTE: this only extends sys.path; it does not change or verify the current
# working directory.
import sys

_project_root = '/home/nebula/xxy/3D_Reconstruction'
# Notebook cells are frequently re-run in the same kernel; skip the append if
# the entry is already present so sys.path does not accumulate duplicates.
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [15]:
# Run the full ScanNet200-SV DINOv2 feature extraction.
# Equivalent to running the following in a terminal from the project root,
# using the same Python interpreter as this notebook kernel:
#   python tools/extract_dinov2_features.py \
#     --data-root data/scannet200-sv/2D \
#     --output-root /home/nebula/xxy/dataset/dinov2_feat \
#     --frame-interval 200 \
#     --device cuda \
#     --dtype fp16 \
#     --arch dinov2_vitl14 \
#     --checkpoint /home/nebula/xxy/dataset/models/dinov2_vitl14_reg4_pretrain.pth \
#     --strict-size
import os
import shlex
import subprocess
import sys

# Switch to the project root so the relative script/data paths below resolve.
# This changes the working directory for every later cell in this kernel too.
os.chdir("/home/nebula/xxy/3D_Reconstruction")
print("CWD =", os.getcwd())

cmd = [
    # Use the kernel's own interpreter: a bare 'python' is looked up on PATH
    # and may belong to a different environment than the one running this
    # notebook.
    sys.executable, 'tools/extract_dinov2_features.py',
    '--data-root', 'data/scannet200-sv/2D',
    '--output-root', '/home/nebula/xxy/dataset/dinov2_feat',
    '--frame-interval', '200',
    '--device', 'cuda',
    '--dtype', 'fp16',
    '--arch', 'dinov2_vitl14',
    '--checkpoint', '/home/nebula/xxy/dataset/models/dinov2_vitl14_reg4_pretrain.pth',
    '--strict-size',
]
# shlex.join quotes arguments where needed, so the echoed line can be pasted
# into a shell verbatim.
print('Running:', shlex.join(cmd))
# check=True raises CalledProcessError on a non-zero exit status. Kept as the
# last expression so the notebook displays the CompletedProcess result.
subprocess.run(cmd, check=True)


CWD = /home/nebula/xxy/3D_Reconstruction
Running: python tools/extract_dinov2_features.py --data-root data/scannet200-sv/2D --output-root /home/nebula/xxy/dataset/dinov2_feat --frame-interval 200 --device cuda --dtype fp16 --arch dinov2_vitl14 --checkpoint /home/nebula/xxy/dataset/models/dinov2_vitl14_reg4_pretrain.pth --strict-size


2025-12-08 16:14:59,248 - INFO - 设备: cuda, dtype: torch.float16, 帧采样间隔: 200
Using cache found in /home/nebula/.cache/torch/hub/facebookresearch_dinov2_main
2025-12-08 16:15:00,249 - INFO - using MLP layer as FFN
2025-12-08 16:15:02,625 - INFO - ✓ 从本地权重加载: /home/nebula/xxy/dataset/models/dinov2_vitl14_reg4_pretrain.pth
2025-12-08 16:15:02,774 - INFO - ✓ 成功加载 dinov2_vitl14
2025-12-08 16:15:02,780 - INFO - 待处理场景: 1513
2025-12-08 16:15:02,780 - INFO - [1/1513] 处理 scene0000_00
2025-12-08 16:15:02,781 - INFO - 处理场景 scene0000_00: 28 帧
2025-12-08 16:15:05,522 - INFO - [2/1513] 处理 scene0000_01    
2025-12-08 16:15:05,524 - INFO - 处理场景 scene0000_01: 30 帧
2025-12-08 16:15:07,520 - INFO - [3/1513] 处理 scene0000_02    
2025-12-08 16:15:07,521 - INFO - 处理场景 scene0000_02: 31 帧
2025-12-08 16:15:09,609 - INFO - [4/1513] 处理 scene0001_00    
2025-12-08 16:15:09,609 - INFO - 处理场景 scene0001_00: 8 帧
2025-12-08 16:15:10,149 - INFO - [5/1513] 处理 scene0001_01  
2025-12-08 16:15:10,150 - INFO - 处理场景 scene0001_01

CompletedProcess(args=['python', 'tools/extract_dinov2_features.py', '--data-root', 'data/scannet200-sv/2D', '--output-root', '/home/nebula/xxy/dataset/dinov2_feat', '--frame-interval', '200', '--device', 'cuda', '--dtype', 'fp16', '--arch', 'dinov2_vitl14', '--checkpoint', '/home/nebula/xxy/dataset/models/dinov2_vitl14_reg4_pretrain.pth', '--strict-size'], returncode=0)

In [19]:
# Quick sanity check: print the shape of one extracted sample feature, which
# is expected to be (1024, 35, 46).
# NOTE(review): torch.load unpickles the file; only load files you trust.
import pathlib
import torch

feat_path = pathlib.Path('/home/nebula/xxy/dataset/dinov2_feat/scene0000_00/200.pt')
obj = torch.load(feat_path, map_location='cpu')
if not isinstance(obj, dict):
    # Plain (non-dict) payload: report its shape directly.
    print('tensor shape:', tuple(obj.shape))
else:
    # Dict payload: report only the first tensor-valued entry, then stop.
    for k, v in obj.items():
        if not torch.is_tensor(v):
            continue
        print('dict key:', k, 'shape:', tuple(v.shape))
        break


tensor shape: (1024, 35, 46)


In [20]:
# Quick sanity check: print the shape of the demo feature file, which is
# expected to be (1024, 35, 46).
# NOTE(review): torch.load unpickles the file; only load files you trust.
import pathlib
import torch

feat_path = pathlib.Path('/home/nebula/xxy/3D_Reconstruction/vis_demo/200_dinov2_feat.pt')
obj = torch.load(feat_path, map_location='cpu')
if not isinstance(obj, dict):
    # Plain (non-dict) payload: report its shape directly.
    print('tensor shape:', tuple(obj.shape))
else:
    # Dict payload: report only the first tensor-valued entry, then stop.
    for k, v in obj.items():
        if not torch.is_tensor(v):
            continue
        print('dict key:', k, 'shape:', tuple(v.shape))
        break


tensor shape: (1024, 35, 46)


In [18]:
import torch

# Load one feature file from the temporary test output onto the CPU and
# print its shape.
feat = torch.load(
    "/tmp/dino_test/scene0000_00/200.pt",
    map_location=torch.device("cpu"),
)
print(feat.shape)  # expected: torch.Size([1024, 35, 46])


torch.Size([1024, 35, 46])
