/
bevfusion_sst.yaml
executable file
·94 lines (82 loc) · 2.1 KB
/
bevfusion_sst.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Shared geometry, referenced elsewhere in this file as ${voxel_size} and
# ${point_cloud_range}.
# With these values the x/y extent is 54.0 - (-54.0) = 108.0, i.e.
# 108.0 / 0.15 = 720 cells per axis, and the z extent is 3.0 - (-5.0) = 8.0,
# i.e. exactly one cell of height 8 — the same 720 x 720 x 1 shape that
# appears as sparse_shape / output_shape / grid_size further down.
# NOTE(review): element order is presumably [x, y, z] for voxel_size and
# [x_min, y_min, z_min, x_max, y_max, z_max] for the range — confirm.
voxel_size: [0.15, 0.15, 8]
point_cloud_range: [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]
# Model section for the BEVFusion_SST detector.
# Only the keys below are set here; sub-modules without a `type` (camera
# backbone, vtransform, decoder, heads) presumably get their remaining
# settings from a base config — confirm.
model:
  type: BEVFusion_SST
  encoders:
    camera:
      backbone:
        with_cp: true
      vtransform:
        # Step 0.15 equals the x/y entries of the top-level voxel_size, so
        # the bounds describe a 720 x 720 grid over [-54, 54].
        xbound: [-54.0, 54.0, 0.15]
        ybound: [-54.0, 54.0, 0.15]
        downsample: 1
    lidar:
      voxelize:
        point_cloud_range: ${point_cloud_range}
        voxel_size: ${voxel_size}
        # NOTE(review): -1 presumably means "no limit" (dynamic
        # voxelization) for both settings — confirm with the voxelizer.
        max_voxels: [-1, -1]
        max_num_points: -1
        # NOTE(review): the parent of this flag could not be determined from
        # the flattened source; it is placed under `voxelize` because it
        # directly follows the voxelize keys. Confirm against the model code
        # that reads it (it may belong one level up, under `lidar`).
        Voxelization: true
      voxel_encoder:
        type: DynamicVFE
        in_channels: 5
        feat_channels: [64, 128]
        with_distance: false
        voxel_size: ${voxel_size}
        with_cluster_center: true
        with_voxel_center: true
        point_cloud_range: ${point_cloud_range}
      middle_encoder:
        type: SSTInputLayerV2
        window_shape: [16, 16, 1]
        # 720 = (54.0 - (-54.0)) / 0.15; must stay in sync with voxel_size
        # and point_cloud_range.
        sparse_shape: [720, 720, 1]
        shuffle_voxels: true
        debug: true
        pos_temperature: 10000
        normalize_pos: false
        mute: true
      backbone:
        type: SSTv2
        # The three per-block lists below each have 8 entries, matching
        # num_blocks.
        d_model: [128, 128, 128, 128, 128, 128, 128, 128]
        nhead: [8, 8, 8, 8, 8, 8, 8, 8]
        num_blocks: 8
        dim_feedforward: [256, 256, 256, 256, 256, 256, 256, 256]
        # Same 720 x 720 footprint as middle_encoder.sparse_shape.
        output_shape: [720, 720]
        num_attached_conv: 3
        conv_in_channel: 128
        conv_out_channel: 128
        # NOTE(review): presumably the indices of blocks run with
        # activation checkpointing — confirm.
        checkpoint_blocks: [0, 1, 2, 3]
        debug: true
  fuser:
    type: ConvFuser
    # NOTE(review): presumably [camera channels, lidar channels]; the second
    # entry equals the lidar backbone's conv_out_channel (128) — confirm.
    in_channels: [80, 128]
    out_channels: 128
  decoder:
    backbone:
      # Equals fuser.out_channels.
      in_channels: 128
      out_channels: [64, 128, 256]
      layer_nums: [3, 5, 5]
      layer_strides: [2, 2, 2]
    neck:
      # Equals decoder.backbone.out_channels.
      in_channels: [64, 128, 256]
      out_channels: [128, 128, 128]
      # NOTE(review): with cumulative backbone strides of 2/4/8, these
      # factors presumably bring every level to 1/4 resolution, in line with
      # out_size_factor: 4 below — confirm.
      upsample_strides: [0.5, 1, 2]
  heads:
    object:
      # 384 = 128 + 128 + 128, the sum of decoder.neck.out_channels.
      in_channels: 384
      # grid_size and out_size_factor are repeated in train_cfg, test_cfg
      # and bbox_coder; keep all copies identical when changing resolution.
      train_cfg:
        grid_size: [720, 720, 1]
        out_size_factor: 4
      test_cfg:
        grid_size: [720, 720, 1]
        out_size_factor: 4
      bbox_coder:
        out_size_factor: 4
# Evaluation settings.
# NOTE(review): interval is presumably counted in epochs (evaluate after
# every epoch) — confirm against the evaluation hook.
evaluation:
  interval: 1
# Data-loader settings, both expressed per GPU.
# NOTE(review): the effective batch size is presumably
# samples_per_gpu x number of GPUs — confirm against the launcher.
data:
  samples_per_gpu: 4
  workers_per_gpu: 12
# Checkpoint retention.
# NOTE(review): presumably only the 3 most recent checkpoints are kept on
# disk — confirm against the checkpoint hook.
checkpoint_config:
  max_keep_ckpts: 3
# NOTE(review): treated as a top-level key (a sibling of checkpoint_config,
# not its child); presumably the total number of training epochs — confirm
# against the base config / runner that reads it.
max_epochs: 6