# equiformer_v2_N@12_L@6_M@2_epochs@30.yml
# EquiformerV2 (N=12 blocks, L_max=6, M_max=2), trained for 30 epochs on OC20 S2EF-2M.

trainer: forces_v2

dataset:
  - src: datasets/oc20/s2ef/2M/train/
    normalize_labels: True
    target_mean: -0.7554450631141663
    target_std: 2.887317180633545
    grad_target_mean: 0.0
    grad_target_std: 2.887317180633545
  - src: datasets/oc20/s2ef/all/val_id/
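  # Energy labels are normalized with target_mean / target_std; the force
  # (gradient) targets reuse the energy std so energies and forces stay on a
  # single scale, while grad_target_mean stays 0.0 so forces are rescaled but
  # never shifted.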

logger: wandb

task:
  dataset: trajectory_lmdb_v2
  description: "Regressing to energies and forces for DFT trajectories from OCP"
  type: regression
  metric: force_mae
  labels:
    - potential energy
  grad_input: atomic forces
  train_on_free_atoms: True
  eval_on_free_atoms: True
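  # With these two flags, only atoms that are not held fixed by constraints in
  # the OC20 relaxations contribute to the force loss and evaluation metrics.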

hide_eval_progressbar: False

model:
  name: equiformer_v2
  use_pbc: True
  regress_forces: True
  otf_graph: True
  max_neighbors: 20
  max_radius: 12.0
  max_num_elements: 90
  num_layers: 12
  sphere_channels: 128
  attn_hidden_channels: 64 # [64, 96] Hidden size of message passing; 96 is not necessarily better.
  num_heads: 8
  attn_alpha_channels: 64 # Not used when `use_s2_act_attn` is True.
  attn_value_channels: 16
  ffn_hidden_channels: 128
  norm_type: 'layer_norm_sh' # ['rms_norm_sh', 'layer_norm', 'layer_norm_sh']
  lmax_list: [6]
  mmax_list: [2]
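  # lmax_list bounds the spherical harmonic degree of the features (the L@6 in
  # the filename) and mmax_list bounds their order (the M@2); restricting the
  # order below the degree, as in eSCN, is what keeps the equivariant tensor
  # products tractable at L=6.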
  grid_resolution: 18 # [18, 16, 14, None] For `None`, simply comment out this line.
  num_sphere_samples: 128
  edge_channels: 128
  use_atom_edge_embedding: True
  share_atom_edge_embedding: False # If `True`, `use_atom_edge_embedding` must be `True` and the atom edge embedding will be shared across all blocks.
  distance_function: 'gaussian'
  num_distance_basis: 512 # not used
  attn_activation: 'silu'
  use_s2_act_attn: False # [False, True] Switch between attention applied after the S2 activation and the original EquiformerV1 attention.
  use_attn_renorm: True # Attention re-normalization. Used for ablation studies.
  ffn_activation: 'silu' # ['silu', 'swiglu']
  use_gate_act: False # [True, False] Switch between gate activation and S2 activation.
  use_grid_mlp: True # [False, True] If `True`, FFNs project features onto grids and apply MLPs there.
  use_sep_s2_act: True # Separable S2 activation. Used for ablation studies.
  alpha_drop: 0.1 # [0.0, 0.1]
  drop_path_rate: 0.05 # [0.0, 0.05]
  proj_drop: 0.0
  weight_init: 'uniform' # ['uniform', 'normal']
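  # Regularization note: alpha_drop is dropout on the attention weights and
  # drop_path_rate applies stochastic depth across blocks; proj_drop (0.0 here)
  # would drop activations in the output projections.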

optim:
  batch_size: 4 # 6
  eval_batch_size: 4 # 6
  grad_accumulation_steps: 1 # gradient accumulation: effective batch size = `grad_accumulation_steps` * `batch_size` * (num of GPUs)
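  # Worked example (GPU count is illustrative, not part of this config): on 16
  # GPUs the effective batch size is 1 * 4 * 16 = 64; raising
  # grad_accumulation_steps to 2 doubles it to 128 at no extra memory cost.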
  load_balancing: atoms
  num_workers: 8
  lr_initial: 0.0004 # [0.0002, 0.0004], eSCN uses 0.0008 for batch size 96
  optimizer: AdamW
  optimizer_params:
    weight_decay: 0.001
  scheduler: LambdaLR
  scheduler_params:
    lambda_type: cosine
    warmup_factor: 0.2
    warmup_epochs: 0.1
    lr_min_factor: 0.01
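    # A reading of these settings: the learning rate warms up linearly from
    # warmup_factor * lr_initial over the first 0.1 epochs, then follows a
    # cosine decay from 4.0e-4 down to lr_min_factor * lr_initial = 4.0e-6.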
  max_epochs: 30
  force_coefficient: 100
  energy_coefficient: 2
  clip_grad_norm: 100
  ema_decay: 0.999
  loss_energy: mae
  loss_force: l2mae
  eval_every: 5000
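  # Hedged reading of the weighting: total loss = 2 * MAE(energy)
  # + 100 * L2MAE(forces), so training is dominated by the force term,
  # consistent with the force_mae task metric above.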

#slurm:
#  constraint: "volta32gb"
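
# Example launch (hypothetical invocation following an OCP-style CLI; adjust
# the script name and config path to your checkout):
#   python main.py --mode train --config-yml equiformer_v2_N@12_L@6_M@2_epochs@30.yml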