# prm.yaml
# Model and tokenizer selection.
base_model: Qwen/Qwen2.5-Math-7B-Instruct
model_type: AutoModelForTokenClassification
tokenizer_type: AutoTokenizer
num_labels: 2

# 8-bit and 4-bit loading are both switched off.
load_in_4bit: false
load_in_8bit: false
strict: false

# Liger options (both enabled).
liger_glu_activation: true
liger_rms_norm: true

# Process-reward-model mode is on; no chat template is set.
process_reward_model: true
chat_template: null
# Training datasets: a list with one mapping per dataset.
# Every key that follows a `- path:` line is indented so that it belongs to
# that list entry; at column 0 it would be read as a top-level key and the
# second `- path:` item would no longer parse.
datasets:
  - path: axolotl-ai-co/prm800k_phase_2
    type: stepwise_supervised
    # Double quotes are required so that \n is read as a newline escape.
    step_separator: "\n\n"
    max_completion_length: null
    train_on_last_step_only: false
    # Key name corrected from the misspelled `splt`.
    split: train
  - path: axolotl-ai-co/prm800k_phase_1
    type: stepwise_supervised
    step_separator: "\n\n"
    max_completion_length: null
    train_on_last_step_only: false
    split: train
# Output location.
output_dir: ./outputs/out
remove_unused_columns: false

# Sequence handling: packing is off for training and evaluation, and
# padding to the sequence length is on.
sequence_len: 4096
pad_to_sequence_len: true
sample_packing: false
eval_sample_packing: false

# Weights & Biases settings are all left unset.
wandb_project: null
wandb_entity: null
wandb_name: null
wandb_watch: null
wandb_log_model: null
# Batch sizes and number of passes over the data.
gradient_accumulation_steps: 1
micro_batch_size: 8
eval_batch_size: 64
num_epochs: 1

# Optimizer and learning-rate schedule.
optimizer: adamw_torch_fused
lr_scheduler: cosine
# Written with a decimal point on purpose: YAML 1.1 loaders (PyYAML among
# them) resolve a bare `5e-5` as the string "5e-5", not as a float.
learning_rate: 5.0e-5
# Gradient checkpointing is enabled. `use_reentrant` is nested under
# `gradient_checkpointing_kwargs`; at column 0 it would be a separate
# top-level key and the kwargs mapping would be null.
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
# Data-handling flags (both off).
train_on_inputs: false
group_by_length: false

# Numeric precision: bf16 is "auto", fp16 is unset, tf32 is on.
bf16: auto
fp16: null
tf32: true

# Attention implementation and compilation.
flash_attention: true
xformers_attention: null
torch_compile: true

# Run control: a log entry every step; the remaining keys are unset.
logging_steps: 1
early_stopping_patience: null
resume_from_checkpoint: null
local_rank: null
# Schedule and regularisation.
warmup_ratio: 0.1
weight_decay: 0.0

# Evaluation and checkpoint cadence, expressed per epoch.
evals_per_epoch: 10
saves_per_epoch: 10
eval_max_new_tokens: 128
eval_table_size: null

# Distributed training: a DeepSpeed config file is given; FSDP is unset.
deepspeed: deepspeed_configs/zero1.json
fsdp: null
fsdp_config: null

# Left unset.
debug: null
special_tokens: null