#!/bin/bash
# Training script for standard QA training with cross-entropy (CE) loss and different distillation losses: Kullback-Leibler (KL) divergence and mean squared error (MSE)
script_dir="$(cd "$(dirname "$0")" && pwd)"
source "$script_dir/venv/bin/activate"
# Constants
kl_type="fw"                         # direction of the KL divergence ("fw" = forward)
seed=3
losses="ce kl"                       # cross-entropy + KL-divergence distillation loss
train_langs="en es de ar vi hi zh"
epochs=3
model=$script_dir/runs/mbert-qa-en   # mBERT QA model fine-tuned on English
model_type=bert
train_data_types=xquad
train_data_dirs=$script_dir/corpora/xquad
ntls="0 1 2 3 4 5 6"                 # numbers of target languages to sample
temps="2 4 8 16"                     # distillation softmax temperatures
testset=mlqa-dev                     # test set passed to eval_qa.sh
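
# Sweep over the number of sampled target languages and the distillation
# temperature; each (ntl, temp) pair gets its own output directory, and runs
# that already produced a trained model are skipped.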
for ntl in $ntls; do
  for temp in $temps; do
    out_dir=$script_dir/runs/joint_train-$train_data_types/${train_langs// /-}/mbert-qa-en/ep-$epochs/ntl-$ntl/${losses// /-}-$kl_type-self-distil/temp-$temp/seed-$seed
    mkdir -p $out_dir
    if [[ ! -f $out_dir/pytorch_model.bin ]]; then
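      # Step 1: sample a joint multilingual training set with $ntl target
      # languages; sampler_qa.py is expected to write train.json to $out_dir.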
      python $script_dir/src/sampler_qa.py \
        --train-data-dirs $train_data_dirs \
        --train-data-types $train_data_types \
        --languages $train_langs \
        --num-tgt-langs $ntl \
        --seed $seed \
        --output-dir $out_dir
      logging_dir=$out_dir/tb
      mkdir -p $logging_dir
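      # Step 2: train with self-distillation; the English-fine-tuned model
      # serves as both student and teacher.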
      python $script_dir/src/run_squad_w_distillation.py \
        --model_type $model_type \
        --model_name_or_path $model \
        --teacher_type $model_type \
        --teacher_name_or_path $model \
        --losses $losses \
        --kl_type $kl_type \
        --temperature $temp \
        --do_train \
        --self_distil \
        --train_file $out_dir/train.json \
        --per_gpu_train_batch_size 6 \
        --gradient_accumulation_steps 4 \
        --max_seq_length 384 \
        --num_train_epochs $epochs \
        --logging_steps 50 \
        --save_steps 10000000 \
        --overwrite_output_dir \
        --threads 1 \
        --seed $seed \
        --logging_dir $logging_dir \
        --output_dir $out_dir
    fi
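    # Step 3: evaluate on the MLQA dev set unless results already exist.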
    if [[ ! -f $out_dir/eval_results_$testset ]]; then
      bash $script_dir/eval_qa.sh $out_dir $testset
    fi
  done
done