/
conll2003_de_config.json
67 lines (51 loc) · 6.16 KB
/
conll2003_de_config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
"general": {
"cache_dir": {"value": null, "default": "", "desc": "Path for storing pre-trained models downloaded from s3."},
"data_dir": {"value": null, "default": "data/conll03-de", "desc": "Input directory for downstream task. Should contain train + test (+ dev) files."},
"output_dir": {"value": null, "default": "saved_models", "desc": "Output directory where model predictions and checkpoints will be saved."},
"cuda": {"value": null, "default": true, "desc": "CUDA flag, uses CUDA if available."},
"local_rank": {"value": null, "default": -1, "desc": "If local_rank == -1 -> multiGPU mode on one machine, other values signal distributed computation across several nodes (apex install required)."},
"fp16": {"value": null, "default": false, "desc": "Floating point precision. If true, it halves the precision and therefore the memory requirements, so you can double the batch size."},
"loss_scale": {"value": null, "default": 0, "desc": "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True. 0 (default value): dynamic loss scaling. Positive power of 2: static loss scaling value."},
"seed": {"value": null, "default": 42, "desc": "Random seed for initializations."},
"server_ip": {"value": null, "default": "", "desc": "Can be used for remote debugging."},
"server_port": {"value": null, "default": "", "desc": "Can be used for remote debugging."}
},
"task": {
"name": {"value": null, "default": "CONLL2003", "desc": "Name of task"},
"type": {"value": null, "default": "ner"},
"language": {"value": null, "default": "de"},
"do_eval": {"value": null, "default": true, "desc": "Whether to run eval on the dev set."},
"do_train": {"value": null, "default": true, "desc": "Whether to run training. Can be used to only evaluate on an already trained model."},
"processor_name": {"value": null, "default": "NERProcessor", "desc": "A Dataprocessor that is suited for tabular data. Needs special data parameters defined."},
"dev_split": {"value": null, "default": 0.0, "desc": "Split a dev set from the training set using dev_split as proportion."},
"train_filename": {"value": null, "default": "train.txt", "desc": "Filename for training."},
"dev_filename": {"value": null, "default": "dev.txt", "desc": "Filename for development. Missing in case of GermEval2018."},
"test_filename": {"value": null, "default": "test.txt", "desc": "Filename for testing. It is the submission file from competition."},
"delimiter": {"value": null, "default": "\t", "desc": "Delimiter used to separate columns in input data."},
"label_list": {"value": null, "default": ["[PAD]", "X", "O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-OTH", "I-OTH"], "desc": ""},
"metric": {"value": null, "default": "seq_f1", "desc": "Metric used. An F1 score tailored to sequences of labels."}
},
"parameter": {
"model": {"value": "bert-base-german-cased", "default": null, "desc": "Bert pre-trained model selected in the list: bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased."},
"prediction_head": {"value": null, "default": "TokenClassificationHead", "desc": "Kind of prediction head we use on top of Language Model"},
"lm_output_type": {"value": null, "default": "per_token", "desc": "Language Model output."},
"lower_case": {"value": null, "default": false, "desc": "Set to true if you are using an uncased model."},
"max_seq_len": {"value": null, "default": 128, "desc": "The maximum total input sequence length after WordPiece tokenization. Some GNAD texts even extend beyond 512 tokens."},
"balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
"learning_rate": {"value": null, "default": 5e-5, "desc": "The initial learning rate for Adam."},
"warmup_proportion": {"value": null, "default": 0.4, "desc": "Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% of training."},
"epochs": {"value": null, "default": 2, "desc": "Total number of training epochs to perform."},
"batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
"gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
"embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
"layer_dims": {"value": null, "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
},
"logging": {
"eval_every": {"value": null, "default": 60, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
"mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. https://public-mlflow.deepset.ai/)"},
"mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
"mlflow_experiment": {"value": "Public_FARM", "default": null, "desc": "Experiment name used for mlflow"},
"mlflow_run_name": {"value": "conll2003 by config", "default": null, "desc": "Name of the particular run for mlflow"}
}
}