Skip to content

Commit

Permalink
refactor: ner_conll2003_torch_bert -> ner_conll2003_bert
Browse files Browse the repository at this point in the history
  • Loading branch information
IgnatovFedor committed Feb 1, 2022
1 parent bb41a58 commit 7fee5e5
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 209 deletions.
133 changes: 88 additions & 45 deletions deeppavlov/configs/ner/ner_conll2003_bert.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,103 +9,146 @@
"class_name": "data_learning_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "bert_ner_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"]
"in": [
"x"
],
"out": [
"x_tokens",
"x_subword_tokens",
"x_subword_tok_ids",
"startofword_markers",
"attention_mask"
]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"unk_token": ["O"],
"unk_token": [
"O"
],
"pad_with_zeros": true,
"save_path": "{NER_PATH}/tag.dict",
"load_path": "{NER_PATH}/tag.dict",
"fit_on": ["y"],
"in": ["y"],
"out": ["y_ind"]
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"fit_on": [
"y"
],
"in": [
"y"
],
"out": [
"y_ind"
]
},
{
"class_name": "bert_sequence_tagger",
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"keep_prob": 0.1,
"bert_config_file": "{BERT_PATH}/bert_config.json",
"pretrained_bert": "{BERT_PATH}/bert_model.ckpt",
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"use_crf": true,
"return_probas": false,
"ema_decay": 0.9,
"encoder_layer_ids": [-1],
"optimizer": "tf.train:AdamOptimizer",
"learning_rate": 1e-3,
"bert_learning_rate": 2e-5,
"min_learning_rate": 1e-7,
"encoder_layer_ids": [
-1
],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [
0.9,
0.999
],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"learning_rate_drop_patience": 30,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"clip_norm": 1.0,
"save_path": "{NER_PATH}/model",
"load_path": "{NER_PATH}/model",
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind"]
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": [
"x_subword_tok_ids",
"attention_mask",
"startofword_markers"
],
"in_y": [
"y_ind"
],
"out": [
"y_pred_ind"
]
},
{
"ref": "tag_vocab",
"in": ["y_pred_ind"],
"out": ["y_pred"]
"in": [
"y_pred_ind"
],
"out": [
"y_pred"
]
}
],
"out": ["x_tokens", "y_pred"]
"out": [
"x_tokens",
"y_pred"
]
},
"train": {
"epochs": 30,
"batch_size": 16,
"metrics": [
{
"name": "ner_f1",
"inputs": ["y", "y_pred"]
"inputs": [
"y",
"y_pred"
]
},
{
"name": "ner_token_f1",
"inputs": ["y", "y_pred"]
"inputs": [
"y",
"y_pred"
]
}
],
"validation_patience": 100,
"val_every_n_batches": 20,

"log_every_n_batches": 20,
"tensorboard_log_dir": "{NER_PATH}/logs",
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": ["valid", "test"],
"class_name": "nn_trainer"
"evaluation_targets": [
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12",
"NER_PATH": "{MODELS_PATH}/ner_conll2003_bert"
"TRANSFORMER": "bert-base-cased",
"MODEL_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert/{TRANSFORMER}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_bert_v1.tar.gz",
"url": "http://files.deeppavlov.ai/0.16/ner/ner_conll2003_torch_bert.tar.gz",
"subdir": "{MODELS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
Expand Down
155 changes: 0 additions & 155 deletions deeppavlov/configs/ner/ner_conll2003_torch_bert.json

This file was deleted.

6 changes: 2 additions & 4 deletions docs/features/models/ner.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,13 @@ Here is the list of all available configs:
+------------------------------------------------------------------------+ + +-----------------+------------+------------+
| :config:`ner_rus <ner/ner_rus.json>` | | | 1.0 GB | 5.6 MB | 95.1 |
+------------------------------------------------------------------------+--------------------+----------+-----------------+------------+------------+
| :config:`<ner/ner_ontonotes_bert_mult .json>` | Ontonotes | Multi | 700 MB | 2.0 GB | **87.2** |
| :config:`ner_ontonotes_bert_mult <ner/ner_ontonotes_bert_mult.json>` | Ontonotes | Multi | 700 MB | 2.0 GB | **87.2** |
+------------------------------------------------------------------------+ +----------+-----------------+------------+------------+
| :config:`ner_ontonotes_bert <ner/ner_ontonotes_bert.json>` | | En | 400 MB | 1.3 GB | 87.7 |
+------------------------------------------------------------------------+ + +-----------------+------------+------------+
| :config:`ner_ontonotes <ner/ner_ontonotes.json>` | | | 331 MB | 7.8 MB | 86.7 |
+------------------------------------------------------------------------+--------------------+ +-----------------+------------+------------+
| :config:`ner_conll2003_bert <ner/ner_conll2003_bert.json>` | CoNLL-2003 | | 400 MB | 850 MB | 91.7 |
+------------------------------------------------------------------------+ + +-----------------+------------+------------+
| :config:`ner_conll2003_torch_bert <ner/ner_conll2003_torch_bert.json>` | | | --- | 1.3 GB | 90.7 |
| :config:`ner_conll2003_bert <ner/ner_conll2003_bert.json>` | CoNLL-2003 | | --- | 1.3 GB | 90.7 |
+------------------------------------------------------------------------+ + +-----------------+------------+------------+
| :config:`ner_conll2003 <ner/ner_conll2003.json>` | | | 331 MB | 3.1 MB | 89.9 |
+------------------------------------------------------------------------+ + +-----------------+------------+------------+
Expand Down
4 changes: 1 addition & 3 deletions docs/features/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01
+ + +--------------------------------------------------------------------------------------------+-------------+
| | | :config:`ner_ontonotes.json <ner/ner_ontonotes.json>` | 87.1 |
+---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+
| CoNLL-2003 | | :config:`ner_conll2003_bert.json <ner/ner_conll2003_bert.json>` | 91.7 |
+ + +--------------------------------------------------------------------------------------------+-------------+
| | | :config:`ner_conll2003_torch_bert.json <ner/ner_conll2003_torch_bert.json>` | 88.6 |
| CoNLL-2003 | | :config:`ner_conll2003_bert.json <ner/ner_conll2003_bert.json>` | 90.7 |
+ + +--------------------------------------------------------------------------------------------+-------------+
| | | :config:`ner_conll2003.json <ner/ner_conll2003.json>` | 89.9 |
+---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+
Expand Down
3 changes: 1 addition & 2 deletions tests/test_quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,7 @@
("ner/ner_conll2003.json", "ner_conll2003", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK],
("ner/ner_ontonotes.json", "ner_ontonotes", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK],
("ner/ner_ontonotes_bert_emb.json", "ner_ontonotes_bert_emb", ('TI',)): [ONE_ARGUMENT_INFER_CHECK],
("ner/ner_rus.json", "ner_rus", ('IP',)): [ONE_ARGUMENT_INFER_CHECK],
("ner/ner_conll2003_torch_bert.json", "ner_conll2003_torch_bert", ('IP', 'TI')): [ONE_ARGUMENT_INFER_CHECK]
("ner/ner_rus.json", "ner_rus", ('IP',)): [ONE_ARGUMENT_INFER_CHECK]
},
"sentence_segmentation": {
("sentence_segmentation/sentseg_dailydialog.json", "sentseg_dailydialog", ('IP', 'TI')): [
Expand Down

0 comments on commit 7fee5e5

Please sign in to comment.