diff --git a/deeppavlov/configs/ner/ner_conll2003_bert.json b/deeppavlov/configs/ner/ner_conll2003_bert.json index 2314b8b875..21d338dcff 100644 --- a/deeppavlov/configs/ner/ner_conll2003_bert.json +++ b/deeppavlov/configs/ner/ner_conll2003_bert.json @@ -9,62 +9,102 @@ "class_name": "data_learning_iterator" }, "chainer": { - "in": ["x"], - "in_y": ["y"], + "in": [ + "x" + ], + "in_y": [ + "y" + ], "pipe": [ { - "class_name": "bert_ner_preprocessor", - "vocab_file": "{BERT_PATH}/vocab.txt", + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", "do_lower_case": false, "max_seq_length": 512, "max_subword_length": 15, "token_masking_prob": 0.0, - "in": ["x"], - "out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"] + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] }, { "id": "tag_vocab", "class_name": "simple_vocab", - "unk_token": ["O"], + "unk_token": [ + "O" + ], "pad_with_zeros": true, - "save_path": "{NER_PATH}/tag.dict", - "load_path": "{NER_PATH}/tag.dict", - "fit_on": ["y"], - "in": ["y"], - "out": ["y_ind"] + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] }, { - "class_name": "bert_sequence_tagger", + "class_name": "torch_transformers_sequence_tagger", "n_tags": "#tag_vocab.len", - "keep_prob": 0.1, - "bert_config_file": "{BERT_PATH}/bert_config.json", - "pretrained_bert": "{BERT_PATH}/bert_model.ckpt", + "pretrained_bert": "{TRANSFORMER}", "attention_probs_keep_prob": 0.5, - "use_crf": true, "return_probas": false, - "ema_decay": 0.9, - "encoder_layer_ids": [-1], - "optimizer": "tf.train:AdamOptimizer", - "learning_rate": 1e-3, - "bert_learning_rate": 2e-5, - "min_learning_rate": 1e-7, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, "learning_rate_drop_patience": 30, "learning_rate_drop_div": 1.5, "load_before_drop": true, - "clip_norm": 1.0, - "save_path": "{NER_PATH}/model", - "load_path": "{NER_PATH}/model", - "in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"], - "in_y": ["y_ind"], - "out": ["y_pred_ind"] + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] }, { "ref": "tag_vocab", - "in": ["y_pred_ind"], - "out": ["y_pred"] + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] } ], - "out": ["x_tokens", "y_pred"] + "out": [ + "x_tokens", + "y_pred" + ] }, "train": { "epochs": 30, @@ -72,40 +112,43 @@ "metrics": [ { "name": "ner_f1", - "inputs": ["y", "y_pred"] + "inputs": [ + "y", + "y_pred" + ] }, { "name": "ner_token_f1", - "inputs": ["y", "y_pred"] + "inputs": [ + "y", + "y_pred" + ] } ], "validation_patience": 100, "val_every_n_batches": 20, - "log_every_n_batches": 20, - "tensorboard_log_dir": "{NER_PATH}/logs", "show_examples": false, "pytest_max_batches": 2, "pytest_batch_size": 8, - "evaluation_targets": ["valid", "test"], - "class_name": "nn_trainer" + "evaluation_targets": [ + "valid", + "test" + ], + "class_name": "torch_trainer" }, "metadata": { "variables": { "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", - "NER_PATH": "{MODELS_PATH}/ner_conll2003_bert" + "TRANSFORMER": "bert-base-cased", + "MODEL_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert/{TRANSFORMER}" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_bert_v1.tar.gz", + "url": "http://files.deeppavlov.ai/0.16/ner/ner_conll2003_torch_bert.tar.gz", "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" } ] } diff --git a/deeppavlov/configs/ner/ner_conll2003_torch_bert.json b/deeppavlov/configs/ner/ner_conll2003_torch_bert.json deleted file mode 100644 index 21d338dcff..0000000000 --- a/deeppavlov/configs/ner/ner_conll2003_torch_bert.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/conll2003/", - "dataset_name": "conll2003", - "provide_pos": false - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "torch_transformers_ner_preprocessor", - "vocab_file": "{TRANSFORMER}", - "do_lower_case": false, - "max_seq_length": 512, - "max_subword_length": 15, - "token_masking_prob": 0.0, - "in": [ - "x" - ], - "out": [ - "x_tokens", - "x_subword_tokens", - "x_subword_tok_ids", - "startofword_markers", - "attention_mask" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "unk_token": [ - "O" - ], - "pad_with_zeros": true, - "save_path": "{MODEL_PATH}/tag.dict", - "load_path": "{MODEL_PATH}/tag.dict", - "fit_on": [ - "y" - ], - "in": [ - "y" - ], - "out": [ - "y_ind" - ] - }, - { - "class_name": "torch_transformers_sequence_tagger", - "n_tags": "#tag_vocab.len", - "pretrained_bert": "{TRANSFORMER}", - "attention_probs_keep_prob": 0.5, - "return_probas": false, - "encoder_layer_ids": [ - -1 - ], - "optimizer": "AdamW", - "optimizer_parameters": { - "lr": 2e-05, - "weight_decay": 1e-06, - "betas": [ - 0.9, - 0.999 - ], - "eps": 1e-06 - }, - "clip_norm": 1.0, - "min_learning_rate": 1e-07, - "learning_rate_drop_patience": 30, - "learning_rate_drop_div": 1.5, - "load_before_drop": true, - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "in": [ - "x_subword_tok_ids", - "attention_mask", - "startofword_markers" - ], - "in_y": [ - "y_ind" - ], - "out": [ - "y_pred_ind" - ] - }, - { - "ref": "tag_vocab", - "in": [ - "y_pred_ind" - ], - "out": [ - "y_pred" - ] - } - ], - "out": [ - "x_tokens", - "y_pred" - ] - }, - "train": { - "epochs": 30, - "batch_size": 16, - "metrics": [ - { - "name": "ner_f1", - "inputs": [ - "y", - "y_pred" - ] - }, - { - "name": "ner_token_f1", - "inputs": [ - "y", - "y_pred" - ] - } - ], - "validation_patience": 100, - "val_every_n_batches": 20, - "log_every_n_batches": 20, - "show_examples": false, - "pytest_max_batches": 2, - "pytest_batch_size": 8, - "evaluation_targets": [ - "valid", - "test" - ], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "TRANSFORMER": "bert-base-cased", - "MODEL_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert/{TRANSFORMER}" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/0.16/ner/ner_conll2003_torch_bert.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} diff --git a/docs/features/models/ner.rst b/docs/features/models/ner.rst index 8ebbd0b834..001a1d1f3a 100644 --- a/docs/features/models/ner.rst +++ b/docs/features/models/ner.rst @@ -40,15 +40,13 @@ Here is the list of all available configs: +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`ner_rus ` | | | 1.0 GB | 5.6 MB | 95.1 | +------------------------------------------------------------------------+--------------------+----------+-----------------+------------+------------+ - | :config:`` | Ontonotes | Multi | 700 MB | 2.0 GB | **87.2** | + | :config:`ner_ontonotes_bert_mult ` | Ontonotes | Multi | 700 MB | 2.0 GB | **87.2** | +------------------------------------------------------------------------+ +----------+-----------------+------------+------------+ | :config:`ner_ontonotes_bert ` | | En | 400 MB | 1.3 GB | 87.7 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`ner_ontonotes ` | | | 331 MB | 7.8 MB | 86.7 | +------------------------------------------------------------------------+--------------------+ +-----------------+------------+------------+ - | :config:`ner_conll2003_bert ` | CoNLL-2003 | | 400 MB | 850 MB | 91.7 | - +------------------------------------------------------------------------+ + +-----------------+------------+------------+ - | :config:`ner_conll2003_torch_bert ` | | | --- | 1.3 GB | 90.7 | + | :config:`ner_conll2003_bert ` | CoNLL-2003 | | --- | 1.3 GB | 90.7 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`ner_conll2003 ` | | | 331 MB | 3.1 MB | 89.9 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 10b0f51797..fff921a5b4 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -37,9 +37,7 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01 + + +--------------------------------------------------------------------------------------------+-------------+ | | | :config:`ner_ontonotes.json ` | 87.1 | +---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ -| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | -+ + +--------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | +| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 90.7 | + + +--------------------------------------------------------------------------------------------+-------------+ | | | :config:`ner_conll2003.json ` | 89.9 | +---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 3b70c43d15..96a9486feb 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -172,8 +172,7 @@ ("ner/ner_conll2003.json", "ner_conll2003", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK], ("ner/ner_ontonotes.json", "ner_ontonotes", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK], ("ner/ner_ontonotes_bert_emb.json", "ner_ontonotes_bert_emb", ('TI',)): [ONE_ARGUMENT_INFER_CHECK], - ("ner/ner_rus.json", "ner_rus", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], - ("ner/ner_conll2003_torch_bert.json", "ner_conll2003_torch_bert", ('IP', 'TI')): [ONE_ARGUMENT_INFER_CHECK] + ("ner/ner_rus.json", "ner_rus", ('IP',)): [ONE_ARGUMENT_INFER_CHECK] }, "sentence_segmentation": { ("sentence_segmentation/sentseg_dailydialog.json", "sentseg_dailydialog", ('IP', 'TI')): [