Skip to content

Commit

Permalink
Release 0.8.0 (#1138)
Browse files (browse the repository at this point in the history)
  • Loading branch information
yoptar committed Feb 26, 2020
2 parents 3cb56cd + 7c4f4df commit c10b079
Show file tree
Hide file tree
Showing 99 changed files with 1,969 additions and 2,691 deletions.
2 changes: 1 addition & 1 deletion deeppavlov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def evaluate_model(config: [str, Path, dict], download: bool = False, recursive:
except ImportError:
'Assuming that requirements are not yet installed'

__version__ = '0.7.1'
__version__ = '0.8.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Expand Down
149 changes: 149 additions & 0 deletions deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "Twit",
"y": "Class",
"data_path": "{DOWNLOADS_PATH}/sentiment_twitter_data"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"class_name": "transformers_bert_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["x"],
"out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"class_name": "transformers_bert_embedder",
"id": "my_embedder",
"bert_config_path": "{BERT_PATH}/bert_config.json",
"truncate": false,
"load_path": "{BERT_PATH}",
"in": ["subword_tok_ids", "startofword_markers", "attention_mask"],
"out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"in": [
"word_emb"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
],
"main": true,
"class_name": "keras_classification_model",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"embedding_size": "#my_embedder.dim",
"n_classes": "#classes_vocab.len",
"kernel_sizes_cnn": [
3,
5,
7
],
"filters_cnn": 256,
"optimizer": "Adam",
"learning_rate": 0.01,
"learning_rate_decay": 0.1,
"loss": "binary_crossentropy",
"last_layer_activation": "softmax",
"coef_reg_cnn": 1e-3,
"coef_reg_den": 1e-2,
"dropout_rate": 0.5,
"dense_size": 100,
"model_name": "cnn_model"
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"accuracy",
"f1_macro",
{
"name": "roc_auc",
"inputs": ["y_onehot", "y_pred_probas"]
}
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"valid",
"test"
],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_bert_emb",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_pt"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/tf.txt",
"{DEEPPAVLOV_PATH}/requirements/transformers.txt",
"{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",
"subdir": "{DOWNLOADS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_pt.tar.gz",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
}
43 changes: 43 additions & 0 deletions deeppavlov/configs/embedder/bert_embedder.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"chainer": {
"in": ["texts"],
"pipe": [
{
"class_name": "transformers_bert_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["texts"],
"out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"class_name": "transformers_bert_embedder",
"bert_config_path": "{BERT_PATH}/bert_config.json",
"load_path": "{BERT_PATH}",
"truncate": true,
"in": ["subword_tok_ids", "startofword_markers", "attention_mask"],
"out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
}
],
"out": ["tokens", "word_emb", "subword_tokens", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
},
"train": {},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12_pt"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/transformers.txt",
"{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
],
"labels": {},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12_pt.tar.gz",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
}
43 changes: 43 additions & 0 deletions deeppavlov/configs/embedder/bert_sentence_embedder.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"chainer": {
"in": ["texts"],
"pipe": [
{
"class_name": "transformers_bert_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["texts"],
"out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"class_name": "transformers_bert_embedder",
"bert_config_path": "{BERT_PATH}/bert_config.json",
"load_path": "{BERT_PATH}",
"truncate": false,
"in": ["subword_tok_ids", "startofword_markers", "attention_mask"],
"out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
}
],
"out": ["max_emb", "mean_emb", "pooler_output"]
},
"train": {},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/transformers.txt",
"{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
],
"labels": {},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/sentence_multi_cased_L-12_H-768_A-12_pt.tar.gz",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,7 @@
"subword_mask_mode": "last",
"token_masking_prob": 0.0,
"in": ["x_words"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "pred_subword_mask"]
},
{
"class_name": "mask",
"in": ["x_subword_tokens"],
"out": ["x_subword_mask"]
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"id": "tag_vocab",
Expand Down Expand Up @@ -77,7 +72,7 @@
"clip_norm": null,
"save_path": "{WORK_PATH}/model",
"load_path": "{WORK_PATH}/model",
"in": ["x_subword_tok_ids", "x_subword_mask", "pred_subword_mask"],
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_predicted_ind"]
},
Expand Down
9 changes: 2 additions & 7 deletions deeppavlov/configs/ner/ner_conll2003_bert.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,7 @@
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "pred_subword_mask"]
},
{
"class_name": "mask",
"in": ["x_subword_tokens"],
"out": ["x_subword_mask"]
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"id": "tag_vocab",
Expand Down Expand Up @@ -59,7 +54,7 @@
"clip_norm": 1.0,
"save_path": "{NER_PATH}/model",
"load_path": "{NER_PATH}/model",
"in": ["x_subword_tok_ids", "x_subword_mask", "pred_subword_mask"],
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind"]
},
Expand Down
17 changes: 9 additions & 8 deletions deeppavlov/configs/ner/ner_ontonotes.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
"class_name": "simple_vocab",
"pad_with_zeros": true,
"fit_on": ["y"],
"save_path": "{MODELS_PATH}/ner_ontonotes/tag.dict",
"load_path": "{MODELS_PATH}/ner_ontonotes/tag.dict",
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"out": ["y_ind"]
},
{
Expand All @@ -49,8 +49,8 @@
"class_name": "simple_vocab",
"pad_with_zeros": true,
"fit_on": ["x_char"],
"save_path": "{MODELS_PATH}/ner_ontonotes/char.dict",
"load_path": "{MODELS_PATH}/ner_ontonotes/char.dict",
"save_path": "{MODEL_PATH}/char.dict",
"load_path": "{MODEL_PATH}/char.dict",
"out": ["x_char_ind"]
},
{
Expand Down Expand Up @@ -95,8 +95,8 @@
"n_tags": "#tag_vocab.len",
"capitalization_dim": "#capitalization.dim",
"char_emb_dim": "#embeddings_char.dim",
"save_path": "{MODELS_PATH}/ner_ontonotes/model",
"load_path": "{MODELS_PATH}/ner_ontonotes/model",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"char_emb_mat": "#embeddings_char.emb_mat",
"two_dense_on_top": true,
"use_crf": true,
Expand Down Expand Up @@ -136,7 +136,7 @@
"val_every_n_epochs": 1,

"log_every_n_batches": -1,
"tensorboard_log_dir": "{MODELS_PATH}/ner_ontonotes/logs",
"tensorboard_log_dir": "{MODEL_PATH}/logs",
"show_examples": false,
"class_name": "nn_trainer",
"evaluation_targets": [
Expand All @@ -148,7 +148,8 @@
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/ner_ontonotes"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/gensim.txt",
Expand Down
9 changes: 2 additions & 7 deletions deeppavlov/configs/ner/ner_ontonotes_bert.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,7 @@
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "pred_subword_mask"]
},
{
"class_name": "mask",
"in": ["x_subword_tokens"],
"out": ["x_subword_mask"]
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"id": "tag_vocab",
Expand Down Expand Up @@ -59,7 +54,7 @@
"clip_norm": 1.0,
"save_path": "{NER_PATH}/model",
"load_path": "{NER_PATH}/model",
"in": ["x_subword_tok_ids", "x_subword_mask", "pred_subword_mask"],
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind"]
},
Expand Down

0 comments on commit c10b079

Please sign in to comment.