Skip to content

Commit

Permalink
Release 0.5.1 (#971)
Browse files Browse the repository at this point in the history
  • Loading branch information
yoptar committed Aug 13, 2019
2 parents 2a9b9cf + 6eb02f7 commit e9fde72
Show file tree
Hide file tree
Showing 11 changed files with 433 additions and 65 deletions.
5 changes: 0 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,6 @@ and others in the Integrations section for more info.
- models depending on `tensorflow` require `CUDA 10.0` to run on GPU instead of `CUDA 9.0`
- scikit-learn models have to be redownloaded or retrained

**Breaking changes in version 0.5.0**
- dependencies have to be reinstalled for most pipeline configurations
- models depending on `tensorflow` require `CUDA 10.0` to run on GPU instead of `CUDA 9.0`
- scikit-learn models have to be redownloaded or retrained

**Breaking changes in version 0.4.0!**
- default target variable name for [neural evolution](https://docs.deeppavlov.ai/en/0.4.0/intro/hypersearch.html#parameters-evolution-for-deeppavlov-models)
was changed from `MODELS_PATH` to `MODEL_PATH`.
Expand Down
2 changes: 1 addition & 1 deletion deeppavlov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def evaluate_model(config: [str, Path, dict], download: bool = False, recursive:
except ImportError:
'Assuming that requirements are not yet installed'

__version__ = '0.5.0'
__version__ = '0.5.1'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Expand Down
168 changes: 168 additions & 0 deletions deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
{
  "chainer": {
    "in": ["x"],
    "in_y": ["y"],
    "pipe": [
      {
        "class_name": "bert_preprocessor",
        "vocab_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/vocab.txt",
        "do_lower_case": false,
        "max_seq_length": 64,
        "in": ["x"],
        "out": ["bert_features"]
      },
      {
        "id": "classes_vocab",
        "class_name": "simple_vocab",
        "fit_on": ["y"],
        "save_path": "{MODEL_PATH}/classes.dict",
        "load_path": "{MODEL_PATH}/classes.dict",
        "in": ["y"],
        "out": ["y_ids"]
      },
      {
        "id": "my_one_hotter",
        "class_name": "one_hotter",
        "depth": "#classes_vocab.len",
        "single_vector": true,
        "in": ["y_ids"],
        "out": ["y_onehot"]
      },
      {
        "class_name": "bert_classifier",
        "n_classes": "#classes_vocab.len",
        "return_probas": true,
        "one_hot_labels": true,
        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_config.json",
        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_model.ckpt",
        "save_path": "{MODEL_PATH}/model",
        "load_path": "{MODEL_PATH}/model",
        "keep_prob": 0.5,
        "learning_rate": 1e-05,
        "learning_rate_drop_patience": 5,
        "learning_rate_drop_div": 2.0,
        "in": ["bert_features"],
        "in_y": ["y_onehot"],
        "out": ["y_pred_probas"]
      },
      {
        "class_name": "proba2labels",
        "max_proba": true,
        "in": ["y_pred_probas"],
        "out": ["y_pred_ids"]
      },
      {
        "ref": "classes_vocab",
        "in": ["y_pred_ids"],
        "out": ["y_pred_labels"]
      },
      {
        "ref": "my_one_hotter",
        "in": ["y_pred_ids"],
        "out": ["y_pred_onehot"]
      }
    ],
    "out": ["y_pred_labels"]
  },
  "train": {
    "class_name": "nn_trainer",
    "epochs": 100,
    "batch_size": 64,
    "metrics": [
      {
        "name": "roc_auc",
        "inputs": ["y_onehot", "y_pred_probas"]
      },
      {
        "name": "sets_accuracy",
        "inputs": ["y", "y_pred_labels"]
      },
      {
        "name": "f1_macro",
        "inputs": ["y", "y_pred_labels"]
      }
    ],
    "validation_patience": 20,
    "val_every_n_epochs": 1,
    "log_every_n_epochs": 1,
    "show_examples": false,
    "evaluation_targets": ["train", "valid"],
    "tensorboard_log_dir": "{MODEL_PATH}/"
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": "~/.deeppavlov",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODELS_PATH": "{ROOT_PATH}/models",
      "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v3"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
    ],
    "labels": {
      "telegram_utils": "IntentModel",
      "server_utils": "KerasIntentModel"
    },
    "download": [
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz",
        "subdir": "{DOWNLOADS_PATH}/bert_models"
      },
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/yahoo_convers_vs_info_v3.tar.gz",
        "subdir": "{MODELS_PATH}/classifiers/"
      }
    ]
  }
}
74 changes: 74 additions & 0 deletions deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
  "chainer": {
    "in": ["context_raw", "question_raw"],
    "in_y": ["ans_raw", "ans_raw_start"],
    "pipe": [
      {
        "class_name": "bert_preprocessor",
        "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt",
        "do_lower_case": false,
        "max_seq_length": 384,
        "in": ["question_raw", "context_raw"],
        "out": ["bert_features"]
      },
      {
        "class_name": "squad_bert_mapping",
        "do_lower_case": false,
        "in": ["context_raw", "bert_features"],
        "out": ["subtok2chars", "char2subtoks"]
      },
      {
        "class_name": "squad_bert_ans_preprocessor",
        "do_lower_case": false,
        "in": ["ans_raw", "ans_raw_start", "char2subtoks"],
        "out": ["ans", "ans_start", "ans_end"]
      },
      {
        "class_name": "squad_bert_model",
        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_config.json",
        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_model.ckpt",
        "save_path": "{MODELS_PATH}/squad_bert/model_multi_freezed",
        "load_path": "{MODELS_PATH}/squad_bert/model_multi_freezed",
        "keep_prob": 0.5,
        "learning_rate": 2e-05,
        "learning_rate_drop_patience": 2,
        "learning_rate_drop_div": 2.0,
        "in": ["bert_features"],
        "in_y": ["ans_start", "ans_end"],
        "out": ["ans_start_predicted", "ans_end_predicted", "logits"]
      },
      {
        "class_name": "squad_bert_ans_postprocessor",
        "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"],
        "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"]
      }
    ],
    "out": ["ans_predicted", "ans_start_predicted", "logits"]
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": "~/.deeppavlov",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODELS_PATH": "{ROOT_PATH}/models"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
    ],
    "labels": {
      "telegram_utils": "SquadModel",
      "server_utils": "SquadModel"
    },
    "download": [
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
        "subdir": "{DOWNLOADS_PATH}/bert_models"
      },
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_bert_mult_freezed.tar.gz",
        "subdir": "{MODELS_PATH}"
      }
    ]
  }
}

116 changes: 116 additions & 0 deletions deeppavlov/configs/squad/squad_zh_bert.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
  "dataset_reader": {
    "class_name": "squad_dataset_reader",
    "dataset": "SQuAD",
    "url": "http://files.deeppavlov.ai/datasets/DRCD.tar.gz",
    "data_path": "{DOWNLOADS_PATH}/DRCD_train/"
  },
  "dataset_iterator": {
    "class_name": "squad_iterator",
    "seed": 1337,
    "shuffle": true
  },
  "chainer": {
    "in": ["context_raw", "question_raw"],
    "in_y": ["ans_raw", "ans_raw_start"],
    "pipe": [
      {
        "class_name": "bert_preprocessor",
        "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt",
        "do_lower_case": "{lowercase}",
        "max_seq_length": 384,
        "in": ["question_raw", "context_raw"],
        "out": ["bert_features"]
      },
      {
        "class_name": "squad_bert_mapping",
        "do_lower_case": "{lowercase}",
        "in": ["context_raw", "bert_features"],
        "out": ["subtok2chars", "char2subtoks"]
      },
      {
        "class_name": "squad_bert_ans_preprocessor",
        "do_lower_case": "{lowercase}",
        "in": ["ans_raw", "ans_raw_start", "char2subtoks"],
        "out": ["ans", "ans_start", "ans_end"]
      },
      {
        "class_name": "squad_bert_model",
        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_config.json",
        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_model.ckpt",
        "save_path": "{MODELS_PATH}/squad_zh_bert/model_multi",
        "load_path": "{MODELS_PATH}/squad_zh_bert/model_multi",
        "keep_prob": 0.5,
        "learning_rate": 2e-05,
        "learning_rate_drop_patience": 3,
        "learning_rate_drop_div": 2.0,
        "in": ["bert_features"],
        "in_y": ["ans_start", "ans_end"],
        "out": ["ans_start_predicted", "ans_end_predicted", "logits", "score"]
      },
      {
        "class_name": "squad_bert_ans_postprocessor",
        "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"],
        "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"]
      }
    ],
    "out": ["ans_predicted", "ans_start_predicted", "logits"]
  },
  "train": {
    "show_examples": false,
    "test_best": false,
    "validate_best": true,
    "log_every_n_batches": 250,
    "val_every_n_batches": 500,
    "batch_size": 10,
    "pytest_max_batches": 2,
    "pytest_batch_size": 5,
    "validation_patience": 10,
    "metrics": [
      {
        "name": "squad_v2_f1",
        "inputs": ["ans", "ans_predicted"]
      },
      {
        "name": "squad_v2_em",
        "inputs": ["ans", "ans_predicted"]
      },
      {
        "name": "squad_v1_f1",
        "inputs": ["ans", "ans_predicted"]
      },
      {
        "name": "squad_v1_em",
        "inputs": ["ans", "ans_predicted"]
      }
    ],
    "tensorboard_log_dir": "{MODELS_PATH}/squad_zh_bert/logs"
  },
  "metadata": {
    "variables": {
      "lowercase": false,
      "ROOT_PATH": "~/.deeppavlov",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODELS_PATH": "{ROOT_PATH}/models"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
    ],
    "labels": {
      "telegram_utils": "SquadModel",
      "server_utils": "SquadModel"
    },
    "download": [
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
        "subdir": "{DOWNLOADS_PATH}/bert_models"
      },
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_zh.tar.gz",
        "subdir": "{MODELS_PATH}"
      }
    ]
  }
}

0 comments on commit e9fde72

Please sign in to comment.