Skip to content

Commit

Permalink
Release 0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
yoptar committed May 6, 2019
2 parents f3e4a69 + d0656f7 commit 2c83002
Show file tree
Hide file tree
Showing 132 changed files with 8,116 additions and 7,945 deletions.
1 change: 1 addition & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ node('gpu') {
}
stage('Setup') {
env.TFHUB_CACHE_DIR="tfhub_cache"
env.LD_LIBRARY_PATH="/usr/local/cuda-9.0/lib64"
sh """
virtualenv --python=python3 '.venv-$BUILD_NUMBER'
. '.venv-$BUILD_NUMBER/bin/activate'
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ Here is our [DockerHub repository](https://hub.docker.com/u/deeppavlov/) with im

# Tutorials

Jupyter notebooks and videos explaining how to use DeepPavlov for different tasks can be found in [/examples/tutorials/](examples/tutorials)
Jupyter notebooks and videos explaining how to use DeepPavlov for different tasks can be found in [/examples/](examples)

# License

Expand Down
2 changes: 1 addition & 1 deletion deeppavlov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def evaluate_model(config: [str, Path, dict], download: bool = False, recursive:
except ImportError:
'Assuming that requirements are not yet installed'

__version__ = '0.2.0'
__version__ = '0.3.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Expand Down
49 changes: 49 additions & 0 deletions deeppavlov/configs/aiml_skill/aiml_skill.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
  "chainer": {
    "in": ["utterances_batch", "history_batch", "states_batch"],
    "out": ["responses_batch", "confidences_batch", "output_states_batch"],
    "pipe": [
      {
        "class_name": "aiml_skill",
        "path_to_aiml_scripts": "{DOWNLOADS_PATH}/aiml_scripts",
        "positive_confidence": 0.66,
        "null_response": "I don't know",
        "null_confidence": 0.33,
        "in": ["utterances_batch", "history_batch", "states_batch"],
        "out": ["responses_batch", "confidences_batch", "output_states_batch"]
      }
    ]
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": "~/.deeppavlov",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODELS_PATH": "{ROOT_PATH}/models"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/aiml_skill.txt"
    ],
    "download": [
      {
        "url": "http://files.deeppavlov.ai/aiml_skill/aiml_scripts.tar.gz",
        "subdir": "{DOWNLOADS_PATH}"
      }
    ]
  }
}
1 change: 1 addition & 0 deletions deeppavlov/configs/classifiers/intents_dstc2.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
}
],
"out": [
"y_pred_labels",
"y_pred_probas"
]
},
Expand Down
2 changes: 1 addition & 1 deletion deeppavlov/configs/classifiers/intents_dstc2_big.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
}
],
"out": [
"y_pred_probas"
"y_pred_labels"
]
},
"train": {
Expand Down
85 changes: 85 additions & 0 deletions deeppavlov/configs/classifiers/paraphraser_rubert.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
  "dataset_reader": {
    "class_name": "paraphraser_reader",
    "data_path": "{DOWNLOADS_PATH}/paraphraser_data",
    "do_lower_case": false
  },
  "dataset_iterator": {
    "class_name": "siamese_iterator",
    "seed": 243,
    "len_valid": 500
  },
  "chainer": {
    "in": ["text_a", "text_b"],
    "in_y": ["y"],
    "pipe": [
      {
        "class_name": "bert_preprocessor",
        "vocab_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/vocab.txt",
        "do_lower_case": false,
        "max_seq_length": 64,
        "in": ["text_a", "text_b"],
        "out": ["bert_features"]
      },
      {
        "class_name": "bert_classifier",
        "n_classes": 2,
        "one_hot_labels": false,
        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_config.json",
        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_model.ckpt",
        "save_path": "{MODELS_PATH}/paraphraser_rubert/model_rubert",
        "load_path": "{MODELS_PATH}/paraphraser_rubert/model_rubert",
        "keep_prob": 0.5,
        "optimizer": "tf.train:AdamOptimizer",
        "learning_rate": 2e-05,
        "learning_rate_drop_patience": 3,
        "learning_rate_drop_div": 2.0,
        "in": ["bert_features"],
        "in_y": ["y"],
        "out": ["predictions"]
      }
    ],
    "out": ["predictions"]
  },
  "train": {
    "batch_size": 64,
    "pytest_max_batches": 2,
    "train_metrics": ["f1", "acc"],
    "metrics": ["f1", "acc"],
    "validation_patience": 7,
    "val_every_n_batches": 50,
    "log_every_n_batches": 50,
    "validate_best": true,
    "test_best": true,
    "tensorboard_log_dir": "{MODELS_PATH}/paraphraser_rubert/logs"
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": "~/.deeppavlov",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODELS_PATH": "{ROOT_PATH}/models"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
    ],
    "download": [
      {
        "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
        "subdir": "{DOWNLOADS_PATH}/paraphraser_data"
      },
      {
        "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip",
        "subdir": "{DOWNLOADS_PATH}/paraphraser_data"
      },
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz",
        "subdir": "{DOWNLOADS_PATH}/bert_models"
      },
      {
        "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_rubert_v0.tar.gz",
        "subdir": "{MODELS_PATH}"
      }
    ]
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"class_name": "basic_classification_iterator",
"seed": 42,
"field_to_split": "train",
"split_seed": 23,
"split_fields": [
"train",
"valid"
Expand Down Expand Up @@ -46,7 +47,8 @@
"out": [
"x_prep"
],
"class_name": "dirty_comments_preprocessor"
"class_name": "dirty_comments_preprocessor",
"remove_punctuation": false
},
{
"in": "x_prep",
Expand Down Expand Up @@ -103,7 +105,7 @@
"optimizer": "Adam",
"learning_rate": 0.01,
"learning_rate_decay": 0.1,
"loss": "binary_crossentropy",
"loss": "categorical_crossentropy",
"last_layer_activation": "softmax",
"coef_reg_cnn": 1e-3,
"coef_reg_den": 1e-2,
Expand All @@ -129,7 +131,7 @@
},
"train": {
"epochs": 100,
"batch_size": 32,
"batch_size": 128,
"metrics": [
"f1_weighted",
"f1_macro",
Expand All @@ -155,7 +157,7 @@
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models/classifiers/rusentiment_v4"
"MODELS_PATH": "{ROOT_PATH}/models/classifiers/rusentiment_v10"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/tf.txt",
Expand All @@ -167,7 +169,7 @@
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_v4.tar.gz",
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_v10.tar.gz",
"subdir": "{ROOT_PATH}/models/classifiers"
}
]
Expand Down
30 changes: 17 additions & 13 deletions deeppavlov/configs/ner/ner_conll2003.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
"pad_with_zeros": true,
"special_tokens": ["<UNK>"],
"fit_on": ["x_san"],
"save_path": "{MODELS_PATH}/ner_conll2003/word.dict",
"load_path": "{MODELS_PATH}/ner_conll2003/word.dict",
"save_path": "{NER_PATH}/word.dict",
"load_path": "{NER_PATH}/word.dict",
"out": ["x_tok_ind"]
},
{
Expand All @@ -45,8 +45,8 @@
"class_name": "simple_vocab",
"pad_with_zeros": true,
"fit_on": ["y"],
"save_path": "{MODELS_PATH}/ner_conll2003/tag.dict",
"load_path": "{MODELS_PATH}/ner_conll2003/tag.dict",
"save_path": "{NER_PATH}/tag.dict",
"load_path": "{NER_PATH}/tag.dict",
"out": ["y_ind"]
},
{
Expand All @@ -60,8 +60,8 @@
"class_name": "simple_vocab",
"pad_with_zeros": true,
"fit_on": ["x_char"],
"save_path": "{MODELS_PATH}/ner_conll2003/char.dict",
"load_path": "{MODELS_PATH}/ner_conll2003/char.dict",
"save_path": "{NER_PATH}/char.dict",
"load_path": "{NER_PATH}/char.dict",
"out": ["x_char_ind"]
},
{
Expand Down Expand Up @@ -111,18 +111,17 @@
"n_tags": "#tag_vocab.len",
"capitalization_dim": "#capitalization.dim",
"char_emb_dim": "#embeddings_char.dim",
"save_path": "{MODELS_PATH}/ner_conll2003/model_no_pos",
"load_path": "{MODELS_PATH}/ner_conll2003/model_no_pos",
"save_path": "{NER_PATH}/model_no_pos",
"load_path": "{NER_PATH}/model_no_pos",
"char_emb_mat": "#embeddings_char.emb_mat",
"two_dense_on_top": true,
"use_crf": true,
"use_batch_norm": true,
"embeddings_dropout": true,
"top_dropout": true,
"intra_layer_dropout": true,
"l2_reg": 0,
"learning_rate": 1e-2,
"dropout_keep_prob": 0.5
"dropout_keep_prob": 0.7
},
{
"ref": "tag_vocab",
Expand All @@ -140,6 +139,10 @@
{
"name": "ner_f1",
"inputs": ["y", "tags"]
},
{
"name": "ner_token_f1",
"inputs": ["y", "tags"]
}
],
"validation_patience": 7,
Expand All @@ -157,7 +160,8 @@
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
"MODELS_PATH": "{ROOT_PATH}/models",
"NER_PATH": "{MODELS_PATH}/ner_conll2003"
},
"requirements": [
"{DEEPPAVLOV_PATH}/requirements/gensim.txt",
Expand All @@ -169,7 +173,7 @@
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_v4_cpu_compatible.tar.gz",
"url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_v5.tar.gz",
"subdir": "{MODELS_PATH}"
},
{
Expand All @@ -178,4 +182,4 @@
}
]
}
}
}

0 comments on commit 2c83002

Please sign in to comment.