Xlm glue (#10)
* feat: Python pipeline wrappers (deeppavlov#1491)

* feat: python pipeline wrappers

* minor docstring fixes

* docs: minor docstring edit

Co-authored-by: Maxim Talimanchuk <mtalimanchuk@gmail.com>

* refactor: Remove deeppavlov.configs.elmo (deeppavlov#1498)

* removed elmo config files

* removed elmo_file_paths_iterator, elmo_model and file_paths_reader

* refactor: returned file_paths_reader

* docs: newlines in file_paths_reader docstring

* refactor: remove deeppavlov.configs.skills (deeppavlov#1499)

* remove: deeppavlov.configs.skills

* delete: aiml_skill component

* remove: rasa_skill component

* remove: DSLSkill component

* docs: removed skills from docs apiref

* Feat/glue superglue update (deeppavlov#1508)

* Add wnli config

* Update copa config

* Fix path

* Fix record path

* Exclude train from evaluation

* Exclude train from evaluation

* add ranker

* update ranker

* feat: deeppavlov version update

Co-authored-by: Fedor Ignatov <ignatov.fedor@gmail.com>
Co-authored-by: slowwavesleep <44175589+slowwavesleep@users.noreply.github.com>

* xlm configs

Co-authored-by: Fedor Ignatov <ignatov.fedor@gmail.com>
Co-authored-by: Maxim Talimanchuk <mtalimanchuk@gmail.com>
Co-authored-by: Vasily <vaskonov@users.noreply.github.com>
Co-authored-by: slowwavesleep <44175589+slowwavesleep@users.noreply.github.com>
5 people committed Jan 28, 2022
1 parent 4f25e43 commit 567ea25
Showing 66 changed files with 870 additions and 4,686 deletions.
1 change: 1 addition & 0 deletions deeppavlov/__init__.py
@@ -19,6 +19,7 @@
from .configs import configs
from .core.commands.infer import build_model
from .core.commands.train import train_evaluate_model_from_config
+from .core.common.base import Element, Model
from .core.common.chainer import Chainer
from .core.common.log import init_logger
from .download import deep_download
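The entry points re-exported in this file (build_model, train_evaluate_model_from_config) are the usual way to drive the JSON configs changed in this commit from Python. A minimal sketch, assuming a config is addressed by its file path inside the repository:

from deeppavlov import build_model, train_evaluate_model_from_config

# Train the pipeline described by a config and report metrics on its
# "evaluation_targets" (only "valid" for the GLUE configs edited below).
train_evaluate_model_from_config("deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json")

# Or just assemble the pipeline for inference without retraining.
model = build_model("deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json")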
2 changes: 1 addition & 1 deletion deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '0.17.0'
+__version__ = '0.17.2'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
1 change: 0 additions & 1 deletion deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
@@ -121,7 +121,6 @@
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": [
"train",
"valid"
],
"class_name": "torch_trainer",
141 changes: 141 additions & 0 deletions deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json
@@ -0,0 +1,141 @@
{
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli/{BASE_MODEL}"
}
},
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"name": "mnli",
"train": "train",
"valid": "validation_matched",
"test": "test_matched"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": [
"hypothesis",
"premise"
],
"label": "label",
"seed": 42
},
"chainer": {
"in": [
"hypothesis",
"premise"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 192,
"truncation": "longest_first",
"padding": "longest",
"in": [
"hypothesis",
"premise"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": [
"y"
],
"out": [
"y_ids"
]
},
{
"in": [
"y_ids"
],
"out": [
"y_onehot"
],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 5e-06
},
"learning_rate_drop_patience": 4,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
]
},
{
"in": [
"y_pred_probas"
],
"out": [
"y_pred_ids"
],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": [
"y_pred_ids"
],
"out": [
"y_pred_labels"
],
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"batch_size": 64,
"metrics": [
"accuracy"
],
"validation_patience": 10,
"val_every_n_batches": 3066,
"log_every_n_batches": 3066,
"show_examples": false,
"evaluation_targets": [
"valid"
],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
}
}
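The chainer above reads ("hypothesis", "premise") and emits "y_pred_labels"; a hedged sketch of how that wiring looks when the assembled pipeline is called from Python (the example texts are invented, and the model has to be trained first since this config declares no download URL):

from deeppavlov import build_model

# Build the pipe defined in glue_mnli_xlm_roberta.json; weights are loaded
# from MODEL_PATH if they already exist.
mnli = build_model("deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json")

# Argument order follows the chainer's "in" list: hypotheses first, then premises.
predictions = mnli(["A man is sleeping."], ["A man inspects a uniform."])
print(predictions)  # one label from classes_vocab per input pair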
@@ -121,7 +121,6 @@
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid"
],
"class_name": "torch_trainer",
147 changes: 147 additions & 0 deletions deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json
@@ -0,0 +1,147 @@
{
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"BASE_MODEL": "roberta-large",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_wnli/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_wnli_roberta.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
},
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"name": "wnli",
"train": "train",
"valid": "validation"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": [
"sentence1",
"sentence2"
],
"label": "label",
"seed": 42
},
"chainer": {
"in": [
"sentence1",
"sentence2"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 192,
"truncation": "longest_first",
"padding": "longest",
"in": [
"sentence1",
"sentence2"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": [
"y"
],
"out": [
"y_ids"
]
},
{
"in": [
"y_ids"
],
"out": [
"y_onehot"
],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
]
},
{
"in": [
"y_pred_probas"
],
"out": [
"y_pred_ids"
],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": [
"y_pred_ids"
],
"out": [
"y_pred_labels"
],
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"batch_size": 24,
"metrics": [
"accuracy"
],
"epochs": 1,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": [
"train",
"valid"
],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
}
}
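Unlike the new XLM MNLI config above, this one ships pretrained weights through metadata.download, so it can be used out of the box; a minimal sketch (the sentence pair is invented):

from deeppavlov import build_model

# download=True fetches glue_wnli_roberta.tar.gz into MODELS_PATH,
# as declared in the "download" section above.
wnli = build_model("deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json", download=True)

# Inputs follow the chainer's "in" order: sentence1, sentence2.
print(wnli(["The trophy doesn't fit in the suitcase."],
           ["The trophy is too large."]))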
