forked from rimijoker/DeepPavlov
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat: Python pipeline wrappers (deeppavlov#1491) * feat: python pipeline wrappers * minor docstring fixes * docs: minor docstring edit Co-authored-by: Maxim Talimanchuk <mtalimanchuk@gmail.com> * refactor: Remove deeppavlov.configs.elmo (deeppavlov#1498) * removed elmo config files * removed elmo_file_paths_iterator, elmo_model and file_paths_reader * refactor: returned file_paths_reader * docs: newlines in file_paths_reader docstring * refactor: remove deeppavlov.configs.skills (deeppavlov#1499) * remove: deeppavlov.configs.skills * delete: aiml_skill component * remove: rasa_skill component * remove: DSLSkill component * docs: removed skills from docs apiref * Feat/glue superglue update (deeppavlov#1508) * Add wnli config * Update copa config * Fix path * Fix record path * Exclude train from evaluation * Exclude train from evaluation * add ranker * update ranker * feat: deeppavlov version update Co-authored-by: Fedor Ignatov <ignatov.fedor@gmail.com> Co-authored-by: slowwavesleep <44175589+slowwavesleep@users.noreply.github.com> * xlm configs Co-authored-by: Fedor Ignatov <ignatov.fedor@gmail.com> Co-authored-by: Maxim Talimanchuk <mtalimanchuk@gmail.com> Co-authored-by: Vasily <vaskonov@users.noreply.github.com> Co-authored-by: slowwavesleep <44175589+slowwavesleep@users.noreply.github.com>
- Loading branch information
1 parent
07fda07
commit a1ba1e9
Showing
66 changed files
with
870 additions
and
4,686 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
141 changes: 141 additions & 0 deletions
141
deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
{ | ||
"metadata": { | ||
"variables": { | ||
"ROOT_PATH": "~/.deeppavlov", | ||
"BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru", | ||
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads", | ||
"MODELS_PATH": "{ROOT_PATH}/models", | ||
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli/{BASE_MODEL}" | ||
} | ||
}, | ||
"dataset_reader": { | ||
"class_name": "huggingface_dataset_reader", | ||
"path": "glue", | ||
"name": "mnli", | ||
"train": "train", | ||
"valid": "validation_matched", | ||
"test": "test_matched" | ||
}, | ||
"dataset_iterator": { | ||
"class_name": "huggingface_dataset_iterator", | ||
"features": [ | ||
"hypothesis", | ||
"premise" | ||
], | ||
"label": "label", | ||
"seed": 42 | ||
}, | ||
"chainer": { | ||
"in": [ | ||
"hypothesis", | ||
"premise" | ||
], | ||
"in_y": [ | ||
"y" | ||
], | ||
"pipe": [ | ||
{ | ||
"class_name": "torch_transformers_preprocessor", | ||
"vocab_file": "{BASE_MODEL}", | ||
"do_lower_case": false, | ||
"max_seq_length": 192, | ||
"truncation": "longest_first", | ||
"padding": "longest", | ||
"in": [ | ||
"hypothesis", | ||
"premise" | ||
], | ||
"out": [ | ||
"bert_features" | ||
] | ||
}, | ||
{ | ||
"id": "classes_vocab", | ||
"class_name": "simple_vocab", | ||
"fit_on": [ | ||
"y" | ||
], | ||
"save_path": "{MODEL_PATH}/classes.dict", | ||
"load_path": "{MODEL_PATH}/classes.dict", | ||
"in": [ | ||
"y" | ||
], | ||
"out": [ | ||
"y_ids" | ||
] | ||
}, | ||
{ | ||
"in": [ | ||
"y_ids" | ||
], | ||
"out": [ | ||
"y_onehot" | ||
], | ||
"class_name": "one_hotter", | ||
"depth": "#classes_vocab.len", | ||
"single_vector": true | ||
}, | ||
{ | ||
"class_name": "torch_transformers_classifier", | ||
"n_classes": "#classes_vocab.len", | ||
"return_probas": true, | ||
"pretrained_bert": "{BASE_MODEL}", | ||
"save_path": "{MODEL_PATH}/model", | ||
"load_path": "{MODEL_PATH}/model", | ||
"optimizer": "AdamW", | ||
"optimizer_parameters": { | ||
"lr": 5e-06 | ||
}, | ||
"learning_rate_drop_patience": 4, | ||
"learning_rate_drop_div": 2.0, | ||
"in": [ | ||
"bert_features" | ||
], | ||
"in_y": [ | ||
"y_ids" | ||
], | ||
"out": [ | ||
"y_pred_probas" | ||
] | ||
}, | ||
{ | ||
"in": [ | ||
"y_pred_probas" | ||
], | ||
"out": [ | ||
"y_pred_ids" | ||
], | ||
"class_name": "proba2labels", | ||
"max_proba": true | ||
}, | ||
{ | ||
"in": [ | ||
"y_pred_ids" | ||
], | ||
"out": [ | ||
"y_pred_labels" | ||
], | ||
"ref": "classes_vocab" | ||
} | ||
], | ||
"out": [ | ||
"y_pred_labels" | ||
] | ||
}, | ||
"train": { | ||
"batch_size": 64, | ||
"metrics": [ | ||
"accuracy" | ||
], | ||
"validation_patience": 10, | ||
"val_every_n_batches": 3066, | ||
"log_every_n_batches": 3066, | ||
"show_examples": false, | ||
"evaluation_targets": [ | ||
"valid" | ||
], | ||
"class_name": "torch_trainer", | ||
"tensorboard_log_dir": "{MODEL_PATH}/", | ||
"pytest_max_batches": 2 | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
147 changes: 147 additions & 0 deletions
147
deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
{ | ||
"metadata": { | ||
"variables": { | ||
"ROOT_PATH": "~/.deeppavlov", | ||
"BASE_MODEL": "roberta-large", | ||
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads", | ||
"MODELS_PATH": "{ROOT_PATH}/models", | ||
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_wnli/{BASE_MODEL}" | ||
}, | ||
"download": [ | ||
{ | ||
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_wnli_roberta.tar.gz", | ||
"subdir": "{MODELS_PATH}" | ||
} | ||
] | ||
}, | ||
"dataset_reader": { | ||
"class_name": "huggingface_dataset_reader", | ||
"path": "glue", | ||
"name": "wnli", | ||
"train": "train", | ||
"valid": "validation" | ||
}, | ||
"dataset_iterator": { | ||
"class_name": "huggingface_dataset_iterator", | ||
"features": [ | ||
"sentence1", | ||
"sentence2" | ||
], | ||
"label": "label", | ||
"seed": 42 | ||
}, | ||
"chainer": { | ||
"in": [ | ||
"sentence1", | ||
"sentence2" | ||
], | ||
"in_y": [ | ||
"y" | ||
], | ||
"pipe": [ | ||
{ | ||
"class_name": "torch_transformers_preprocessor", | ||
"vocab_file": "{BASE_MODEL}", | ||
"do_lower_case": false, | ||
"max_seq_length": 192, | ||
"truncation": "longest_first", | ||
"padding": "longest", | ||
"in": [ | ||
"sentence1", | ||
"sentence2" | ||
], | ||
"out": [ | ||
"bert_features" | ||
] | ||
}, | ||
{ | ||
"id": "classes_vocab", | ||
"class_name": "simple_vocab", | ||
"fit_on": [ | ||
"y" | ||
], | ||
"save_path": "{MODEL_PATH}/classes.dict", | ||
"load_path": "{MODEL_PATH}/classes.dict", | ||
"in": [ | ||
"y" | ||
], | ||
"out": [ | ||
"y_ids" | ||
] | ||
}, | ||
{ | ||
"in": [ | ||
"y_ids" | ||
], | ||
"out": [ | ||
"y_onehot" | ||
], | ||
"class_name": "one_hotter", | ||
"depth": "#classes_vocab.len", | ||
"single_vector": true | ||
}, | ||
{ | ||
"class_name": "torch_transformers_classifier", | ||
"n_classes": "#classes_vocab.len", | ||
"return_probas": true, | ||
"pretrained_bert": "{BASE_MODEL}", | ||
"save_path": "{MODEL_PATH}/model", | ||
"load_path": "{MODEL_PATH}/model", | ||
"optimizer": "AdamW", | ||
"optimizer_parameters": { | ||
"lr": 1e-05 | ||
}, | ||
"learning_rate_drop_patience": 3, | ||
"learning_rate_drop_div": 2.0, | ||
"in": [ | ||
"bert_features" | ||
], | ||
"in_y": [ | ||
"y_ids" | ||
], | ||
"out": [ | ||
"y_pred_probas" | ||
] | ||
}, | ||
{ | ||
"in": [ | ||
"y_pred_probas" | ||
], | ||
"out": [ | ||
"y_pred_ids" | ||
], | ||
"class_name": "proba2labels", | ||
"max_proba": true | ||
}, | ||
{ | ||
"in": [ | ||
"y_pred_ids" | ||
], | ||
"out": [ | ||
"y_pred_labels" | ||
], | ||
"ref": "classes_vocab" | ||
} | ||
], | ||
"out": [ | ||
"y_pred_labels" | ||
] | ||
}, | ||
"train": { | ||
"batch_size": 24, | ||
"metrics": [ | ||
"accuracy" | ||
], | ||
"epochs": 1, | ||
"val_every_n_batches": 250, | ||
"log_every_n_batches": 250, | ||
"show_examples": false, | ||
"evaluation_targets": [ | ||
"train", | ||
"valid" | ||
], | ||
"class_name": "torch_trainer", | ||
"tensorboard_log_dir": "{MODEL_PATH}/", | ||
"pytest_max_batches": 2 | ||
} | ||
} |
Oops, something went wrong.