Skip to content

Commit

Permalink
Feat/rsg (#1577)
Browse files Browse the repository at this point in the history
  • Loading branch information
vaskonov committed Jul 14, 2022
1 parent caa1f63 commit bfd5cd0
Show file tree
Hide file tree
Showing 20 changed files with 1,366 additions and 41 deletions.
16 changes: 4 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,15 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

**Models**

[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html)
[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html) | [Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) |

[Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) | [Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/squad.html)

[Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/kbqa.html)
[Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/squad.html) | [Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/kbqa.html)

[Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/master/features/models/neural_ranking.html) | [TF-IDF Ranking](http://docs.deeppavlov.ai/en/master/features/models/tfidf_ranking.html)

[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html)
[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)

[Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)
[Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)

**Skills**

Expand Down Expand Up @@ -66,19 +64,13 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

0. We support the `Linux` platform and `Python 3.6`, `3.7`, `3.8` and `3.9`
* **`Python 3.5` is not supported!**
* **installation for `Windows` requires `Git` (for example, [git](https://git-scm.com/download/win)) and `Visual Studio 2015/2017` with `C++` build tools installed!**

1. Create and activate a virtual environment:
* `Linux`
```
python -m venv env
source ./env/bin/activate
```
* `Windows`
```
python -m venv env
.\env\Scripts\activate.bat
```
2. Install the package inside the environment:
```
pip install deeppavlov
Expand Down
1 change: 1 addition & 0 deletions deeppavlov/configs/ner/ner_case_agnostic_mdistilbert.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"dataset_reader": {
"class_name": "conll2003_reader",
"data_path": "{DOWNLOADS_PATH}/conll2003/",
"dataset_name": "conll2003",
"provide_pos": false
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["question", "passage"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["question", "passage"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["question", "passage"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"is_binary": "{BINARY_CLASSIFICATION}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05},
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"is_binary": "{BINARY_CLASSIFICATION}",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 4,
"metrics": ["accuracy"],
"epochs": 10,
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "russian_super_glue",
"BINARY_CLASSIFICATION": false,
"TASK": "danetqa",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_danetqa_rubert.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"is_binary": "{BINARY_CLASSIFICATION}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05, "weight_decay": 0.1},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"is_binary": "{BINARY_CLASSIFICATION}",
"confidence_threshold": 0.5
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 16,
"metrics": ["matthews_correlation"],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["test"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "russian_super_glue",
"BINARY_CLASSIFICATION": false,
"TASK": "lidirus",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/terra/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_terra_rubert.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["context", "answer", "idx"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["context", "answer", "idx"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["context", "answer"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"is_binary": "{BINARY_CLASSIFICATION}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05},
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"is_binary": "{BINARY_CLASSIFICATION}",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 8,
"metrics": ["roc_auc","f1"],
"epochs": 10,
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "russian_super_glue",
"BINARY_CLASSIFICATION": false,
"TASK": "muserc",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_muserc_rubert.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}

0 comments on commit bfd5cd0

Please sign in to comment.