Skip to content

Commit

Permalink
Refactor faq models (#1608)
Browse files (browse the repository at this point in the history)
Co-authored-by: vasily <vasili.konov@gmail.com>
Co-authored-by: Fedor Ignatov <ignatov.fedor@gmail.com>
Co-authored-by: Дмитрий Евсеев <dmitrij.euseew@yandex.ru>
  • Loading branch information
4 people committed Feb 13, 2023
1 parent 3ee1b85 commit da6379b
Show file tree
Hide file tree
Showing 28 changed files with 246 additions and 1,191 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)

[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/odqa.html) | [Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/faq.html)

[Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)
[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/odqa.html) | [Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)

**Embeddings**

Expand Down
2 changes: 1 addition & 1 deletion deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
},
"download": [
{
"url": "https://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
},
"download": [
{
"url": "https://files.deeppavlov.ai/0.16/classifiers/glue_rte.tar.gz",
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_rte.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
Expand Down
90 changes: 0 additions & 90 deletions deeppavlov/configs/cv/cv_tfidf_autofaq.json

This file was deleted.

5 changes: 4 additions & 1 deletion deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@
"save_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix.npz",
"load_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix.npz",
"tokenizer": {
"class_name": "ru_tokenizer",
"class_name": "stream_spacy_tokenizer",
"spacy_model": "ru_core_news_sm",
"lemmas": true,
"lowercase": true,
"filter_stopwords": true,
"ngram_range": [
1,
2
Expand Down
61 changes: 0 additions & 61 deletions deeppavlov/configs/embedder/tfidf_vectorizer.json

This file was deleted.

69 changes: 0 additions & 69 deletions deeppavlov/configs/faq/fasttext_avg_autofaq.json

This file was deleted.

112 changes: 112 additions & 0 deletions deeppavlov/configs/faq/fasttext_logreg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
  "dataset_reader": {
    "class_name": "basic_classification_reader",
    "format": "json",
    "orient": "split",
    "x": "text",
    "y": "category",
    "data_path": "{DOWNLOADS_PATH}/massive/{LANGUAGE}",
    "train": "train.json",
    "valid": "dev.json",
    "test": "test.json"
  },
  "dataset_iterator": {
    "class_name": "basic_classification_iterator",
    "seed": 42,
    "shuffle": true,
    "shot": 5
  },
  "chainer": {
    "in": ["text"],
    "in_y": ["category"],
    "pipe": [
      {
        "class_name": "stream_spacy_tokenizer",
        "in": ["text"],
        "id": "my_tokenizer",
        "lemmas": false,
        "out": ["token_lemmas"],
        "spacy_model": "{SPACY_MODEL}"
      },
      {
        "ref": "my_tokenizer",
        "in": ["token_lemmas"],
        "out": ["text_lem"]
      },
      {
        "class_name": "fasttext",
        "in": ["token_lemmas"],
        "load_path": "{DOWNLOADS_PATH}/embeddings/fasttext/{LANGUAGE}.bin",
        "mean": true,
        "out": ["text_vector"]
      },
      {
        "id": "answers_vocab",
        "class_name": "simple_vocab",
        "fit_on": "category",
        "save_path": "{MODEL_PATH}/cat_answers.dict",
        "load_path": "{MODEL_PATH}/cat_answers.dict",
        "in": ["category"],
        "out": ["y_ids"]
      },
      {
        "in": ["text_vector"],
        "fit_on": ["text_vector", "y_ids"],
        "out": ["y_pred_proba"],
        "class_name": "sklearn_component",
        "main": true,
        "save_path": "{MODEL_PATH}/model.pkl",
        "load_path": "{MODEL_PATH}/model.pkl",
        "model_class": "sklearn.linear_model:LogisticRegression",
        "infer_method": "predict_proba",
        "C": 10,
        "penalty": "l2"
      },
      {
        "in": ["y_pred_proba"],
        "out": ["y_pred_ids"],
        "class_name": "proba2labels",
        "max_proba": true
      },
      {
        "in": ["y_pred_ids"],
        "out": ["y_pred_category"],
        "ref": "answers_vocab"
      }
    ],
    "out": ["y_pred_category"]
  },
  "train": {
    "evaluation_targets": ["train", "valid", "test"],
    "class_name": "fit_trainer",
    "metrics": [
      {
        "name": "accuracy",
        "inputs": ["category", "y_pred_category"]
      }
    ]
  },
  "metadata": {
    "variables": {
      "LANGUAGE": "en",
      "ROOT_PATH": "~/.deeppavlov",
      "SPACY_MODEL": "en_core_web_sm",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "MODEL_PATH": "{ROOT_PATH}/models/faq/{LANGUAGE}/fasttext_logreg"
    },
    "download": [
      {
        "url": "https://files.deeppavlov.ai/embeddings/fasttext/{LANGUAGE}.bin",
        "subdir": "{DOWNLOADS_PATH}/embeddings/fasttext"
      },
      {
        "url": "https://files.deeppavlov.ai/datasets/massive-{LANGUAGE}.tar.gz",
        "subdir": "{DOWNLOADS_PATH}/massive/{LANGUAGE}"
      },
      {
        "url": "https://files.deeppavlov.ai/faq/fasttext_logreg_{LANGUAGE}.tar.gz",
        "subdir": "{MODEL_PATH}"
      }
    ]
  }
}

0 comments on commit da6379b

Please sign in to comment.