Skip to content

Commit

Permalink
feat: output tokens as well as tags in all NER configs (#337)
Browse files Browse the repository at this point in the history
* fix: output tokens for ner network

* fix: fix all ner configs
  • Loading branch information
mu-arkhipov authored and yoptar committed Jul 25, 2018
1 parent e1e7fe6 commit d18497c
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 35 deletions.
13 changes: 6 additions & 7 deletions deeppavlov/configs/ner/ner_conll2003.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand Down Expand Up @@ -49,7 +49,7 @@
"out": ["y_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "char_splitter",
"out": ["x_char"]
},
Expand All @@ -64,7 +64,7 @@
"out": ["x_char_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "mask",
"out": ["mask"]
},
Expand Down Expand Up @@ -93,7 +93,7 @@
{
"id": "capitalization",
"name": "capitalization_featurizer",
- "in": ["x"],
+ "in": ["x_tokens"],
"out": ["cap"]
},
{
Expand Down Expand Up @@ -129,8 +129,7 @@
"out": ["tags"]
}
],
-
- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},
"train": {
"epochs": 100,
Expand Down
12 changes: 6 additions & 6 deletions deeppavlov/configs/ner/ner_conll2003_pos.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand Down Expand Up @@ -65,7 +65,7 @@
"out": ["y_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "char_splitter",
"out": ["x_char"]
},
Expand All @@ -80,7 +80,7 @@
"out": ["x_char_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "mask",
"out": ["mask"]
},
Expand Down Expand Up @@ -110,7 +110,7 @@
{
"id": "capitalization",
"name": "capitalization_featurizer",
- "in": ["x"],
+ "in": ["x_tokens"],
"out": ["cap"]
},
{
Expand Down Expand Up @@ -148,7 +148,7 @@
}
],

- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},
"train": {
"epochs": 100,
Expand Down
8 changes: 4 additions & 4 deletions deeppavlov/configs/ner/ner_dstc2.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand Down Expand Up @@ -48,7 +48,7 @@
"out": ["y_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "mask",
"out": ["mask"]
},
Expand Down Expand Up @@ -77,7 +77,7 @@
"out": ["tags"]
}
],
- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},
"train": {
"epochs": 100,
Expand Down
12 changes: 6 additions & 6 deletions deeppavlov/configs/ner/ner_ontonotes.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand All @@ -38,7 +38,7 @@
"out": ["y_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "char_splitter",
"out": ["x_char"]
},
Expand All @@ -53,7 +53,7 @@
"out": ["x_char_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "mask",
"out": ["mask"]
},
Expand All @@ -77,7 +77,7 @@
{
"id": "capitalization",
"name": "capitalization_featurizer",
- "in": ["x"],
+ "in": ["x_tokens"],
"out": ["cap"]
},
{
Expand Down Expand Up @@ -114,7 +114,7 @@
}
],

- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},
"train": {
"epochs": 100,
Expand Down
12 changes: 6 additions & 6 deletions deeppavlov/configs/ner/ner_rus.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand Down Expand Up @@ -49,7 +49,7 @@
"out": ["y_ind"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "char_splitter",
"out": ["x_char"]
},
Expand All @@ -72,7 +72,7 @@
"out": ["x_emb"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "mask",
"out": ["mask"]
},
Expand All @@ -91,7 +91,7 @@
{
"id": "capitalization",
"name": "capitalization_featurizer",
- "in": ["x"],
+ "in": ["x_tokens"],
"out": ["cap"]
},
{
Expand Down Expand Up @@ -127,7 +127,7 @@
}
],

- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},
"train": {
"epochs": 100,
Expand Down
8 changes: 4 additions & 4 deletions deeppavlov/configs/ner/slotfill_dstc2.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"config_path": "../deeppavlov/configs/ner/ner_dstc2.json",
- "out": ["tags"]
+ "out": ["x_tokens", "tags"]
},

{
- "in": ["x", "tags"],
+ "in": ["x_tokens", "tags"],
"name": "dstc_slotfilling",
"threshold": 0.8,
"save_path": "slotfill_dstc2/dstc_slot_vals.json",
Expand Down
12 changes: 10 additions & 2 deletions deeppavlov/configs/ner/slotfill_dstc2_raw.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
{
+ "dataset_reader": {
+ "name": "dstc2_reader",
+ "data_path": "dstc2"
+ },
+ "dataset_iterator": {
+ "name": "dstc2_ner_iterator",
+ "dataset_path": "dstc2"
+ },
"chainer": {
"in": ["x"],
"pipe": [
{
"in": ["x"],
"name": "lazy_tokenizer",
- "out": ["x"]
+ "out": ["x_tokens"]
},
{
- "in": ["x"],
+ "in": ["x_tokens"],
"name": "str_lower",
"out": ["x_lower"]
},
Expand Down
1 change: 1 addition & 0 deletions deeppavlov/metrics/fmeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

@register_metric('ner_f1')
def ner_f1(y_true, y_predicted):
_, y_predicted = zip(*y_predicted)
y_true = list(chain(*y_true))
y_predicted = list(chain(*y_predicted))
results = precision_recall_f1(y_true,
Expand Down

0 comments on commit d18497c

Please sign in to comment.