Skip to content

Commit

Permalink
fix: fix various morphotagger bugs (#367)
Browse files (browse the repository at this point in the history)
* fix: fix morphotagger predict configs

* tests: add tests for more morphotagger configs

* fix: make non-pymorphy morphotagger tag all tokens instead of only the first one

* tests: use find_config() in run_model

* fix: move model summary in morphotagger from stdout to logging

* fix: fix training for non-pymorphy morphotagger configs
  • Loading branch information
yoptar committed Aug 10, 2018
1 parent 914527d commit 5ab6b71
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 23 deletions.
2 changes: 1 addition & 1 deletion deeppavlov/models/morpho_tagger/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class TagOutputPrettifier(Component):
"""

def __init__(self, return_string: bool=True, begin: str="",
end: str ="", sep: str ="\n"):
end: str ="", sep: str ="\n", **kwargs):

self.return_string = return_string
self.begin = begin
Expand Down
19 changes: 5 additions & 14 deletions deeppavlov/models/morpho_tagger/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import json
from typing import List

import keras.layers as kl
Expand Down Expand Up @@ -110,7 +108,7 @@ def build(self):
self.model_ = Model(inputs, outputs)
self.model_.compile(**compile_args)
if self.verbose > 0:
log.info(str(self.model_.summary()))
self.model_.summary(print_fn=log.info)
return self

def build_word_cnn(self, inputs):
Expand Down Expand Up @@ -172,12 +170,10 @@ def build_basic_network(self, word_outputs):
return pre_outputs, lstm_outputs

def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
if len(self.word_vectorizers) > 0:
data, additional_data = data[0], data[1:]
data, additional_data = data[0], data[1:]
L = max(len(x) for x in data)
X = np.array([self._make_sent_vector(x, L) for x in data])
if len(self.word_vectorizers) > 0:
X = [X] + [np.array(x) for x in additional_data]
X = [X] + [np.array(x) for x in additional_data]
if labels is not None:
Y = np.array([self._make_tags_vector(y, L) for y in labels])
if transform_to_one_hot:
Expand All @@ -197,7 +193,7 @@ def train_on_batch(self, data, labels):
# TO_DO: add weights to deal with padded instances
return self.model_.train_on_batch(X, Y)

def predict_on_batch(self, data: List, return_indexes=False):
def predict_on_batch(self, data: [list, tuple], return_indexes=False):
"""
Makes predictions on a single batch
Expand All @@ -206,10 +202,7 @@ def predict_on_batch(self, data: List, return_indexes=False):
answer: a batch of label sequences
"""
X = self._transform_batch(data)
if len(self.word_vectorizers) > 0:
objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
else:
objects_number, lengths = len(X), [len(elem) for elem in data]
objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
Y = self.model_.predict_on_batch(X)
labels = np.argmax(Y, axis=-1)
answer: List[List[str]] = [None] * objects_number
Expand Down Expand Up @@ -245,5 +238,3 @@ def save(self, outfile):

def load(self, infile):
    """Load previously saved weights into the network.

    Delegates to ``self.model_.load_weights``; nothing is returned.

    Args:
        infile: location of the weights file, passed through unchanged
            to ``load_weights``.
    """
    # NOTE(review): assumes self.model_ already exists (it is assigned in
    # build()) — confirm that callers always build before loading.
    self.model_.load_weights(infile)


8 changes: 2 additions & 6 deletions deeppavlov/models/morpho_tagger/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import copy
import inspect

Expand Down Expand Up @@ -104,11 +103,8 @@ def train_on_batch(self, *args):
Returns:
"""
if len(args) > 2:
data, labels = [list(x) for x in args[:-1]], list(args[-1])
else:
data, labels = args
self._net.train_on_batch(data, labels, **self.train_parameters)
*data, labels = args
self._net.train_on_batch(data, labels)

def __call__(self, *x_batch, **kwargs):
"""
Expand Down
5 changes: 4 additions & 1 deletion deeppavlov/run_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
limitations under the License.
"""

from deeppavlov.deep import find_config
from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.infer import interact_model


# PIPELINE_CONFIG_PATH = 'configs/intents/intents_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/intents/intents_snips.json'
PIPELINE_CONFIG_PATH = 'configs/ner/ner_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_rus.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/slotfill_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/brillmoore_wikitypos_en.json'
Expand All @@ -37,6 +38,8 @@
# PIPELINE_CONFIG_PATH = 'configs/odqa/en_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ru_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ranker_test.json'
# PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train')
PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train_pymorphy')


if __name__ == '__main__':
Expand Down
9 changes: 8 additions & 1 deletion tests/test_quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,15 @@
},
"morpho_tagger":{
("morpho_tagger/UD2.0/hu/morpho_hu_train.json", "morpho_tagger_hu", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK],
("morpho_tagger/UD2.0/hu/morpho_hu_predict.json", "morpho_tagger_hu", ('IP',)): [ONE_ARGUMENT_INFER_CHECK],
("morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train_pymorphy.json",
"morpho_tagger_pymorphy", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK]
"morpho_tagger_pymorphy", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK],
("morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json",
"morpho_tagger_pymorphy", ALL_MODES): [ONE_ARGUMENT_INFER_CHECK],
("morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json",
"morpho_tagger_pymorphy", ('IP',)): [ONE_ARGUMENT_INFER_CHECK],
("morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict_pymorphy.json",
"morpho_tagger_pymorphy", ('IP',)): [ONE_ARGUMENT_INFER_CHECK]
}
}

Expand Down

0 comments on commit 5ab6b71

Please sign in to comment.