In [1]:
from pathlib import Path
import timeit

In [2]:
# Short sentence pair: English source and German target.
# Note: the German side has no counterpart for "after landing".
src_short, tgt_short = (
    "Please exit the plane after landing.",
    "Bitte verlassen Sie das Flugzeug.",
)

# Long sentence pair: English source and German target.
# Note: the German side contains "aber nicht wahrscheinlich", which has no
# counterpart in the English sentence.
src_long, tgt_long = (
    "This is a very long source sentence and if a neural machine translation system translates it into German it is possible that some words or even combinations of words will be missed in the translation.",
    "Dies ist ein sehr langer Ausgangssatz, und wenn ein neuronales maschinelles Übersetzungssystem ihn ins Deutsche übersetzt, ist es möglich aber nicht wahrscheinlich, dass einige Wörter oder sogar Wortkombinationen in der Übersetzung fehlen.",
)

# Number of calls per timeit measurement; every printed timing below is the
# total number of seconds for this many calls.
num_repetitions = 1_000

## Our approach

In [3]:
# Load model
from coverage.evaluator import CoverageEvaluator
from translation_models import load_forward_and_backward_model

# Load the "mbart50" model pair for English/German
# (presumably one model per translation direction — confirm in translation_models).
forward_model, backward_model = load_forward_and_backward_model(
    "mbart50",
    src_lang="en",
    tgt_lang="de",
)

# The evaluator starts with both models attached; the timing cells below
# switch `forward_evaluator` / `backward_evaluator` on and off per section.
evaluator = CoverageEvaluator(
    src_lang="en", tgt_lang="de",
    forward_evaluator=forward_model, backward_evaluator=backward_model,
    batch_size=16,
)

### Additions only

In [4]:
# "Additions only" configuration: disable the forward model and keep the
# backward model. Both attributes are assigned explicitly (as in the other
# timing sections) so that this cell produces the same configuration no matter
# which section was executed before it.
evaluator.forward_evaluator = None
evaluator.backward_evaluator = backward_model

# Including parser: only the raw strings are passed, so the timing covers
# whatever parsing detect_errors performs itself.
# Each printed number is the total time in seconds for `num_repetitions` calls.
for src, tgt in [
    (src_short, tgt_short),
    (src_long, tgt_long),
]:
    print(timeit.timeit(lambda: evaluator.detect_errors(
        src=src,
        translation=tgt,
    ), number=num_repetitions))

# Excluding parser: both sentences are parsed once, outside the timed call,
# and the resulting documents are handed to detect_errors.
for src, tgt in [
    (src_short, tgt_short),
    (src_long, tgt_long),
]:
    src_doc = evaluator.src_parser(src)
    tgt_doc = evaluator.tgt_parser(tgt)
    print(timeit.timeit(lambda: evaluator.detect_errors(
        src=src,
        translation=tgt,
        src_doc=src_doc,
        translation_doc=tgt_doc,
    ), number=num_repetitions))

39.73716928437352


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


164.98447458446026
18.294793820008636
101.81147654913366


### Omissions only

In [6]:
# "Omissions only" configuration: forward model enabled, backward model disabled.
evaluator.forward_evaluator = forward_model
evaluator.backward_evaluator = None

sentence_pairs = [(src_short, tgt_short), (src_long, tgt_long)]

# Including parser: detect_errors receives the raw strings only.
# Each printed number is the total time in seconds for `num_repetitions` calls.
for src, tgt in sentence_pairs:
    elapsed = timeit.timeit(
        lambda: evaluator.detect_errors(src=src, translation=tgt),
        number=num_repetitions,
    )
    print(elapsed)

# Excluding parser: parse once up front, then time detect_errors with the
# pre-parsed documents supplied.
for src, tgt in sentence_pairs:
    src_doc = evaluator.src_parser(src)
    tgt_doc = evaluator.tgt_parser(tgt)
    elapsed = timeit.timeit(
        lambda: evaluator.detect_errors(
            src=src,
            translation=tgt,
            src_doc=src_doc,
            translation_doc=tgt_doc,
        ),
        number=num_repetitions,
    )
    print(elapsed)

44.974007941782475
197.40284639038146
20.97033300064504
143.96992459148169


### Both error types

In [7]:
# "Both error types" configuration: forward and backward model both enabled.
evaluator.forward_evaluator = forward_model
evaluator.backward_evaluator = backward_model

benchmark_pairs = ((src_short, tgt_short), (src_long, tgt_long))

# Including parser: time detect_errors on the raw strings.
# Each printed number is the total time in seconds for `num_repetitions` calls.
for src, tgt in benchmark_pairs:
    run_with_parsing = lambda: evaluator.detect_errors(src=src, translation=tgt)
    print(timeit.timeit(run_with_parsing, number=num_repetitions))

# Excluding parser: the documents are produced before timing starts and are
# passed in alongside the raw strings.
for src, tgt in benchmark_pairs:
    src_doc = evaluator.src_parser(src)
    tgt_doc = evaluator.tgt_parser(tgt)
    run_preparsed = lambda: evaluator.detect_errors(
        src=src,
        translation=tgt,
        src_doc=src_doc,
        translation_doc=tgt_doc,
    )
    print(timeit.timeit(run_preparsed, number=num_repetitions))


82.75419463589787
365.30587567947805
37.87803195416927
238.93906163983047


## Baseline

In [3]:
# Load model
from kiwi.lib.predict import load_system

# Checkpoint of the baseline system, given relative to the working directory.
checkpoint_path = Path(".") / "runs/0/newscrawl.en-de.partial.100k.large.seed1/checkpoints/model_epoch=05-val_source_tags_F1_MULT+target_tags_F1_MULT=1.88.ckpt"

# gpu_id=0 is passed through to load_system unchanged.
runner = load_system(checkpoint_path, gpu_id=0)

Vocabulary for source already exists; not loading it again
Vocabulary for target already exists; not loading it again
Vocabulary for pe already exists; not loading it again


In [4]:
# Measure inference time
# Each sentence pair is sent to runner.predict as a one-element batch; the
# printed number is the total time in seconds for `num_repetitions` calls.
sentence_pairs = [(src_short, tgt_short), (src_long, tgt_long)]
for src, tgt in sentence_pairs:
    elapsed = timeit.timeit(
        lambda: runner.predict(source=[src], target=[tgt]),
        number=num_repetitions,
    )
    print(elapsed)

24.569993468001485


25.414803167805076





## Translating

In [4]:
# Time plain translation (beam size 5) of each source sentence with the
# forward model. Only the source side is needed here, so the loop iterates
# over the source sentences directly; this avoids rebinding `_`, which IPython
# uses for the previous cell output.
# Each printed number is the total time in seconds for `num_repetitions` calls.
for src in [src_short, src_long]:
    print(timeit.timeit(lambda: forward_model.translate(
        src_lang="en",
        tgt_lang="de",
        sentences=[src],
        beam=5,
    ), number=num_repetitions))



253.98270384594798
861.1719417851418
