### Load a test dataset:

In [1]:
#imports
from datasets import load_dataset, get_dataset_config_names
from datasets import get_dataset_split_names

In [2]:
wmt17 = load_dataset('wmt17', 'de-en')

Reusing dataset wmt17 (C:\Users\jvero\.cache\huggingface\datasets\wmt17\de-en\1.0.0\626ef4b4893062acbd74367a303a029b0a61c6a093453dd1e55477155d7b27c2)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# print 5 german and 5 english translations
de_inp = [wmt17["test"][i]["translation"]["de"] for i in range(5)]
en_ref = [wmt17["test"][i]["translation"]["en"] for i in range(5)]

In [4]:
print(de_inp)
print('\n', en_ref)

['28-jähriger Koch in San Francisco Mall tot aufgefunden', 'Ein 28-jähriger Koch, der vor kurzem nach San Francisco gezogen ist, wurde im Treppenhaus eines örtlichen Einkaufzentrums tot aufgefunden.', 'Der Bruder des Opfers sagte aus, dass er sich niemanden vorstellen kann, der ihm schaden wollen würde, "Endlich ging es bei ihm wieder bergauf."', 'Der am Mittwoch morgen in der Westfield Mall gefundene Leichnam wurde als der 28 Jahre alte Frank Galicia aus San Francisco identifiziert, teilte die gerichtsmedizinische Abteilung in San Francisco mit.', 'Das San Francisco Police Department sagte, dass der Tod als Mord eingestuft wurde und die Ermittlungen am Laufen sind.']

 ['28-Year-Old Chef Found Dead at San Francisco Mall', 'A 28-year-old chef who had recently moved to San Francisco was found dead in the stairwell of a local mall this week.', 'But the victim\'s brother says he can\'t think of anyone who would want to hurt him, saying, "Things were finally going well for him."', "The bod

### Generate Translations Using a Transformer Model
Load a german to english transformer model

In [5]:
import torch

import pandas as pd
import numpy as np

import datasets
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/bert2bert_L-24_wmt_de_en", pad_token="<pad>", eos_token="</s>", bos_token="<s>")
model = AutoModelForSeq2SeqLM.from_pretrained("google/bert2bert_L-24_wmt_de_en")
model.to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


EncoderDecoderModel(
  (encoder): BertGenerationEncoder(
    (embeddings): BertGenerationEmbeddings(
      (word_embeddings): Embedding(31950, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
    

In [7]:
# tokenize german sentences
input_ids = tokenizer(
    de_inp,
    return_tensors="pt",
    add_special_tokens=False,
    padding=True,
    truncation=True).input_ids.to(device)
# generate english translation
output_ids = model.generate(input_ids)
# decode translation
translations = [
    tokenizer.decode(out_ids, skip_special_tokens=True) for out_ids in output_ids
]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [8]:
print(translations)

['28 - year old cock in san francisco mall found dead.', 'A 28-year-old cook, who recently moved to san francisco, was found dead in the escalator in an official shopping centre.', 'the brother of the sacrifice said that he could not imagine anyone who would harm him, "at last it was uphill again with him".', 'the corpse found on Wednesday morning in the westfield mall was identified as the 28 year old frank galicia from san francisco, informed the forensic division in san francisco.', 'the san francisco police department said that the tod was classified as mord and the investigation is underway.']


### Import metric with HuggingFaces

In [9]:
from datasets import load_metric

In [10]:
# load own implementation
metric = load_metric("MoverScore_HF_implementation_JV.py")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
test_refs = [translations]
print(test_refs)

[['28 - year old cock in san francisco mall found dead.', 'A 28-year-old cook, who recently moved to san francisco, was found dead in the escalator in an official shopping centre.', 'the brother of the sacrifice said that he could not imagine anyone who would harm him, "at last it was uphill again with him".', 'the corpse found on Wednesday morning in the westfield mall was identified as the 28 year old frank galicia from san francisco, informed the forensic division in san francisco.', 'the san francisco police department said that the tod was classified as mord and the investigation is underway.']]


In [12]:
metric.corpus_score(en_ref, test_refs)

0.6787179553540694

In [13]:
metric._compute(en_ref, test_refs)

0.6787179553540694

### Compare with example on github

In [14]:
refs = [['The dog bit the man.', 'It was not unexpected.', 'The man bit him first.'],
        ['The dog had bit the man.', 'No one was surprised.', 'The man had bitten the dog.']]
sys = ['The dog bit the man.', "It wasn't surprising.", 'The man had just bitten him.']

In [15]:
metric.corpus_score(sys, refs)

0.7390671673481917