In [21]:
import pandas as pd
import numpy as np
from datasets import load_metric, load_dataset, Dataset
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from transformers import pipeline, TrainingArguments, Trainer, DataCollatorWithPadding, DataCollatorForSeq2Seq
from transformers import AutoTokenizer, T5ForConditionalGeneration
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
from peft import AutoPeftModelForCausalLM
import evaluate
from functools import partial
from peft import LoraConfig, get_peft_model
import torch
from src.envhelpers import set_env, det_gpu_status
from src.helpers import print_all_model_parameters, print_module_blocks, print_named_parameters, print_model_modules, print_model_attributes

In [2]:
set_env()
train_on_gpu = det_gpu_status()
import os
os.environ['REQUESTS_CA_BUNDLE'] = './certs/concatenated SSL bundle cert.cert'

CUDA is available!  Training on GPU ...
Memory allocated: 0
Max memory allocated: 0


In [3]:
source_lang = "en"
target_lang = "de"
langkeys = {"en" : "English", "de" : "German"}

# Load the opus dataset for German-English translations - there is no "en-de" dataset, so need to use "de-en"
germanEnglishDataset = load_dataset("Helsinki-NLP/opus-100", f"{target_lang}-{source_lang}", split="train[:150]")
#method 1a
#germanEnglishDataset = load_dataset("Helsinki-NLP/opus-100", f"{source_lang}-{target_lang}")

In [4]:
# method 1
# This is a Dataset object, so we can call test_train_split
# germanEnglishDataset["train"] is a dictionary that has the "Translation" column, so can't use test_train_split on that object
dataset = germanEnglishDataset.train_test_split(test_size=0.2)
#method 1a
# this somehow is a Dataset object though? If use method 1a above to load dataset
# dataset = germanEnglishDataset["train"].train_test_split(test_size=0.2)

# method 2
# Access the specific splits for training, validation, or testing dictionaries
# train_dataset = dataset['train']
# validation_dataset = dataset['validation']
# test_dataset = dataset['test']

# Get a key, value pair from the first element of the dataset
example_translation_pair = dataset["train"]['translation'][0] # method 1
#example_translation_pair = dataset['translation'][0] # method 1a
#example_translation_pair = train_dataset[0] #method 2
print("Example translation pair:")
print("German:", example_translation_pair['de'])
print("English:", example_translation_pair['en'])

Example translation pair:
German: Sie würden herausfinden wollen: ,Bin ich allein?"
English: You're attempting to find, "Am I alone?"


In [5]:
gpt2 = "gpt2"
googleT5 = "google-t5/t5-small"

# 4 bit quantization during storage, 32 bit during fine tuning attention weights
qLoRA_config_4bit = BitsAndBytesConfig(
    load_in_4bit=True,  
    load_in_8bit=False,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# GPT-2
gpt2Tokenizer = AutoTokenizer.from_pretrained(gpt2)
gpt2Model = AutoModelForCausalLM.from_pretrained(gpt2, quantization_config=qLoRA_config_4bit)

# # Google T5
t5Tokenizer = AutoTokenizer.from_pretrained(googleT5)
t5Model = AutoModelForSeq2SeqLM.from_pretrained(googleT5, quantization_config=qLoRA_config_4bit)

`low_cpu_mem_usage` was None, now set to True since model is quantized.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Example

In [6]:
#print_model_modules(gpt2Model)
# Output -> Model name: gpt2
# Module Name: transformer, Module Type: <class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>
# Module Name: lm_head, Module Type: <class 'torch.nn.modules.linear.Linear'>
# Modules found from _modules, make sure to call model.module

#print_module_blocks(gpt2Model, "transformer")
# Output -> ['wte', 'wpe', 'drop', 'h', 'ln_f']
print_module_blocks(gpt2Model.transformer, "h")
# Output -> {'0': GPT2Block((ln_1):...(attn)..., '1': GPT2Block((ln_1):...(attn)...,...}

# base_model and transformer are the same property for this model
#print(gpt2Model.base_model.h[-1].attn)
#print(gpt2Model.transformer.h[-1].attn)

# Get the last layer/block Self Attention heads from each model - example for the names of the matrix parameters we want to tune
# print(gpt2Model.base_model.h[-1].attn)
# print(t5Model.base_model.decoder.block[-1].layer[0].SelfAttention)

# # Print the attention blocks and their weight parameters in the Encoder
# # The T5 Model's Decoder has 6 T5Stack object, each of which has a ModuleList that has 6 T5Block objects, which represent a layer

# print_named_parameters(gpt2Model.base_model.h[-1].attn)

{'0': GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Linear4bit(in_features=768, out_features=2304, bias=True)
    (c_proj): Linear4bit(in_features=768, out_features=768, bias=True)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.1, inplace=False)
  )
  (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (mlp): GPT2MLP(
    (c_fc): Linear4bit(in_features=768, out_features=3072, bias=True)
    (c_proj): Linear4bit(in_features=3072, out_features=768, bias=True)
    (act): NewGELUActivation()
    (dropout): Dropout(p=0.1, inplace=False)
  )
), '1': GPT2Block(
  (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (attn): GPT2SdpaAttention(
    (c_attn): Linear4bit(in_features=768, out_features=2304, bias=True)
    (c_proj): Linear4bit(in_features=768, out_features=768, bias=True)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (resid_dropout): Dropout(p=0.

Preprocess and Tokenize

In [7]:
prefix = f"translate {langkeys[f'{source_lang}']} to {langkeys[f'{target_lang}']}: "
#prefix = f"translate {langkeys[f'{target_lang}']} to {langkeys[f'{source_lang}']}: "

def preprocess(examples) -> tuple[list[str], list[str]]:
    inputs = [prefix + example[source_lang] for example in examples["translation"]]
    targets = [example[target_lang] for example in examples["translation"]]
    # Do tokenization separately as need different tokenizer for T5, GPT2
    #tokenizer(batch["text"], padding="max_length", truncation=True)
    #model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True)
    return inputs, targets

def tokenizeT5(tokenizer, input_tuple: tuple[list[str], list[str]]):
    inputs, targets = input_tuple
    model_inputs = tokenizer(inputs, text_target=targets, max_length=50, truncation=True)
    return model_inputs

# Setting the padding=max_length seems required or an error will get thrown for GPT2, but takes more memory
def tokenizeGpt(tokenizer, input_tuple: tuple[list[str], list[str]]):
    inputs, targets = input_tuple
    model_inputs = tokenizer(inputs, text_target=targets, max_length=15, padding="max_length", truncation=True)
    return model_inputs

In [8]:
# Repeat for both tokenizers

gpt2Tokenizer.pad_token = gpt2Tokenizer.eos_token

# Or add a new `[PAD]` token as the padding token - this doesn't work for GPT tokenizer
#gpt2Tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# method 1 - this will have "test" and "train" columns from test_train_split (on original train column) to separately input to Trainer function
tokenized_examples_t5 = dataset.map(lambda example: tokenizeT5(t5Tokenizer, preprocess(example)), batched=True)

tokenized_examples_gpt = dataset.map(lambda example: tokenizeGpt(gpt2Tokenizer, preprocess(example)), batched=True)

# method 2
# tokenized_train = train_dataset.map(lambda example: tokenize(gpt2Tokenizer, preprocess(example)), batched=True)
# tokenized_test = test_dataset.map(lambda example: tokenize(gpt2Tokenizer, preprocess(example)), batched=True)

Map: 100%|██████████| 120/120 [00:00<00:00, 5002.75 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 1930.43 examples/s]
Map: 100%|██████████| 120/120 [00:00<00:00, 5998.15 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 1873.29 examples/s]


Evaluate Performance on Model using a Pipeline and a sample

In [9]:
# Evaluation metrics
sacrebleu_metric = evaluate.load("sacrebleu")

sample_text1 = f"{prefix}Had he arrived already, I would have heard him enter the store, but apparently he never showed up."
sample_text2 = f"{prefix}I would have liked to have saved them both"

# Try to reverse translate target language to source to evaluate performance
translator = pipeline(f"translation_{target_lang}_to_{source_lang}", model=googleT5, max_length=400, device=0) # GPU

reference1 = "Wäre er schon da gewesen, hätte ich gehört, wie er den Laden betrat, aber anscheinend ist er nie aufgetaucht."
reference2 = "Ich hätte gern beide gerettet"

translated_sentence1 = translator(sample_text1)
translation = translated_sentence1[0]["translation_text"]

# Compute the SacreBLEU score for the translated output
sacrebleu_score = sacrebleu_metric.compute(predictions=[translation],
                                            references=[[reference1]])

# Display the SacreBLEU score for the translation
print(sample_text1)
print(f"Translation: {translation}")
print(f"Reference: {reference1}")
print("SacreBLEU score:", sacrebleu_score["score"])

translate English to German: Had he arrived already, I would have heard him enter the store, but apparently he never showed up.
Translation: Hätte er bereits angekommen, hätte ich ihn in den Laden hören können, aber er hat sich offenbar nie gezeigt.
Reference: Wäre er schon da gewesen, hätte ich gehört, wie er den Laden betrat, aber anscheinend ist er nie aufgetaucht.
SacreBLEU score: 10.110173740732797


In [10]:
metric = evaluate.load("sacrebleu")

def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds, metric, tokenizer):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

Training using LoRA

In [11]:
# GPT-2 DataCollator
data_collator = DataCollatorWithPadding(tokenizer=gpt2Tokenizer)

gpt2config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["c_attn",
                    "c_proj"],
    lora_dropout=0.1,
    bias="none",
    #modules_to_save=["classifier"],
)
lora_gpt2Model = get_peft_model(gpt2Model, gpt2config)

# Define the path to the directory where the training artifacts are stored
training_args_path_gpt = f"hess_german_to_english_{gpt2}"

# GPT-2 Training

#lora_gpt2Model.to("cuda" if train_on_gpu else "cpu")
training_args = TrainingArguments(
    output_dir= training_args_path_gpt,
    evaluation_strategy="epoch",
    learning_rate=2e-5, #5e-3
    per_device_train_batch_size=1, #128, or 4
    per_device_eval_batch_size=1, # or 2
    weight_decay=0.01,
    # save_total_limit=3,
    num_train_epochs=1,
    # fp16=True,
    # push_to_hub=False
)

# Retrieve the path to the trained model from the TrainingArguments
gpt2_model_path = training_args.output_dir

partial_compute_metrics = partial(compute_metrics, metric=sacrebleu_metric, tokenizer=gpt2Tokenizer)

# Load LoRA model
trainer = Trainer(
    model=lora_gpt2Model,
    args=training_args,
    train_dataset=tokenized_examples_gpt["train"],
    eval_dataset=tokenized_examples_gpt["test"],
    tokenizer=gpt2Tokenizer,
    data_collator=data_collator,
    compute_metrics=partial_compute_metrics
)

# Train and evaluate the transformer on the tokenized train and test data
try:

   trainer.train()
   #trainer.evaluate()
   
except RuntimeError as e:
   if 'CUDA out of memory.' in str(e):
        print("CUDA out of memory error occured")
        torch.cuda.empty_cache() 

  attn_output = torch.nn.functional.scaled_dot_product_attention(
                                                 
100%|██████████| 120/120 [00:12<00:00,  9.92it/s]

{'eval_runtime': 0.9439, 'eval_samples_per_second': 31.783, 'eval_steps_per_second': 31.783, 'epoch': 1.0}
{'train_runtime': 12.1007, 'train_samples_per_second': 9.917, 'train_steps_per_second': 9.917, 'train_loss': 9.059440104166667, 'epoch': 1.0}





In [12]:
try:

   trainer.evaluate()
   
except RuntimeError as e:
   if 'CUDA out of memory.' in str(e):
        print("CUDA out of memory error occured")
        torch.cuda.empty_cache() 

100%|██████████| 30/30 [00:00<00:00, 33.16it/s]


In [13]:
# Seq2Seq DataCollator for TrainingArguments object input
data_collator_seq_2_seq = DataCollatorForSeq2Seq(tokenizer=t5Tokenizer, model=googleT5)

t5config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q",
                    "v"],
    lora_dropout=0.1,
    bias="lora_only",
    modules_to_save=["decode_head"],
)
lora_t5Model = get_peft_model(t5Model, t5config)  

In [14]:
# Define the path to the directory where the training artifacts are stored
training_args_path_t5 = f"hess_german_to_english_{googleT5}"

# Google T-5 Training
#lora_t5Model.to("cuda" if train_on_gpu else "cpu")
training_args = Seq2SeqTrainingArguments(
    output_dir= training_args_path_t5,
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    weight_decay=0.01,
    #save_total_limit=3,
    num_train_epochs=20,
    #predict_with_generate=True,
    #fp16=True,
    #push_to_hub=False
)

# Retrieve the path to the trained model from the TrainingArguments
t5_model_path = training_args.output_dir

partial_compute_metrics = partial(compute_metrics, metric=sacrebleu_metric, tokenizer=t5Tokenizer)

# Load LoRA model
trainer = Seq2SeqTrainer(
    model=lora_t5Model,
    args=training_args,
    train_dataset=tokenized_examples_t5["train"],
    eval_dataset=tokenized_examples_t5["test"],
    tokenizer=t5Tokenizer,
    data_collator=data_collator_seq_2_seq,
    compute_metrics=partial_compute_metrics
)

# Train and evaluate the transformer on the tokenized train and test data
try:

   trainer.train()
   trainer.evaluate()
   
except RuntimeError as e:
   if 'CUDA out of memory.' in str(e):
        print("CUDA out of memory error occured")
        torch.cuda.empty_cache()
         
torch.cuda.empty_cache() 

                                                  
  5%|▌         | 121/2400 [00:14<21:43,  1.75it/s]

{'eval_runtime': 1.5443, 'eval_samples_per_second': 19.426, 'eval_steps_per_second': 19.426, 'epoch': 1.0}


                                                  
 10%|█         | 241/2400 [00:28<11:27,  3.14it/s]

{'eval_runtime': 1.3941, 'eval_samples_per_second': 21.52, 'eval_steps_per_second': 21.52, 'epoch': 2.0}


                                                  
 15%|█▌        | 361/2400 [00:41<10:40,  3.18it/s]

{'eval_runtime': 1.3786, 'eval_samples_per_second': 21.761, 'eval_steps_per_second': 21.761, 'epoch': 3.0}


                                                  
 20%|██        | 481/2400 [00:55<11:38,  2.75it/s]

{'eval_runtime': 1.3908, 'eval_samples_per_second': 21.57, 'eval_steps_per_second': 21.57, 'epoch': 4.0}


 21%|██        | 500/2400 [00:56<03:15,  9.74it/s]

{'loss': 2.3234, 'grad_norm': 0.3837828040122986, 'learning_rate': 1.5833333333333333e-05, 'epoch': 4.17}


                                                  
 25%|██▌       | 601/2400 [01:09<13:22,  2.24it/s]

{'eval_runtime': 1.4, 'eval_samples_per_second': 21.429, 'eval_steps_per_second': 21.429, 'epoch': 5.0}


                                                  
 30%|███       | 722/2400 [01:22<08:42,  3.21it/s]

{'eval_runtime': 1.3998, 'eval_samples_per_second': 21.431, 'eval_steps_per_second': 21.431, 'epoch': 6.0}


                                                  
 35%|███▌      | 842/2400 [01:36<07:03,  3.68it/s]

{'eval_runtime': 1.3781, 'eval_samples_per_second': 21.77, 'eval_steps_per_second': 21.77, 'epoch': 7.0}


                                                  
 40%|████      | 960/2400 [01:49<02:23, 10.05it/s]

{'eval_runtime': 1.4263, 'eval_samples_per_second': 21.034, 'eval_steps_per_second': 21.034, 'epoch': 8.0}


 42%|████▏     | 1000/2400 [01:53<02:19, 10.03it/s]

{'loss': 2.3371, 'grad_norm': 1.1871904134750366, 'learning_rate': 1.1666666666666668e-05, 'epoch': 8.33}


                                                   
 45%|████▌     | 1081/2400 [02:03<11:40,  1.88it/s]

{'eval_runtime': 1.4195, 'eval_samples_per_second': 21.134, 'eval_steps_per_second': 21.134, 'epoch': 9.0}


                                                   
 50%|█████     | 1202/2400 [02:16<05:27,  3.65it/s]

{'eval_runtime': 1.3981, 'eval_samples_per_second': 21.458, 'eval_steps_per_second': 21.458, 'epoch': 10.0}


                                                   
 55%|█████▌    | 1320/2400 [02:30<01:47, 10.07it/s]

{'eval_runtime': 1.3737, 'eval_samples_per_second': 21.838, 'eval_steps_per_second': 21.838, 'epoch': 11.0}


                                                   
 60%|██████    | 1442/2400 [02:43<04:58,  3.21it/s]

{'eval_runtime': 1.3867, 'eval_samples_per_second': 21.634, 'eval_steps_per_second': 21.634, 'epoch': 12.0}


 62%|██████▎   | 1500/2400 [02:49<01:29, 10.10it/s]

{'loss': 2.2501, 'grad_norm': 0.2790587544441223, 'learning_rate': 7.500000000000001e-06, 'epoch': 12.5}


                                                   
 65%|██████▌   | 1561/2400 [02:57<05:11,  2.69it/s]

{'eval_runtime': 1.3772, 'eval_samples_per_second': 21.783, 'eval_steps_per_second': 21.783, 'epoch': 13.0}


                                                   
 70%|███████   | 1680/2400 [03:10<01:11, 10.05it/s]

{'eval_runtime': 1.4027, 'eval_samples_per_second': 21.387, 'eval_steps_per_second': 21.387, 'epoch': 14.0}


                                                   
 75%|███████▌  | 1801/2400 [03:24<03:06,  3.21it/s]

{'eval_runtime': 1.3696, 'eval_samples_per_second': 21.905, 'eval_steps_per_second': 21.905, 'epoch': 15.0}


                                                   
 80%|████████  | 1921/2400 [03:37<02:27,  3.24it/s]

{'eval_runtime': 1.3824, 'eval_samples_per_second': 21.701, 'eval_steps_per_second': 21.701, 'epoch': 16.0}


 83%|████████▎ | 2000/2400 [03:45<00:40,  9.96it/s]

{'loss': 2.2847, 'grad_norm': 0.8167466521263123, 'learning_rate': 3.3333333333333333e-06, 'epoch': 16.67}


                                                   
 85%|████████▌ | 2041/2400 [03:51<02:53,  2.06it/s]

{'eval_runtime': 1.3841, 'eval_samples_per_second': 21.675, 'eval_steps_per_second': 21.675, 'epoch': 17.0}


                                                   
 90%|█████████ | 2161/2400 [04:04<02:14,  1.78it/s]

{'eval_runtime': 1.5231, 'eval_samples_per_second': 19.696, 'eval_steps_per_second': 19.696, 'epoch': 18.0}


                                                   
 95%|█████████▌| 2280/2400 [04:18<00:11, 10.04it/s]

{'eval_runtime': 1.3885, 'eval_samples_per_second': 21.606, 'eval_steps_per_second': 21.606, 'epoch': 19.0}


                                                   
100%|██████████| 2400/2400 [04:32<00:00,  8.82it/s]


{'eval_runtime': 1.4069, 'eval_samples_per_second': 21.324, 'eval_steps_per_second': 21.324, 'epoch': 20.0}
{'train_runtime': 272.0043, 'train_samples_per_second': 8.823, 'train_steps_per_second': 8.823, 'train_loss': 2.285186462402344, 'epoch': 20.0}


100%|██████████| 30/30 [00:01<00:00, 21.86it/s]


In [24]:
lora_gpt2Model.save_pretrained(gpt2_model_path, from_pt=True) 
lora_t5Model.save_pretrained(t5_model_path, from_pt=True)
print(t5_model_path)

hess_german_to_english_google-t5/t5-small


Inference

In [15]:
def translate(text: str, model, tokenizer, use_peft=False) -> str:
    """
    function to translate a text using a model and tokenizer
    """
    with torch.no_grad():
        # encode the text using the tokenizer
        inputs = tokenizer(text, return_tensors="pt").input_ids
        # input_ids = inputs["input_ids"]

        attention_mask=inputs.ne(tokenizer.pad_token_id).float()
        # generate the translation using the model
        if use_peft:
            outputs = model.generate(input_ids=inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95, attention_mask=attention_mask)
        else:
            outputs = model.generate(**inputs)

        # decode the generated output and return the translated text
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return decoded

In [25]:
# Try to reverse translate target language to source to evaluate performance
#translator = pipeline(f"translation_{target_lang}_to_{source_lang}", model=googleT5, device=0) # GPU
# Or...manually replocate the results of the pipeline:

# Load the PEFT model from the saved directory from the Training args output_dir where the model was saved
#gpt2_peft_model = TFPegasusForConditionalGeneration.from_pretrained(gpt2_model_path)
t5_peft_model = T5ForConditionalGeneration.from_pretrained(t5_model_path)

# Evaluate T5 to translate original text post-LoRA and qLoRA training
# NOTE: GPT-2 does not perform well on translation task, so use T5 model
translation1 = translate(sample_text1, t5_peft_model, t5Tokenizer, use_peft=True)
#translation1 = translate(sample_text1, lora_t5Model, t5Tokenizer, use_peft=True)

# Compute the SacreBLEU score for the translated output
sacrebleu_score = sacrebleu_metric.compute(predictions=[translation1],
                                            references=[[reference1]])

# Display the SacreBLEU score for the translation
print(sample_text1 + "\n")
print(f"Original Translation:" + "\n" + f"{translation}" + "\n")
print(f"Post-training Translation:" + "\n" + f"{translation1}" + "\n")
print(f"Reference: {reference1}")
print("SacreBLEU score:", sacrebleu_score["score"])

translate English to German: Had he arrived already, I would have heard him enter the store, but apparently he never showed up.

Original Translation:
Hätte er bereits angekommen, hätte ich ihn in den Laden hören können, aber er hat sich offenbar nie gezeigt.

Post-training Translation:
Wenn er bereits gekommen wäre, hätte ich ihn in den Laden geredet, aber es scheint, dass er nie aufgekommen ist.

Reference: Wäre er schon da gewesen, hätte ich gehört, wie er den Laden betrat, aber anscheinend ist er nie aufgetaucht.
SacreBLEU score: 10.624917754018972
