In [16]:
%%writefile requirements.txt

rouge_score==0.0.4
jsonlines==3.0.0
datasets==1.18.2
scispacy==0.4.0
unbabel-comet==1.1.2
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz

Overwriting requirements.txt


In [17]:
!pip install -r requirements.txt
!pip install mlflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz (from -r requirements.txt (line 7))
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz (15.6 MB)
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [18]:
from comet import download_model, load_from_checkpoint

In [19]:
import sys
import json
import jsonlines
import os
from glob import glob
from datasets import load_metric
from comet import download_model, load_from_checkpoint
from nltk import word_tokenize
import scispacy
import spacy
import logging
import mlflow

# Metric identifiers. In the scoring cell these four are passed to
# compute_metric() (and from there to load_metric()) and also serve as keys of
# the results dictionary.
BLEU = 'bleu'
ROUGE = 'rouge'
SACREBLEU = 'sacrebleu'
METEOR = 'meteor'
# Key under which the COMET model's predictions are stored in the results.
COMET = 'comet'
# Key for the NER accuracy score; in the visible cells it only appears in a
# commented-out results entry.
NER_ACCURACY = 'ner_accuracy'
# Name of the COMET checkpoint handed to download_model().
COMET_MODEL = "wmt21-cometinho-da"

In [20]:
def read_jsonlines(path, source_language, target_language):
    """Collect the target- and source-language strings from a JSON Lines file.

    Every record read from the file is iterated as a collection of entries,
    and each entry must provide ``entry['translation'][<language key>]`` for
    both requested languages.

    Args:
        path: Location of the JSON Lines file to read.
        source_language: Key of the source text inside ``'translation'``.
        target_language: Key of the target text inside ``'translation'``.

    Returns:
        A ``(target, source)`` pair of lists aligned by position. Note that
        the target texts come first.
    """
    with jsonlines.open(path, 'r') as reader:
        # Flatten the file: one 'translation' mapping per entry, in file order.
        pairs = [entry['translation'] for record in reader for entry in record]
    targets = [pair[target_language] for pair in pairs]
    sources = [pair[source_language] for pair in pairs]
    return targets, sources


def compute_metric(metric_name, references, predictions):
    """Load the named metric and score ``predictions`` against ``references``.

    Args:
        metric_name: Identifier handed to ``load_metric``.
        references: Reference texts, in whatever layout the metric expects.
        predictions: Candidate texts, aligned with ``references``.

    Returns:
        Whatever the loaded metric's ``compute`` call returns.
    """
    metric = load_metric(metric_name)
    scores = metric.compute(references=references, predictions=predictions)
    return scores


def ner_accuracy(ner, target_ref, target_pred):
    """Score how many annotated entities of each reference appear in its prediction.

    The JSON file at ``ner`` is looked up by reference sentence; each hit is a
    list of entities, every entity being a mapping with a ``'text'`` string and
    a ``'labels'`` list. An entity counts as translated when either the
    lower-cased content-word lemmas of its text occur as a substring of the
    prediction's content-word lemmas, or its raw text occurs verbatim in the
    prediction. Entities that are not found are reported via ``logging.info``.

    Args:
        ner: Path to the UTF-8 JSON file with the entity annotations.
        target_ref: Reference translations, used as keys into the annotations.
        target_pred: Predicted translations, aligned with ``target_ref``.

    Returns:
        ``(accuracies, mean)``: the per-pair share of entities found
        (``1.0`` for references without annotated entities) and the mean over
        the pairs that were actually scored. ``mean`` is ``0.0`` when there is
        nothing to score.
    """
    with open(ner, 'r', encoding='utf8') as f:
        data = json.load(f)
    nlp = spacy.load("en_core_sci_sm", exclude=["ner", "parser"])
    pos_tags = ['PROPN', 'VERB', 'NOUN', 'ADJ']

    def content_lemmas(text):
        # Lower-cased lemmas of the content words only, joined by spaces.
        return ' '.join(word.lemma_ for word in nlp(text)
                        if word.pos_ in pos_tags).lower()

    accuracies = []
    for ref, pred in zip(target_ref, target_pred):
        ner_reference = data.get(ref, [])
        if not ner_reference:
            # Nothing to find, so the pair scores perfectly; the prediction
            # does not need to be parsed at all in this case.
            accuracies.append(1.0)
            continue
        lemmas = content_lemmas(pred)
        found = 0
        for entity in ner_reference:
            entity_text = content_lemmas(entity['text'])
            # NOTE(review): an entity without any content-word lemma yields
            # '' here, and '' is a substring of every string, so such an
            # entity always counts as found. Confirm this is intended.
            if entity_text in lemmas or entity['text'] in pred:
                found += 1
            else:
                logging.info(
                    f"named entity not translated: {entity['text']},"
                    f" {pred}, {entity['labels'][0]}")
        accuracies.append(found / len(ner_reference))
    if not accuracies:
        # Empty input: avoid dividing by zero.
        return accuracies, 0.0
    # Average over the pairs that were scored. zip() stops at the shorter
    # input, so len(target_pred) would be the wrong denominator whenever the
    # two lists differ in length.
    return accuracies, sum(accuracies) / len(accuracies)


def write_outfile(results, output_dir, source_language, target_language):
    """Log the headline scores of an evaluation run to MLflow.

    Despite its name, this function writes no file: it logs the COMET, BLEU
    and ROUGE-L F1 values to an MLflow experiment named after the language
    pair and then ends the run.

    Args:
        results: Mapping keyed by the metric constants. ``results[COMET]``
            must be indexable (its last element is logged — presumably the
            system-level score; confirm against the installed COMET version),
            ``results[BLEU]`` must contain a ``BLEU`` key, and
            ``results[ROUGE]['rougeL']`` must expose ``.mid.fmeasure``.
        output_dir: Unused; kept so existing callers keep working.
        source_language: Source language code, part of the experiment name.
        target_language: Target language code, part of the experiment name.

    Raises:
        KeyError: If ``results`` lacks one of the expected entries. This is
            raised before any MLflow call is made.
    """
    # Extract the values first so malformed results fail before the tracking
    # server is contacted or an experiment is created.
    comet_score = results[COMET][-1]
    bleu_score = results[BLEU][BLEU]
    rouge_l_f1 = results[ROUGE]['rougeL'].mid.fmeasure

    # Honour MLFLOW_TRACKING_URI when it is set; otherwise fall back to the
    # server this notebook has always used.
    tracking_uri = (os.environ.get("MLFLOW_TRACKING_URI")
                    or "http://18.157.246.27:8080")
    mlflow.set_tracking_uri(tracking_uri)
    # Derive the experiment from the language pair instead of hard-coding
    # "fr-en"; for ("fr", "en") the name is unchanged.
    mlflow.set_experiment(f"my-experiment-{source_language}-{target_language}")
    logging.info(results)
    try:
        mlflow.log_metric("COMET", comet_score)
        mlflow.log_metric("BLEU", bleu_score)
        mlflow.log_metric("ROUGE-L-F1", rouge_l_f1)
    finally:
        # Always close the run, even if one of the log calls fails.
        mlflow.end_run()

In [21]:
# Locations used by this evaluation run.
submit_dir = 'input'   # directory holding the predictions file
truth_dir = 'output'   # directory holding the reference file and ner.json
# Create the directory if needed; exist_ok avoids the separate existence
# check and the race between checking and creating.
os.makedirs(truth_dir, exist_ok=True)
source_language, target_language = "fr", "en"
answer_path = os.path.join(submit_dir, 'mapping_predictions.jsonl')
truth_file = os.path.join(truth_dir, "mapping_original.jsonl")
ner = os.path.join(truth_dir, 'ner.json')

In [22]:
# Read the reference file and the predictions file; read_jsonlines returns the
# target-language texts first and the source-language texts second.
target_ref, source_ref = read_jsonlines(
    path=truth_file,
    source_language=source_language,
    target_language=target_language,
)
target_pred, source_pred = read_jsonlines(
    path=answer_path,
    source_language=source_language,
    target_language=target_language,
)


In [23]:
# Show how many reference and predicted translations were loaded, one count
# per line.
print(len(target_ref), len(target_pred), sep='\n')

513
513


In [24]:
# Sanity checks before scoring: both files must contain the same source
# sentences in the same order, and one prediction per reference.
assert source_pred == source_ref, (
    "source sentences in the predictions file differ from the reference file")
assert len(target_ref) == len(target_pred), (
    f"{len(target_ref)} reference translations vs "
    f"{len(target_pred)} predicted translations")

In [25]:
# word_tokenize (used to build the BLEU inputs) relies on NLTK's 'punkt'
# tokenizer data, so fetch it before tokenising.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [26]:


# BLEU works on token lists. Each reference is wrapped in its own list so that
# several true translations per prediction are allowed.
bleu_references = [[word_tokenize(reference)] for reference in target_ref]
bleu_predictions = [word_tokenize(prediction) for prediction in target_pred]
# SacreBLEU takes the raw strings, again with a list of references per item.
sacrebleu_references = [[reference] for reference in target_ref]

# Fetch and load the COMET checkpoint.
model_path = download_model(COMET_MODEL)
model = load_from_checkpoint(model_path)
logging.info("Comet checkpoint loaded")
# One record per sentence: source text, predicted translation, reference.
comet_data = [
    {"src": src, "mt": mt, "ref": ref}
    for src, mt, ref in zip(source_ref, target_pred, target_ref)
]

# Compute every metric in turn and collect the scores by metric name.
results = {}
results[BLEU] = compute_metric(BLEU, bleu_references, bleu_predictions)
results[ROUGE] = compute_metric(ROUGE, target_ref, target_pred)
results[SACREBLEU] = compute_metric(SACREBLEU, sacrebleu_references,
                                    target_pred)
results[METEOR] = compute_metric(METEOR, target_ref, target_pred)
# NER accuracy is currently switched off:
# results[NER_ACCURACY] = ner_accuracy(ner, target_ref, target_pred)
results[COMET] = model.predict(comet_data, gpus=0)  # gpus=0 is passed to predict

write_outfile(results, truth_dir, source_language, target_language)

wmt21-cometinho-da.tar.gz: 344MB [00:08, 40.9MB/s]                           


Downloading sentencepiece.bpe.model:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/689 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/408M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.49k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.21k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|██████████| 65/65 [00:37<00:00,  1.72it/s]
2022/08/23 07:40:21 INFO mlflow.tracking.fluent: Experiment with name 'my-experiment-fr-en' does not exist. Creating a new experiment.
