If you're opening this Notebook on colab, you will probably need to install 🤗 Transformers and 🤗 Datasets as well as other dependencies. Uncomment the following cell and run it.

In [1]:
# ! pip -q install datasets transformers rouge-score nltk

[K     |████████████████████████████████| 298 kB 12.3 MB/s 
[K     |████████████████████████████████| 3.4 MB 48.7 MB/s 
[K     |████████████████████████████████| 61 kB 486 kB/s 
[K     |████████████████████████████████| 1.1 MB 40.5 MB/s 
[K     |████████████████████████████████| 132 kB 47.0 MB/s 
[K     |████████████████████████████████| 243 kB 50.1 MB/s 
[K     |████████████████████████████████| 596 kB 49.4 MB/s 
[K     |████████████████████████████████| 895 kB 49.1 MB/s 
[K     |████████████████████████████████| 3.3 MB 40.1 MB/s 
[K     |████████████████████████████████| 192 kB 50.4 MB/s 
[K     |████████████████████████████████| 271 kB 48.9 MB/s 
[K     |████████████████████████████████| 160 kB 34.5 MB/s 
[?25h

If you're opening this notebook locally, make sure your environment has an install from the last version of those libraries.

To be able to share your model with the community and generate results like the one shown in the picture below via the inference API, there are a few more steps to follow.

First you have to store your authentication token from the Hugging Face website (sign up [here](https://huggingface.co/join) if you haven't already!) then execute the following cell and input your username and password:

In [None]:
# from huggingface_hub import notebook_login

# notebook_login()

Then you need to install Git-LFS. Uncomment the following instructions:

In [None]:
# !apt install git-lfs

Make sure your version of Transformers is at least 4.11.0 since the functionality was introduced in that version:

In [1]:
import transformers

print(transformers.__version__)

ModuleNotFoundError: No module named 'transformers'

You can find a script version of this notebook to fine-tune your model in a distributed fashion using multiple GPUs or TPUs [here](https://github.com/huggingface/transformers/tree/master/examples/seq2seq).

# Fine-tuning a model on a summarization task

In [3]:
import datetime
import json
import math
import os
import random
import time
import pprint
import string

from keras.preprocessing.sequence import pad_sequences
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np

In [4]:
# !pip install sentencepiece
import sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l[K     |▎                               | 10 kB 20.2 MB/s eta 0:00:01[K     |▌                               | 20 kB 25.2 MB/s eta 0:00:01[K     |▉                               | 30 kB 28.1 MB/s eta 0:00:01[K     |█                               | 40 kB 20.7 MB/s eta 0:00:01[K     |█▍                              | 51 kB 15.9 MB/s eta 0:00:01[K     |█▋                              | 61 kB 11.1 MB/s eta 0:00:01[K     |██                              | 71 kB 12.3 MB/s eta 0:00:01[K     |██▏                             | 81 kB 13.6 MB/s eta 0:00:01[K     |██▍                             | 92 kB 12.4 MB/s eta 0:00:01[K     |██▊                             | 102 kB 13.1 MB/s eta 0:00:01[K     |███                             | 112 kB 13.1 MB/s eta 0:00:01[K     |███▎                            | 122 kB 13.1 MB/s eta 0:00:01[K     |██

In [5]:
model_checkpoint = "google/mt5-base"

#### Change this to the particular data folder to use

In [8]:
data_dir = "./data/french/"
os.chdir(data_dir)

In [9]:
with open('train.json', 'r') as f1: 
    train_data = json.load(f1)
with open('dev.json', 'r') as f1:
    val_data = json.load(f1)

In [10]:
#for data in train_data:

def create_summary(train_data):

  """
  1) 'I am a student in<acronym>IITKGP</acronym>'
  """

  examples = []

  for data in train_data:

    ex = {}
    
    text = data['text']
    acronym_indices = data['acronyms']
    long_forms = data['long-forms']

    combined_tags = []
    for ac in acronym_indices:
      combined_tags.append((ac, 0))
    for lf in long_forms:
      combined_tags.append((lf, 1))
    combined_tags = sorted(combined_tags)
    tagged_text = text
    offset = 0
    #print(acronym_indices)

    for acronym in combined_tags:
      if acronym[1] == 0:
        new_text = tagged_text[:acronym[0][0] + offset]
        new_text += '<acronym>'
        new_text += tagged_text[acronym[0][0] + offset:acronym[0][1] + offset]
        new_text += '</acronym>'
        new_text += tagged_text[acronym[0][1] + offset:]
        tagged_text = new_text
        offset += 19
      elif acronym[1] == 1:
        new_text = tagged_text[:acronym[0][0] + offset]
        new_text += '<long>'
        new_text += tagged_text[acronym[0][0] + offset:acronym[0][1] + offset]
        new_text += '</long>'
        new_text += tagged_text[acronym[0][1] + offset:]
        tagged_text = new_text
        offset += 13

    ex['text'] = text
    ex['tagged_text'] = tagged_text
    examples.append(ex)
  
  return examples 

In [11]:
def create_targets(train_data, t):
  """
  2) 'Acronyms  ATP  YND  Long Forms are Indian Techncial Union and Maharashta Sena'  
  3) 'Acronyms: 141 162 123 173 Long Forms: 157 191 134 165'
  5) '141 162 123 173 S 157 191'
  4) 'ATP YND </s> INdian dkdslmslms'
  """

  examples = []

  for data in train_data:

    ex = {}
    
    text = data['text']
    acronym_indices = data['acronyms']
    long_forms = data['long-forms']
    acronym_indices = sorted(acronym_indices)
    long_forms = sorted(long_forms)

    if t == 2:
      tagged_text = '<Acronyms> '
      for ac in acronym_indices:
          tagged_text += text[ac[0]:ac[1]] + ', '
      
      tagged_text = tagged_text[:-2]
      tagged_text += ' <Long Forms> '
      for lf in long_forms:
        tagged_text += text[lf[0]:lf[1]] + ', '
    

    if t == 3:
      tagged_text = '<Acronyms> '
      for ac in acronym_indices:
          tagged_text += str(ac[0]) + ' ' + str(ac[1]) + ' '
      
      tagged_text += '<Long Forms> '
      for lf in long_forms:
        tagged_text += str(lf[0]) + ' ' + str(lf[1]) + ' '


    if t == 5:
      tagged_text = ''
      for ac in acronym_indices:
          tagged_text += str(ac[0]) + ' ' + str(ac[1]) + ' '
      
      tagged_text += '</s> '
      for lf in long_forms:
        tagged_text += str(lf[0]) + ' ' + str(lf[1]) + ' '

    
    if t == 4:
      tagged_text = ''
      for ac in acronym_indices:
          tagged_text += text[ac[0]:ac[1]] + ', '

      tagged_text = tagged_text[:-2]
      tagged_text += ' </s> '
      for lf in long_forms:
        tagged_text += text[lf[0]:lf[1]] +  ', '


    tagged_text = tagged_text[:-(1 + (t + 1) % 2)]
    ex['text'] = text
    ex['tagged_text'] = tagged_text
    examples.append(ex)
  
  return examples   


In [12]:
val_data[3]

{'ID': '4',
 'acronyms': [[331, 334], [3, 7]],
 'long-forms': [[282, 329]],
 'text': "Le PNUD appuiera l'élaboration et l'exécution de plans d'urgence dans les provinces en faveur de l'égalité des sexes et de plans d'adaptation et de réduction des risques de catastrophes naturelles.  Il poursuivra son action en matière de réduction des risques de catastrophe avec l'Organisation internationale pour les migrations (OMI) et d'autres organismes à travers des programmes conjoints. "}

This notebook is built to run  with any model checkpoint from the [Model Hub](https://huggingface.co/models) as long as that model has a sequence-to-sequence version in the Transformers library. Here we picked the [`t5-small`](https://huggingface.co/t5-small) checkpoint. 

## Loading the dataset

We will use the [🤗 Datasets](https://github.com/huggingface/datasets) library to download the data and get the metric we need to use for evaluation (to compare our model to the benchmark). This can be easily done with the functions `load_dataset` and `load_metric`.  

In [13]:
from datasets import load_dataset, load_metric

#raw_datasets = load_dataset("xsum")
metric = load_metric("rouge")

Downloading:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

The `dataset` object itself is [`DatasetDict`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasetdict), which contains one key for the training, validation and test set:

To access an actual element, you need to select a split first, then give an index:

In [14]:
train_dict = []
val_dict = []
target = create_targets(train_data,2)
for i in range(len(train_data)):
  ex = {}
  ex['document'] = target[i]['text'][:512]
  ex['summary'] = target[i]['tagged_text'][:512]
  ex['id'] = i + 1

  train_dict.append(ex)

target = create_targets(val_data, 2)
for i in range(len(val_data)):
  ex = {}
  ex['document'] = target[i]['text'][:512]
  ex['summary'] = target[i]['tagged_text'][:512]
  ex['id'] = i + 1 + len(train_data)

  val_dict.append(ex)


In [15]:
df_train_raw = pd.DataFrame(train_dict)
df_val_raw = pd.DataFrame(val_dict)

In [16]:
df_train_raw.to_csv('train.csv', index = False)
df_val_raw.to_csv('val.csv', index = False)

In [17]:
raw_datasets = load_dataset('csv', data_files={ 'train':'train.csv', 'validation': 'val.csv'})

Using custom data configuration default-bf84e3e1df5fe996


Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-bf84e3e1df5fe996/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-bf84e3e1df5fe996/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

To get a sense of what the data looks like, the following function will show some examples picked randomly in the dataset.

In [18]:
import datasets
import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=5):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
    
    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

In [19]:
show_random_elements(raw_datasets["validation"])

Unnamed: 0,document,summary,id
0,Total par bureau de pays Bureau de liaison pour l'action contre l'Ebola à New York MINUAUCE = Mission des Nations Unies pour l'action d'urgence contre l'Ebola; SGA = Secrétaire général adjoint; SSG = Sous-Secrétaire général.,"<Acronyms> MINUAUCE, SGA, SSG <Long Forms> Mission des Nations Unies pour l'action d'urgence contre l'Ebola, Secrétaire général adjoint, Sous-Secrétaire général",7838
1,Transfert de 1 poste de fonctionnaire des finances (P-3) à la Section du développement des capacités du Service des budgets et des rapports sur leur exécution Transfert de 1 poste de chef de service (D-1) au Bureau de l'auditeur résident (Entebbe) Transfert de 1 poste d'auditeur résident (P-4) au Bureau de l'auditeur résident du Bureau d'appui de l'ONU pour la Mission de l'Union africaine en Somalie (UNSOA) Département de l'appui aux missions - Division du budget et des finances des missions - Service des b,"<Acronyms> ONU, UNSOA <Long Forms> Mission de l'Union africaine en Somalie",8415
2,"17. La situation en République centrafricaine a été plus stable mais, depuis janvier 2009, en raison d'affrontements qui ont récemment éclaté entre les forces armées et les rebelles dans le nord du pays, environ 17 000 personnes se sont réfugiées dans le sud du Tchad, notamment dans la zone qui relève du mandat de la Mission des Nations Unies en République centrafricaine et au Tchad (MINURCAT).",<Acronyms> MINURCAT <Long Forms> Mission des Nations Unies en République centrafricaine et au Tchad,8230
3,"21. En outre, les pays parties de l'annexe IV sont convenus de mettre en place des réseaux thématiques régionaux (RTR), en ont défini les mandats et ont rédigé une note méthodologique susceptible d'être consultée pour élaborer le mandat de chaque réseau aux niveaux régional et sous-régional.",<Acronyms> RTR <Long Forms> réseaux thématiques régionaux,7924
4,"Rappelant sa décision de lever les mesures relatives aux diamants imposées au paragraphe 6 de sa résolution 1521 (2003), Se félicitant de la participation du Gouvernement libérien au Système de certification du Processus de Kimberley, notant que le Libéria a mis en place les contrôles et autres recommandations d'ordre interne nécessaires pour satisfaire aux conditions du Processus de Kimberley, et engageant le Gouvernement libérien à poursuivre avec diligence les efforts qu'il déploie pour garantir l'effica",<Acronyms> MINUL <Long Forms> Mission des Nations Unies au Libéria,8159


The metric is an instance of [`datasets.Metric`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Metric):

In [20]:
metric

Metric(name: "rouge", features: {'predictions': Value(dtype='string', id='sequence'), 'references': Value(dtype='string', id='sequence')}, usage: """
Calculates average rouge scores for a list of hypotheses and references
Args:
    predictions: list of predictions to score. Each predictions
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
    rouge_types: A list of rouge types to calculate.
        Valid names:
        `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring,
        `"rougeL"`: Longest common subsequence based scoring.
        `"rougeLSum"`: rougeLsum splits text using `"
"`.
        See details in https://github.com/huggingface/datasets/issues/617
    use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes.
    use_agregator: Return aggregates if this is set to True
Retu

You can call its `compute` method with your predictions and labels, which need to be list of decoded strings:

In [21]:
!pip install sentencepiece



In [22]:
import sentencepiece

In [23]:
!pip install transformers[sentencepiece]



## Preprocessing the data

In [24]:
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained('google/mt5-base')

Downloading:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/702 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

In [25]:
if model_checkpoint in ["t5-small", "t5-base", "t5-larg", "t5-3b", "t5-11b"]:
    prefix = "summarize: "
else:
    prefix = ""

In [26]:
max_input_length = 4096
max_target_length = 4096

def preprocess_function(examples):
    inputs = [prefix + doc for doc in examples["document"]]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

    # Setup the tokenizer for targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=max_target_length, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

This function works with one or several examples. In the case of several examples, the tokenizer will return a list of lists for each key:

In [27]:
preprocess_function(raw_datasets['train'][:2])

{'input_ids': [[459, 8096, 340, 1451, 5826, 2995, 261, 520, 259, 21075, 263, 498, 259, 35577, 6988, 299, 20285, 1401, 498, 24964, 263, 261, 60474, 299, 383, 21494, 331, 277, 10957, 14216, 1052, 283, 259, 140896, 47168, 269, 283, 13441, 83447, 18946, 263, 264, 37897, 261, 907, 259, 2759, 2777, 78901, 1834, 289, 259, 8867, 28065, 260, 1641, 259, 5926, 289, 259, 5304, 498, 13275, 299, 1888, 111932, 1080, 259, 280, 277, 214298, 498, 259, 35577, 6988, 299, 763, 340, 331, 33424, 69375, 274, 54049, 17062, 271, 259, 10977, 283, 24964, 865, 259, 280, 277, 76852, 3906, 498, 1528, 299, 383, 283, 1451, 2016, 47168, 498, 259, 37897, 259, 262, 259, 3928, 20285, 1891, 289, 259, 27429, 24962, 259, 1], [8755, 260, 21873, 346, 14116, 520, 72936, 361, 171624, 263, 642, 432, 25065, 981, 261, 259, 280, 277, 53791, 498, 259, 35577, 6988, 299, 14116, 283, 331, 160323, 383, 340, 28564, 274, 89522, 11844, 271, 259, 262, 31470, 361, 1065, 289, 48582, 269, 1068, 865, 283, 14971, 331, 277, 48567, 606, 289, 73261,

In [28]:
tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

  0%|          | 0/8 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

## Fine-tuning the model

Now that our data is ready, we can download the pretrained model and fine-tune it. Since our task is of the sequence-to-sequence kind, we use the `AutoModelForSeq2SeqLM` class. Like with the tokenizer, the `from_pretrained` method will download and cache the model for us.

In [29]:
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/2.17G [00:00<?, ?B/s]

In [30]:
batch_size = 2
model_name = model_checkpoint.split("/")[-1]
args = Seq2SeqTrainingArguments(
    model_name,
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=5,
    predict_with_generate=True,
)

In [31]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [32]:
import nltk
import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
    
    result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    # Extract a few results
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}
    
    # Add mean generated length
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)
    
    return {k: round(v, 4) for k, v in result.items()}

In [33]:
model.to('cuda')

MT5ForConditionalGeneration(
  (shared): Embedding(250112, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(250112, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedGeluDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (

In [34]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [35]:
model_checkpoint

'google/mt5-base'

We can now finetune our model by just calling the `train` method:

In [36]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [37]:
trainer.train()

The following columns in the training set  don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: summary, document, id.
***** Running training *****
  Num examples = 7783
  Num Epochs = 5
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization steps = 19460


Epoch,Training Loss,Validation Loss


RuntimeError: ignored

You can now upload the result of the training to the Hub, just execute this instruction:

In [None]:
# trainer.push_to_hub()

In [38]:
predictions = trainer.predict(tokenized_datasets['validation'], max_length=1000)

The following columns in the test set  don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: summary, document, id.
***** Running Prediction *****
  Num examples = 973
  Batch size = 2


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: ignored

In [None]:
predictions[0]

In [None]:
result = tokenizer.batch_decode(predictions[0], skip_special_tokens=True)
# predictions[0]
# print(result[2])

In [None]:
from pprint import pprint

In [None]:
import torch

In [None]:
torch.save(result, '/content/result.pt')

In [None]:
torch.save(raw_datasets, '/content/raw_datasets.pt')

In [None]:
pred = result

In [None]:
import re

In [None]:
outs = []
for i in range(len(val_data)):
  try:
    print(i)
    pred_dict = {}
    pred_dict['ID'] = str(i + 1)
    pred_dict['acronyms'] = []
    pred_dict['long-forms'] = []
    pred_dict['text'] = val_data[i]['text']
    s = pred[i]
    if '<Acronyms>' not in s:
      outs.append(pred_dict)
      continue
    if '<Long Forms>' in s:
      ac = s.split('<Acronyms>')[1].split('<Long Forms>')[0].split(',')
      lf =  s.split('<Long Forms>')[1].split(',')
    else:
      ac = s.split('<Acronyms>')[1]
      lf = []
    ac_n = []
    lf_n = []
    for el in ac:
      ac_n.append(el.strip())
    for el in lf:
      lf_n.append(el.strip())

    ac = set(ac_n)
    lf = set(lf_n)
    for el in ac:
      acronym_ind = []
      #print(el.strip())
      start_list = [m.start() for m in re.finditer(el.strip(), pred_dict['text'])]
      for start in start_list:
        end = len(el.strip()) + start
        acronym_ind.append(start)
        acronym_ind.append(end)
        pred_dict['acronyms'].append(acronym_ind)
    
    for el in lf:
      lf_ind = []
      start_list = [m.start() for m in re.finditer(el.strip(), pred_dict['text'])]
      for start in start_list:
        end = len(el.strip()) + start
        lf_ind.append(start)
        lf_ind.append(end)
        pred_dict['long-forms'].append(lf_ind)

    outs.append(pred_dict)
  except:
    print(i)
    pred_dict = {}
    pred_dict['ID'] = str(i + 1)
    pred_dict['acronyms'] = []
    pred_dict['long-forms'] = []
    pred_dict['text'] = val_data[i]['text']
    outs.append(pred_dict)
    print('Error at i')

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [None]:
with open('output.json', 'w') as f:
  json.dump(outs, f, indent = 4)


In [None]:
! git clone https://github.com/amirveyseh/AAAI-22-SDU-shared-task-1-AE.git

fatal: destination path 'AAAI-22-SDU-shared-task-1-AE' already exists and is not an empty directory.


In [None]:
! python AAAI-22-SDU-shared-task-1-AE/scorer.py -g AAAI-22-SDU-shared-task-1-AE/data/french/dev.json -p output.json

tcmalloc: large alloc 4947607552 bytes == 0x55dc84fd0000 @  0x7efd0ffb81e7 0x55dc82bb5f98 0x55dc82b80e27 0x55dc82cff115 0x55dc82c99888 0x55dc82b846f2 0x55dc82c62c6e 0x55dc82c99802 0x55dc82b846f2 0x55dc82b87926 0x55dc82d027a3 0x55dc82b84349 0x55dc82c75e1d 0x55dc82bf7e99 0x55dc82bf2ced 0x55dc82b85bda 0x55dc82bf7d00 0x55dc82bf2ced 0x55dc82b85bda 0x55dc82bf3915 0x55dc82bf29ee 0x55dc82bf26f3 0x55dc82cbc4c2 0x55dc82cbc83d 0x55dc82cbc6e6 0x55dc82c94163 0x55dc82c93e0c 0x7efd0eda2bf7 0x55dc82c93cea
tcmalloc: large alloc 4947607552 bytes == 0x55ddabe38000 @  0x7efd0ffb81e7 0x55dc82bb5f98 0x55dc82bcb4ec 0x55dc82c58e93 0x55dc82b84349 0x55dc82b84240 0x55dc82bf80f3 0x55dc82bf29ee 0x55dc82b86271 0x55dc82b85720 0x55dc82b87698 0x55dc82c64441 0x55dc82d027d1 0x55dc82b84349 0x55dc82c75e1d 0x55dc82bf7e99 0x55dc82bf2ced 0x55dc82b85bda 0x55dc82bf7d00 0x55dc82bf2ced 0x55dc82b85bda 0x55dc82bf3915 0x55dc82bf29ee 0x55dc82bf26f3 0x55dc82cbc4c2 0x55dc82cbc83d 0x55dc82cbc6e6 0x55dc82c94163 0x55dc82c93e0c 0x7efd0eda