# Edge-Probing Fine-tuning Example

In this notebook, we will:

* Train a RoBERTa base model on Edge-Probing (Semeval) and evaluate its performance
* Because the Edge-Probing data is not publicly available, we will simulate the run with a single example. This will serve as a guide for users who have access to the task data, or similarly formatted data.
* **The encoder is not frozen for training runs in this notebook.**

The code shown in this notebook will work, but the results will not be representative of the task!

## `jiant` Pipeline

In [1]:
import os

import jiant.proj.main.tokenize_and_cache as tokenize_and_cache
import jiant.proj.main.export_model as export_model
import jiant.proj.main.scripts.configurator as configurator
import jiant.proj.main.runscript as main_runscript
import jiant.utils.python.io as py_io
import jiant.utils.display as display

from rich.progress import track

In [50]:
import torch

task_name = "hellaswag"
task_split="val"
binarynli_classes = ["entailed", "not-entailed"]
commnli_classes = ["entailment", "contradiction", "neutral"]
acc_report = {}
val_data = py_io.read_jsonl(
    f"/content/tasks/curriculum/{task_name}/{task_split}.jsonl")
print(len(val_data))
# NOTE(review): torch.load unpickles the file, so only load prediction files
# produced by your own runs.
preds = torch.load(f"./runs/{task_name}/anli-mix-roberta/1000-shot/val_preds.p")[task_name]['preds']

labels = {}
good_predictions = []
wrong_predictions = []

# Split the validation examples by whether the predicted class index matches
# the index of the gold label in binarynli_classes.
for i, pred in enumerate(preds):
    example = val_data[i]
    if pred == binarynli_classes.index(example['gold_label']):
        good_predictions.append(example)
    else:
        wrong_predictions.append(example)

print("# good: ", len(good_predictions))
print("# wrong: ", len(wrong_predictions))

good_selected, _ = get_k_shot_data_multi(good_predictions, k=3000)
# Build a new list here. Assigning `val_data = wrong_predictions` and then using
# `+=` would extend wrong_predictions itself with the correct predictions.
val_data = wrong_predictions + good_selected

8518
# good:  5657
# wrong:  2861


In [74]:
import jiant.utils.python.io as py_io

# Raw "control" splits; the test and dev files are merged into one evaluation pool.
control_dir = "./curriculum/control"
train_data_raw = py_io.read_jsonl(f"{control_dir}/train.jsonl.txt")
val_data_raw = (
    py_io.read_jsonl(f"{control_dir}/test.jsonl.txt")
    + py_io.read_jsonl(f"{control_dir}/dev.jsonl.txt")
)

for split in (train_data_raw, val_data_raw):
    print(len(split))

6719
1604


In [77]:
# Expand the single-letter labels into full three-way NLI label names.
label_map = {'e':'entailment', 'c':'contradiction', 'n':"neutral"}

for split in (train_data_raw, val_data_raw):
    for data in split:
        # Records are rewritten in place. Converting only those that still carry
        # the raw 'label' key makes re-running this cell a no-op instead of a KeyError.
        if 'label' in data:
            data['gold_label'] = label_map[data.pop('label')]

train_data = train_data_raw
val_data = val_data_raw

In [64]:
import random

# Answer letter -> position of the correct option inside a question's `options` list.
label_map = {'A':0, 'B':1, 'C':2, 'D':3, 'E':4}


def _mcq_passages_to_nli(raw_records):
    """Turn passage/question records into binary NLI example pairs.

    For every question, the premise is the passage followed by the question text.
    The correct option becomes an "entailed" hypothesis and one randomly chosen
    wrong option becomes a "not-entailed" hypothesis. `idx` counts examples from 0.

    The raw records are not modified, so the cell can be re-run safely.
    """
    examples = []
    for data in raw_records:
        passage = data['passage']
        for question in data['questions']:
            premise = f"{passage} {question['question']}"
            # Pop from a copy: popping from the raw list would remove the gold
            # option permanently and corrupt a second run of this cell.
            distractors = list(question['options'])
            hypothesis = distractors.pop(label_map[question['answer']])
            random_hypo = random.choice(distractors)
            examples.append({
                'idx': len(examples),
                'premise': premise,
                'hypothesis': hypothesis,
                'gold_label': "entailed"
            })
            examples.append({
                'idx': len(examples),
                'premise': premise,
                'hypothesis': random_hypo,
                'gold_label': "not-entailed"
            })
    return examples


train_data = _mcq_passages_to_nli(train_data_raw)
val_data = _mcq_passages_to_nli(val_data_raw)


In [47]:
def _read_stripped_lines(path):
    """Return the lines of `path` with surrounding whitespace removed."""
    # `with` closes the handle; a bare open(...).readlines() leaves it open.
    with open(path) as handle:
        return [x.strip() for x in handle.readlines()]


def _tracie_lines_to_nli(raw_lines):
    """Parse tab-separated lines into NLI examples.

    The first tab field is expected to hold "event: ... story: ..."; the story
    becomes the premise and the event the hypothesis. The second whitespace
    token of the second tab field is looked up in `label_map`.
    """
    examples = []
    for i, l in enumerate(raw_lines):
        fields = l.split("\t")
        head = fields[0]
        parts = head.split("story:")
        story = parts[1] if "story:" in head else "no story"
        # The event marker is only searched for in the text before "story:".
        event = parts[0].split("event:")[1] if "event:" in parts[0] else "no event happend"
        label = fields[1].split()[1]
        examples.append({
            "idx": i, "premise": story, "hypothesis": event, "gold_label": label_map[label]
        })
    return examples


# NOTE(review): the training split is read from tracie_test.txt and the
# validation split from tracie_train.txt — confirm this swap is intentional.
train_data_raw = _read_stripped_lines("./curriculum/temporal/tracie_test.txt")
val_data_raw = _read_stripped_lines("./curriculum/temporal/tracie_train.txt")

print(len(train_data_raw))

label_map = {"positive": "entailed", "negative": "not-entailed"}

train_data = _tracie_lines_to_nli(train_data_raw)
val_data = _tracie_lines_to_nli(val_data_raw)

4248


In [22]:
import random


def _endings_to_nli(raw_records):
    """Turn context/endings records into binary NLI example pairs.

    `ctx` is the premise. The ending at position `label` becomes an "entailed"
    hypothesis and one randomly chosen other ending becomes a "not-entailed"
    hypothesis. `idx` counts examples from 0.

    The raw records are not modified, so the cell can be re-run safely.
    """
    examples = []
    for data in raw_records:
        premise = data['ctx']
        # Pop from a copy: popping from data['endings'] directly would drop the
        # gold ending from the raw data and break a second run of this cell.
        distractors = list(data['endings'])
        hypothesis = distractors.pop(int(data['label']))
        random_hypo = random.choice(distractors)
        examples.append({
            'idx': len(examples),
            'premise': premise,
            'hypothesis': hypothesis,
            'gold_label': "entailed"
        })
        examples.append({
            'idx': len(examples),
            'premise': premise,
            'hypothesis': random_hypo,
            'gold_label': "not-entailed"
        })
    return examples


train_data = _endings_to_nli(train_data_raw)
val_data = _endings_to_nli(val_data_raw)


In [25]:
# Keep at most k examples per gold label in each split; everything beyond k
# per label is returned separately in the *_rest lists.
# NOTE(review): get_k_shot_data_multi is defined in a later cell of this
# notebook, so that cell must have been run first.
train_data, train_data_rest = get_k_shot_data_multi(train_data, k=10000)
val_data, val_data_rest = get_k_shot_data_multi(val_data, k=5000)

In [79]:
task_name = "control"

# Resolve every output location once so the data files and the config agree.
config_dir = "/content/tasks/configs/"
task_dir = f"/content/tasks/curriculum/{task_name}"
train_path = f"{task_dir}/train.jsonl"
val_path = f"{task_dir}/val.jsonl"

for directory in (config_dir, task_dir):
    os.makedirs(directory, exist_ok=True)

# Write both splits, then the task config that points at them.
py_io.write_jsonl(data=train_data, path=train_path)
py_io.write_jsonl(data=val_data, path=val_path)
py_io.write_json(
    {
        "task": task_name,
        "paths": {"train": train_path, "val": val_path},
        "name": task_name,
    },
    f"/content/tasks/configs/{task_name}_config.json",
)

## Creating sample Edge-Probing data.

Because the Edge-Probing data is not publicly available, we will simulate the run with a single example. We will write 1000 copies for the training set and 100 copies for the validation set. We will also write the corresponding task config.

In [24]:

import numpy as np

def get_k_shot_data(datalist, seed=42, k_p=6000, k_n=6000, balanced=False):
    """Sample a binary-NLI subset with a cap per class.

    Args:
        datalist: list of dicts that each carry a 'gold_label' key.
        seed: seed for the global NumPy RNG used for shuffling.
        k_p: maximum number of "entailed" examples to keep.
        k_n: maximum number of "not-entailed" examples to keep.
        balanced: when True, the "not-entailed" cap is k_p instead of k_n.

    Returns:
        A new list holding the selected "entailed" examples followed by the
        selected "not-entailed" examples. Examples with any other label are
        dropped. A class that is absent from the input contributes nothing.
    """
    # The global RNG is seeded (not a private generator) so the selection stays
    # identical to what earlier runs of this notebook produced.
    np.random.seed(seed)

    # Shuffle a shallow copy so the caller's list keeps its original order.
    shuffled = list(datalist)
    np.random.shuffle(shuffled)

    # Bucket the shuffled examples by label.
    by_label = {}
    for line in shuffled:
        by_label.setdefault(line['gold_label'], []).append(line)

    k_neg = k_p if balanced else k_n
    # .get() avoids a KeyError when a split contains only one of the classes.
    return by_label.get("entailed", [])[:k_p] + by_label.get("not-entailed", [])[:k_neg]

def get_k_shot_data_multi(train_lines, seed=42, k=320):
    """Split examples into a k-per-label sample and the remainder.

    Args:
        train_lines: list of dicts that each carry a 'gold_label' key.
        seed: seed for the global NumPy RNG used for shuffling.
        k: maximum number of examples to keep for each distinct label.

    Returns:
        A tuple (selected, rest). `selected` holds up to k examples per label
        and `rest` holds every remaining example. Both are grouped by label in
        order of first appearance after shuffling.
    """
    # The global RNG is seeded (not a private generator) so the selection stays
    # identical to what earlier runs of this notebook produced.
    np.random.seed(seed)

    # Shuffle a shallow copy so the caller's list keeps its original order.
    shuffled = list(train_lines)
    np.random.shuffle(shuffled)

    by_label = {}
    for line in shuffled:
        by_label.setdefault(line['gold_label'], []).append(line)

    new_train = []
    new_train_rest = []
    for bucket in by_label.values():
        new_train += bucket[:k]
        new_train_rest += bucket[k:]
    return new_train, new_train_rest

In [28]:
# Task names grouped by category. Each name is used elsewhere in this notebook
# as a directory name under /content/tasks/curriculum/.

# Lexical category.
lexical_tasks = [
    "lexical",
    "transitive",
    "hypernymy",
    "hyponymy",
    "ner"
]

# Syntactic category.
syntactic_tasks = [
    "verbnet",
    "verbcorner",
    "syntactic_alternation",
    "syntactic_variation",
]

# Logical category.
logical_tasks = [
    "boolean",
    "comparative",
    "conditional",
    "counting",
    "negation",
    "quantifier",
    "monotonicity_infer",
    "syllogism"
]

# Semantic category.
semantic_tasks = [
    "sentiment",
    "kg_relations",
    "puns",
    "coreference",
    "context_align",
    "sprl"
]

# Knowledge category.
knowledge_tasks = [
    "entailment_tree",
    "proof_writer"
]

# Commonsense category.
commonsense_tasks = [
    "socialqa",
    "physicalqa",
    "atomic",
    "social_chem"
]

# Comprehension category.
comprehension_tasks = [
    "logiqa",
    "cosmoqa",
    "ester",
    "drop"
]

In [35]:
train_data = []
val_data = []

fundamental_tasks = [
  "hypernymy", "hyponymy", "syntactic_alternation", "syntactic_variation",
  "boolean", "comparative", "conditional", "counting", "negation", "quantifier","syllogism", 
  "sentiment","kg_relations","puns","coreference","sprl"]

complex_tasks = ["context_align", "entailment_tree",
                 "monotonicity_infer", "socialqa",
                 "physicalqa","atomic","social_chem",
                 "logiqa","cosmoqa","ester","spatial","temporal", "counterfactual"]

# A category-level `all_tasks` list used to be assigned just above and was
# overwritten here before anything read it; only this definition is kept.
all_tasks = fundamental_tasks + complex_tasks

# Three-way NLI labels collapse to the binary scheme; any other label passes through.
_BINARY_LABELS = {
    "contradiction": "not-entailed",
    "neutral": "not-entailed",
    "entailment": "entailed",
}


def _to_binary_example(record, task):
    """Copy premise/hypothesis from `record`, tag it with `task`, and binarize its label."""
    label = record["gold_label"]
    return {
        "premise": record["premise"],
        "hypothesis": record["hypothesis"],
        "task": task,
        "gold_label": _BINARY_LABELS.get(label, label),
    }


# NOTE(review): the loop runs over semantic_tasks (defined in another cell),
# not over all_tasks — confirm which list is intended.
for task in semantic_tasks:
    train_data_raw = py_io.read_jsonl(f"/content/tasks/curriculum/{task}/train.jsonl")
    val_data_raw = py_io.read_jsonl(f"/content/tasks/curriculum/{task}/val.jsonl")

    print(f"Processing data for {task} . . .")
    print(len(val_data_raw))

    # "monotonicity_hard" contributes validation data only.
    if task != "monotonicity_hard":
        train_data.extend(_to_binary_example(record, task) for record in train_data_raw)

    val_data.extend(_to_binary_example(record, task) for record in val_data_raw)

# Mix the tasks together with a fixed seed.
np.random.seed(42)
np.random.shuffle(train_data)
np.random.shuffle(val_data)
len(val_data)

Processing data for sentiment . . .
600
Processing data for kg_relations . . .
761
Processing data for puns . . .
1756
Processing data for coreference . . .
5799
Processing data for context_align . . .
7288
Processing data for sprl . . .
8500


24704

In [45]:
# Rename sentence1/sentence2 to premise/hypothesis in place, wherever present.
for record in val_data:
    for old_key, new_key in (('sentence1', 'premise'), ('sentence2', 'hypothesis')):
        if old_key in record:
            record[new_key] = record.pop(old_key)

In [47]:
task_name = "lexical"

# Resolve every output location once so the data files and the config agree.
config_dir = "/content/tasks/configs/"
task_dir = f"/content/tasks/curriculum/{task_name}"
train_path = f"{task_dir}/train.jsonl"
val_path = f"{task_dir}/val.jsonl"

for directory in (config_dir, task_dir):
    os.makedirs(directory, exist_ok=True)

# Write both splits, then the task config that points at them.
py_io.write_jsonl(data=train_data, path=train_path)
py_io.write_jsonl(data=val_data, path=val_path)
py_io.write_json(
    {
        "task": task_name,
        "paths": {"train": train_path, "val": val_path},
        "name": task_name,
    },
    f"/content/tasks/configs/{task_name}_config.json",
)

In [25]:
val_data_med = py_io.read_json("./curriculum/MED.json")

# Start a fresh validation list: "entailment" maps to "entailed" and every
# other label maps to "not-entailed".
val_data = [
    {
        'premise': line['sentence1'],
        'hypothesis': line['sentence2'],
        'gold_label': "entailed" if line['gold_label'] == "entailment" else "not-entailed",
    }
    for line in val_data_med
]

In [26]:
val_data_seg = py_io.read_jsonl("./curriculum/Logical/monotonicity_hard/test/test.json")

# Append to the existing validation list: "entailment" maps to "entailed" and
# every other label maps to "not-entailed".
val_data.extend(
    {
        'premise': line['sentence1'],
        'hypothesis': line['sentence2'],
        'gold_label': "entailed" if line['gold_label'] == "entailment" else "not-entailed",
    }
    for line in val_data_seg
)
len(val_data)

6382

In [73]:
import jiant.utils.python.io as py_io

# Load both splits of the "curriculum" task and report their sizes.
curriculum_dir = "/content/tasks/curriculum/curriculum"
train_data_raw = py_io.read_jsonl(f"{curriculum_dir}/train.jsonl")
val_data_raw = py_io.read_jsonl(f"{curriculum_dir}/val.jsonl")

for split in (train_data_raw, val_data_raw):
    print(len(split))

35308
25494


In [67]:
# Inspect the field names of the first raw training record.
train_data_raw[0].keys()

dict_keys(['label', 'idx', 'text_a', 'text_b'])

In [32]:
# Bucket the raw training records by their 'label' value.
data_by_label = {}
for record in train_data_raw:
    data_by_label.setdefault(record['label'], []).append(record)

# Only the last expression of a cell is displayed, so just the
# 'not-entailed' count shows up in the output.
len(data_by_label['entailed'])
len(data_by_label['not-entailed'])

1506

In [33]:
# Per-label cut points: records before the cut stay in training, the tail of
# each label bucket is moved to validation.
entailed_cut, not_entailed_cut = 1494, 1306
entailed_pool = data_by_label['entailed']
not_entailed_pool = data_by_label['not-entailed']

entailed = entailed_pool[entailed_cut:]
not_entailed = not_entailed_pool[not_entailed_cut:]
train_data_raw = entailed_pool[:entailed_cut] + not_entailed_pool[:not_entailed_cut]
# NOTE(review): this extends val_data_raw in place, so re-running the cell
# appends the same tail records again.
val_data_raw += entailed + not_entailed

In [68]:
# Position in this list is the integer class id stored in the raw 'label' field.
classes = ['not-entailed', 'entailed']


def _text_pair_to_nli(record):
    """Return a copy of `record` with text_a/text_b/label replaced by NLI fields.

    All other fields are kept as they are. The raw record is left untouched,
    so the cell can be re-run without hitting a KeyError on already-popped keys.
    """
    converted = {
        key: value
        for key, value in record.items()
        if key not in ('text_a', 'text_b', 'label')
    }
    converted['premise'] = record['text_a']
    converted['hypothesis'] = record['text_b']
    converted['gold_label'] = classes[int(record['label'])]
    return converted


train_data = [_text_pair_to_nli(record) for record in train_data_raw]
val_data = [_text_pair_to_nli(record) for record in val_data_raw]

In [97]:
def _span_qa_to_nli(raw_records):
    """Build one entailed and one not-entailed example per QA record.

    The premise is the context with the question appended. The entailed
    hypothesis is the answer texts joined by spaces; the not-entailed
    hypothesis is the premise with every answer text removed. Both are
    wrapped in quotes and followed by " .".
    """
    examples = []
    for line in raw_records:
        context = line['context'] + line['question']
        answer = ' '.join(line['answer_texts'])

        # Deleting each answer string from the premise gives the negative hypothesis.
        false_answer = context
        for txt in line['answer_texts']:
            false_answer = false_answer.replace(txt, "")

        examples.append({
            "premise": context,
            "hypothesis": f"\"{answer} \" .",
            "gold_label": "entailed"
        })
        examples.append({
            "premise": context,
            "hypothesis": f"\"{false_answer} \" .",
            "gold_label": "not-entailed"
        })
    return examples


# The `entail` flag that used to be toggled on every record was never read
# and has been dropped.
train_data = _span_qa_to_nli(train_data_raw)
val_data = _span_qa_to_nli(val_data_raw)

In [98]:
# Move up to 500 examples per label out of the training set and into the
# validation set; training keeps only what is left over.
val_data_plus, train_data_rest = get_k_shot_data_multi(train_data, k=500)
val_data += val_data_plus
train_data = train_data_rest

In [71]:
task_name = "syntactic_variation"

# Resolve every output location once so the data files and the config agree.
config_dir = "/content/tasks/configs/"
task_dir = f"/content/tasks/curriculum/{task_name}"
train_path = f"{task_dir}/train.jsonl"
val_path = f"{task_dir}/val.jsonl"

for directory in (config_dir, task_dir):
    os.makedirs(directory, exist_ok=True)

# Write both splits, then the task config that points at them.
py_io.write_jsonl(data=train_data, path=train_path)
py_io.write_jsonl(data=val_data, path=val_path)
py_io.write_json(
    {
        "task": task_name,
        "paths": {"train": train_path, "val": val_path},
        "name": task_name,
    },
    f"/content/tasks/configs/{task_name}_config.json",
)

In [None]:
def get_k_shot_task_data(train_lines, k_shot=10):
    """Sample up to `k_shot` examples per label from every task.

    Args:
        train_lines: list of dicts that each carry 'task' and 'gold_label' keys.
        k_shot: per-label cap applied separately within each task.

    Returns:
        A flat list of the selected examples, grouped by task in order of
        first appearance. Prints the number of distinct tasks as a side effect.
    """
    task_list = {}
    for line in train_lines:
        task_list.setdefault(line['task'], []).append(line)
    print(len(task_list))

    new_train = []
    for task_lines in task_list.values():
        # get_k_shot_data_multi returns (selected, rest). Only the selected part
        # belongs in the sample; adding the tuple itself would append two lists.
        selected, _ = get_k_shot_data_multi(task_lines, k=k_shot)
        new_train += selected
    return new_train

In [None]:
_UPDATE_TYPE_TO_LABEL = {"strengthener": "entailed", "weakener": "not-entailed"}


def _premise_update_to_nli(raw_records):
    """Convert Premise/Update/Hypothesis records into binary NLI examples.

    The premise is "<Premise> ; <Update>". A "strengthener" update is labelled
    "entailed" and a "weakener" update "not-entailed".

    Raises:
        ValueError: if a record has any other UpdateType. Without this check
            such a record would silently reuse the previous record's label.
    """
    examples = []
    for line in raw_records:
        update_type = line['UpdateType']
        if update_type not in _UPDATE_TYPE_TO_LABEL:
            raise ValueError(f"unexpected UpdateType: {update_type!r}")
        examples.append({
            'premise': f"{line['Premise']} ; {line['Update']}",
            'hypothesis': line['Hypothesis'],
            'gold_label': _UPDATE_TYPE_TO_LABEL[update_type]
        })
    return examples


train_data = _premise_update_to_nli(train_data_raw)
val_data = _premise_update_to_nli(val_data_raw)

In [None]:
_UPDATE_TYPE_TO_LABEL = {"strengthener": "entailed", "weakener": "not-entailed"}


def _situation_update_to_nli(raw_records):
    """Convert SocialChemSituation/Update/Hypothesis records into binary NLI examples.

    The premise is "<SocialChemSituation> ; <Update>". A "strengthener" update
    is labelled "entailed" and a "weakener" update "not-entailed".

    Raises:
        ValueError: if a record has any other UpdateType. Without this check
            such a record would silently reuse the previous record's label.
    """
    examples = []
    for line in raw_records:
        update_type = line['UpdateType']
        if update_type not in _UPDATE_TYPE_TO_LABEL:
            raise ValueError(f"unexpected UpdateType: {update_type!r}")
        examples.append({
            'premise': f"{line['SocialChemSituation']} ; {line['Update']}",
            'hypothesis': line['Hypothesis'],
            'gold_label': _UPDATE_TYPE_TO_LABEL[update_type]
        })
    return examples


train_data = _situation_update_to_nli(train_data_raw)
val_data = _situation_update_to_nli(val_data_raw)

In [None]:
# A label equal to 1 means "entailed"; every other value means "not-entailed".
# NOTE(review): the comparison is against the integer 1, so a string label "1"
# would fall into "not-entailed" — confirm the label type in the raw data.
train_data = [
    {
        'premise': line['text_a'],
        'hypothesis': line['text_b'],
        'gold_label': "entailed" if line['label'] == 1 else "not-entailed",
    }
    for line in train_data_raw
]

val_data = [
    {
        'premise': line['text_a'],
        'hypothesis': line['text_b'],
        'gold_label': "entailed" if line['label'] == 1 else "not-entailed",
    }
    for line in val_data_raw
]

len(train_data_raw)

In [None]:
def _clean_nli_records(raw_records):
    """Normalize premise/hypothesis text and rename 'label' to 'gold_label'.

    Newlines are removed from the premise and square brackets from the
    hypothesis. A record that is missing a field, or whose field is not a
    string, is skipped and its keys are printed so it can be inspected.
    """
    examples = []
    for line in raw_records:
        try:
            examples.append({
                'premise': line['premise'].replace("\n", ""),
                'hypothesis': line['hypothesis'].replace("[", "").replace("]", ""),
                'gold_label': line['label']
            })
        except (KeyError, AttributeError, TypeError):
            # Best-effort conversion: report the malformed record and move on.
            # Only the expected failure modes are caught, so a real bug or a
            # KeyboardInterrupt is no longer swallowed.
            print(line.keys())
    return examples


train_data = _clean_nli_records(train_data_raw)
val_data = _clean_nli_records(val_data_raw)


# DefinitionQA Converter

In [None]:
val_data_raw_easy = py_io.read_json("./curriculum/CoDA-clean-easy.json", encoding="utf8")
val_data_raw_hard = py_io.read_json("./curriculum/CoDA-clean-hard.json", encoding="utf8")

val_data_align = []

# Only the groups stored under the 'n' key are used, easy file first, then hard.
# For each candidate the target word is masked in its first context, and the
# candidate's definition is stated as an "entailed" hypothesis.
for coda_split in (val_data_raw_easy, val_data_raw_hard):
    for group in coda_split['n']:
        for candidate in group['candidates']:
            target_word = candidate['words_in_contexts'][0]
            masked_context = candidate['contexts'][0].replace(target_word, "<mask>")
            definition = candidate['definition']
            val_data_align.append({
                'premise': masked_context,
                'hypothesis': f"the proper context for <mask> here should be {definition}",
                'gold_label': "entailed"
            })


len(val_data_align)

In [None]:
# Load both splits of the "context_align_complete" task.
train_data_defqa = py_io.read_jsonl("/content/tasks/curriculum/context_align_complete/train.jsonl")
val_data_defqa = py_io.read_jsonl("/content/tasks/curriculum/context_align_complete/val.jsonl")

# Subsample per label: training uses get_k_shot_data's default caps, validation
# keeps up to 500 "entailed" and up to 3239 "not-entailed" examples.
train_data = get_k_shot_data(train_data_defqa)
val_data_new = get_k_shot_data(val_data_defqa, k_p=500, k_n=3239)
# val_data_align is the CoDA-derived list built in another cell.
val_data = val_data_new + val_data_align

# NOTE(review): count_labels is not defined in the visible part of this
# notebook — confirm where it comes from.
count_labels(train_data)
count_labels(val_data)

In [17]:
# Display one raw multiple-choice record to show its structure.
train_data_raw[4]

{'question': {'stem': "In the expression 'prismatic colors', the following is the best characterization of the word/phrase 'prismatic'",
  'choices': [{'label': 0, 'text': 'Unevenness; inequality of surface.'},
   {'label': 1, 'text': 'Resembling, or pertaining to, a prism'},
   {'label': 2,
    'text': 'Separated or distributed by a prism; formed by a prism'},
   {'label': 3, 'text': 'Made thick or thicker; thickened; inspissated.'},
   {'label': 4, 'text': 'To imitate; to mimic; to personate.'}]},
 'answerKey': 2,
 'notes': {'gold_synset': 'prismatic.u.2',
  'distractor_synsets': ['wave.u.11',
   'prismatic.a.1',
   'prismatic.u.2',
   'incrassate.u.2',
   'take_off.u.4'],
  'hops': 0,
  'distractor_hops': 0,
  'chain': '',
  'distractor_chain': '',
  'other_distractors': [],
  'word': 'prismatic',
  'sense_choices': 2},
 'id': '4'}

In [20]:
import random

train_data = []

# `example_id` is used as the loop variable so the builtin `id` is not shadowed
# for the rest of the notebook session.
for example_id, question in enumerate(train_data_raw):
    # The stem quotes the expression first and the target word second.
    # NOTE(review): splitting on "'" assumes neither contains an apostrophe.
    stem_parts = question['question']['stem'].split("'")
    term = stem_parts[3]
    premise = stem_parts[1].replace(term, '<mask>')

    choices = question['question']['choices']
    answer_idx = int(question['answerKey'])
    answer = choices[answer_idx]['text']
    # Pick one distractor uniformly from the non-answer choices.
    wrong_ids = [i for i in range(len(choices)) if i != answer_idx]
    wrong = choices[random.choice(wrong_ids)]['text']

    # Alternate labels: even positions state the correct definition first,
    # odd positions swap it with the distractor.
    if example_id % 2 == 0:
        hypothesis = f"the best context for <mask> here is {answer}; but not {wrong}."
        label = "entailed"
    else:
        hypothesis = f"the best context for <mask> here is {wrong}; but not {answer}."
        label = "not-entailed"
    train_data.append({
        "id": example_id,
        "premise": premise,
        "hypothesis": hypothesis,
        "gold_label": label
    })

train_data[20]

{'id': 20,
 'premise': 'a <mask> person',
 'hypothesis': 'the best context for <mask> here is Having peculiar views; fanciful; visionary; but not In a dependent manner..',
 'gold_label': 'entailed'}

In [22]:
# Use the converted training examples plus the previously collected
# misclassified examples as the validation set.
val_data = train_data + wrong_predictions
len(val_data)

4888

In [24]:
# get_k_shot_data_multi returns (selected, rest). Keep only the selected
# examples so val_data_good is a flat list of examples, not a tuple of lists.
val_data_good, _ = get_k_shot_data_multi(good_predictions, k=1200)

In [25]:
# Append the contents of val_data_good to the validation set in place.
val_data += val_data_good

In [None]:
val_data = []

# `example_id` is used as the loop variable so the builtin `id` is not shadowed
# for the rest of the notebook session.
for example_id, question in enumerate(val_data_raw):
    # The stem quotes the expression first and the target word second.
    # NOTE(review): splitting on "'" assumes neither contains an apostrophe.
    stem_parts = question['question']['stem'].split("'")
    premise = stem_parts[1]
    term = stem_parts[3]

    choices = question['question']['choices']
    answer_idx = int(question['answerKey'])
    answer = choices[answer_idx]['text']

    # The entailed hypothesis must use the gold answer text. Reading
    # `choice['text']` here would use a variable that is only bound by the loop
    # below: a NameError on the first record, and the last choice of the
    # previous record afterwards.
    val_data.append({
        "id": example_id,
        "premise": premise,
        "hypothesis": f"a specific type of {term} is {answer} .",
        "gold_label": "entailed"
    })
    # Every other choice becomes a not-entailed counter-example with the same id.
    for j, choice in enumerate(choices):
        if j != answer_idx:
            val_data.append({
                "id": example_id,
                "premise": premise,
                "hypothesis": f"a specific type of {term} is {choice['text']} .",
                "gold_label": "not-entailed"
            })

val_data[100]

In [None]:
# Cap each class: up to 10000 per label for training, up to 4250 per label
# for validation.
new_train_data = get_k_shot_data(train_data, k_p=10000, k_n=10000)
new_val_data = get_k_shot_data(val_data, k_p=4250, k_n=4250)

# NOTE(review): count_labels is not defined in the visible part of this
# notebook — confirm where it comes from.
count_labels(new_train_data)
count_labels(new_val_data)

# Replace the working splits with the capped versions.
train_data = new_train_data
val_data = new_val_data

# Transitive Converter

In [None]:
import pandas as pd

train_df = pd.read_table("./curriculum/transitive/train.tsv")
val_df = pd.read_table("./curriculum/transitive/val.tsv")

# Shuffle the rows so the per-genre cap below keeps a random subset.
# NOTE(review): no random_state is passed, so the subset differs between runs.
train_df = train_df.sample(frac=1).reset_index(drop=True)
val_df = val_df.sample(frac=1).reset_index(drop=True)


def _examples_by_genre(df, cap=2000):
    """Convert rows to binary NLI examples, keeping at most `cap` per genre.

    "neutral" rows become "not-entailed" and every other label becomes
    "entailed". The example id is the row index of `df`.

    Returns:
        Dict mapping each genre to its list of kept examples, in row order.
    """
    by_genre = {}
    for i, row in df.iterrows():
        example = {
            "id": i,
            "premise": row['sentence1'],
            "hypothesis": row['sentence2'],
            "gold_label": "not-entailed" if row['gold_label'] == "neutral" else "entailed"
        }
        bucket = by_genre.setdefault(row['genre'], [])
        if len(bucket) < cap:
            bucket.append(example)
    return by_genre


train_count = _examples_by_genre(train_df)
# The validation examples must come from val_df. Iterating train_df a second
# time would fill the validation set with training rows.
val_count = _examples_by_genre(val_df)

train_data_trans = [example for bucket in train_count.values() for example in bucket]
val_data_trans = [example for bucket in val_count.values() for example in bucket]

In [None]:
# NOTE(review): count_labels is not defined in the visible part of this
# notebook; presumably it reports the label distribution — confirm.
count_labels(train_data_trans)
count_labels(val_data_trans)

In [None]:
import jiant.utils.python.io as py_io

# Load the train/val splits of the megaveridicality and factuality tasks.
curriculum_root = "/content/tasks/curriculum"

train_data_mega = py_io.read_jsonl(f"{curriculum_root}/megaveridicality/train.jsonl", mode="r")
train_data_fact = py_io.read_jsonl(f"{curriculum_root}/factuality/train.jsonl", mode="r")

val_data_mega = py_io.read_jsonl(f"{curriculum_root}/megaveridicality/val.jsonl", mode="r")
val_data_fact = py_io.read_jsonl(f"{curriculum_root}/factuality/val.jsonl", mode="r")

In [None]:
# NOTE(review): count_labels is not defined in the visible part of this
# notebook; presumably it reports the label distribution — confirm.
count_labels(val_data_fact)
count_labels(val_data_mega)

In [None]:
# NOTE(review): count_labels is not defined in the visible part of this
# notebook; presumably it reports the label distribution — confirm.
count_labels(train_data_mega)

In [None]:
# Training set: capped megaveridicality sample, all transitive examples,
# then a capped factuality sample.
train_data = (
    get_k_shot_data(train_data_mega, k_p=3150, k_n=3150)
    + train_data_trans
    + get_k_shot_data(train_data_fact, k_p=1850, k_n=1850)
)

# Validation set: each source is capped per label before concatenation.
val_data = (
    get_k_shot_data(val_data_mega, k_p=394, k_n=394)
    + get_k_shot_data(val_data_trans, k_p=3000, k_n=3000)
    + get_k_shot_data(val_data_fact, k_p=1000, k_n=1000)
)

count_labels(train_data)
count_labels(val_data)

# Syllogism and SEM-Relation Converter

In [None]:
import os

# Eight slots; 0 marks a slot that has not received any data yet.
train_data_raw = [0] * 8

for root, dirs, files in os.walk('./curriculum/syllogism/train'):
    # The slot index restarts for every directory that os.walk visits.
    i = -1
    for file in files:
      # A file name containing "0-train" starts the next slot; other files
      # are added to the current slot.
      # NOTE(review): this relies on the order in which os.walk lists the
      # files, and if the first file lacks "0-train" the index -1 addresses
      # the last slot — confirm the expected file naming.
      if "0-train" in file:
        i += 1
      # NOTE(review): the path is built from the top-level directory rather
      # than `root`, so files in subdirectories would not be found — confirm
      # the directory is flat.
      subset = py_io.read_jsonl(f"./curriculum/syllogism/train/{file}", encoding="utf8")
      if train_data_raw[i] == 0:
        train_data_raw[i] = subset
      else:
        train_data_raw[i] += subset

len(train_data_raw[0])

In [None]:
import xmltodict

train_text = {}

# Parse the whole XML document into a nested dict in one read.
with open('./curriculum/sem_relation/1.1.text.xml', 'r', encoding="utf8") as file:
  train_text = xmltodict.parse(file.read())

train_text.keys()

# LogiQA Converter

In [None]:
import random

# Answer letter -> line offset of that option within an 8-line record.
answer_key = {
  "a":4, "b":5, "c":6, "d":7
}


def _logiqa_option_text(raw):
    """Drop the option markers and the newline from an option line."""
    # NOTE(review): replace() removes "A."/"B."/"C."/"D." anywhere in the line,
    # not only the leading option marker — confirm this is acceptable.
    return raw.replace("A.", "").replace("B.", "").replace("C.", "").replace("D.", "").replace('\n', '')


def _logiqa_train_file_to_nli(path):
    """Convert a LogiQA text file (8 lines per record) into binary NLI pairs.

    Within each record, line 1 holds the answer letter, line 2 the premise,
    and lines 4-7 the options. The gold option becomes an "entailed" example
    and one randomly chosen other option a "not-entailed" example.
    `idx` counts examples from 0.
    """
    with open(path, 'r', encoding="utf-8") as file:
        lines = file.readlines()

    examples = []
    # Stop 7 lines early so a trailing partial record (e.g. a final blank
    # line) is skipped instead of raising IndexError/KeyError.
    for i in range(0, len(lines) - 7, 8):
        gold_offset = answer_key[lines[i+1].replace('\n', '')]
        wrong_offsets = [offset for offset in (4, 5, 6, 7) if offset != gold_offset]
        neg_offset = random.choice(wrong_offsets)
        premise = lines[i+2].replace('\n', '')
        examples.append({
            "idx": len(examples),
            "premise": premise,
            "hypothesis": _logiqa_option_text(lines[i+gold_offset]),
            "gold_label": "entailed",
        })
        examples.append({
            "idx": len(examples),
            "premise": premise,
            "hypothesis": _logiqa_option_text(lines[i+neg_offset]),
            "gold_label": "not-entailed",
        })
    return examples


train_data = _logiqa_train_file_to_nli('./curriculum/logicqa/train.txt')

In [None]:
def _logiqa_eval_option_text(raw):
    """Drop the option markers and the newline from an option line."""
    # NOTE(review): replace() removes "A."/"B."/"C."/"D." anywhere in the line,
    # not only the leading option marker — confirm this is acceptable.
    return raw.replace("A.", "").replace("B.", "").replace("C.", "").replace("D.", "").replace('\n', '')


def _logiqa_eval_file_to_nli(path):
    """Convert a LogiQA text file (8 lines per record) into binary NLI pairs.

    Relies on `answer_key` and `random` from the training conversion cell.
    The gold option becomes an "entailed" example and one randomly chosen
    other option a "not-entailed" example. `idx` counts from 0 within the file.
    """
    with open(path, 'r', encoding="utf-8") as file:
        lines = file.readlines()

    examples = []
    # Stop 7 lines early so a trailing partial record (e.g. a final blank
    # line) is skipped instead of raising IndexError/KeyError.
    for i in range(0, len(lines) - 7, 8):
        gold_offset = answer_key[lines[i+1].replace('\n', '')]
        wrong_offsets = [offset for offset in (4, 5, 6, 7) if offset != gold_offset]
        neg_offset = random.choice(wrong_offsets)
        premise = lines[i+2].replace('\n', '')
        examples.append({
            "idx": len(examples),
            "premise": premise,
            "hypothesis": _logiqa_eval_option_text(lines[i+gold_offset]),
            "gold_label": "entailed",
        })
        examples.append({
            "idx": len(examples),
            "premise": premise,
            "hypothesis": _logiqa_eval_option_text(lines[i+neg_offset]),
            "gold_label": "not-entailed",
        })
    return examples


# The validation set is dev followed by test.
# NOTE(review): idx restarts at 0 for the test file, so idx values repeat
# within val_data — confirm nothing downstream needs them to be unique.
val_data = (
    _logiqa_eval_file_to_nli('./curriculum/logicqa/dev.txt')
    + _logiqa_eval_file_to_nli('./curriculum/logicqa/test.txt')
)

# SPRL Converter

In [None]:
train_data = []
val_data = []

with open('./curriculum/sprl_data.txt', 'r', encoding="utf-8") as file:
    lines = file.readlines()

# Records are 7 lines long: offset 2 is the text, 3 the hypothesis, 4 the
# label and 5 the split name.
# NOTE(review): the `len(lines)-7` bound skips the final record when the file
# length is an exact multiple of 7 — confirm the file ends with extra lines.
for idx, i in enumerate(range(0, len(lines)-7, 7)):
    label_line = lines[i+4]
    label = label_line.split(':')
    # Report records whose label line does not mention "entailed".
    if "entailed" not in label_line:
        print(i)
    example = {
        "idx": idx,
        "premise": lines[i+2].replace('\n', '').replace("text: ", ""),
        "hypothesis": lines[i+3].replace('\n', '').replace("hypothesis: ", ""),
        "gold_label": label[1].replace("\n", "").strip(),
    }
    split_line = lines[i+5]
    if "train" in split_line:
        train_data.append(example)
    elif "test" in split_line or "dev" in split_line:
        # Test and dev records both go to validation.
        val_data.append(example)

In [None]:
import jiant.utils.python.io as py_io

# Task names of the curriculum, grouped by the kind of reasoning they target.
lexical_syntactic_tasks = [
    "transitive",
    "hypernymy",
    "hyponymy",
    "ner",
    "verbnet",
    "verbcorner",
    "syntactic_alternation",
    "mrpc",
]

logical_tasks = [
    "boolean",
    "comparative",
    "conditional",
    "counting",
    "negation",
    "quantifier",
    "monotonicity_infer",
    "syllogism"
]

semantic_tasks = [
    "sentiment",
    "kg_relations",
    "puns",
    "coreference",
    "context_align",
    "sprl",
    "entailment_tree"
]

# NOTE(review): the LogicQA converter in this notebook reads from
# ./curriculum/logicqa/ — confirm that "logiqa" is the intended task/directory name.
complex_tasks = [
    "socialqa",
    "physicalqa",
    "atomic",
    "social_chem",
    "logiqa",
    "cosmoqa",
    "ester"
]

ANLI = ['adversarial_nli_r1',
        'adversarial_nli_r2',
        'adversarial_nli_r3']

CURRICULUM = (
    lexical_syntactic_tasks + logical_tasks
    + semantic_tasks + complex_tasks + ANLI
)


# This cell only writes one jiant task config per task; the train/val JSONL
# files referenced by the configs are not written here.
os.makedirs("./configs/", exist_ok=True)

for task_name in CURRICULUM:
  py_io.write_json({
    "task": f"{task_name}",
    "paths": {
      "train": f"../curriculum/{task_name}/train.jsonl",
      "val": f"../curriculum/{task_name}/val.jsonl",
    },
    "name": f"{task_name}"
  }, f"./configs/{task_name}_config.json")

In [None]:
# Re-key the raw control examples as sentence pairs. A "neutral" label is kept;
# every other label becomes "entailment".
train_data = []
for example in train_data_raw:
  label = example['label'] if example['label'] == "neutral" else "entailment"
  train_data.append({
    'sentence1': example['premise'],
    'sentence2': example['hypothesis'],
    'gold_label': label,
  })

# Same conversion for the validation split.
val_data = []
for example in val_data_raw:
  label = example['label'] if example['label'] == "neutral" else "entailment"
  val_data.append({
    'sentence1': example['premise'],
    'sentence2': example['hypothesis'],
    'gold_label': label,
  })

In [None]:
# Placeholders substituted into the prompt template, plus the mask marker.
CONTEXT_TOKEN = "<S>"
SPAN1_TOKEN = "<SP1>"
SPAN2_TOKEN = "<SP2>"
MASK_TOKEN = "[MASK]"

def generate_prompt(example, prompts):
  """Turn every target of ``example`` into a masked prompt appended to ``prompts``.

  For each entry of ``example['targets']`` the context text and the two spans
  (slices ``[start:end]`` of the text split on single spaces) are filled into
  the template, and the space-split position of ``[MASK]`` is recorded.

  Args:
    example: mapping with a ``'text'`` string and a ``'targets'`` iterable;
      each target provides ``'span1'`` and ``'span2'`` (start/end pairs that
      ``int`` can convert) and a ``'label'``.
    prompts: list that is extended in place with dicts holding ``'text'``,
      ``'masked_spans'`` and ``'target'``.

  Returns:
    None; results are delivered through ``prompts``.
  """
  template = "<S> Based on the given context , <SP1> ? [MASK] <SP2>"
  for target in example['targets']:
    words = example['text'].split(" ")

    def span_text(bounds):
      # Join the words covered by a [start, end) pair back into a phrase.
      start, end = int(bounds[0]), int(bounds[1])
      return " ".join(words[start:end])

    prompt = (
      template
      .replace(CONTEXT_TOKEN, example['text'])
      .replace(SPAN1_TOKEN, span_text(target['span1']))
      .replace(SPAN2_TOKEN, span_text(target['span2']))
    )
    # Position of the first [MASK] token once the prompt is split on spaces.
    position = prompt.split(" ").index(MASK_TOKEN)
    prompts.append({
      'text': prompt,
      'masked_spans': [[position, position+1]],
      'target': target['label'],
    })

In [None]:
prompts_train = []
prompts_val = []

# Expand every training example into one masked prompt per target.
for data in train_data:
  generate_prompt(data, prompts_train)
# Validation prompts are built from the validation split, not from train_data.
for data_val in val_data:
  generate_prompt(data_val, prompts_val)

In [None]:
# Show the first generated prompt, then the single space-separated token at
# index 49 (presumably the [MASK] position — compare with its 'masked_spans').
print(prompts_train[0])
prompts_train[0]['text'].split(" ")[49:50]

In [None]:
# Load the raw train/val JSONL splits of the "ner_complete" task.
train_data_raw = py_io.read_jsonl("/content/tasks/curriculum/ner_complete/train.jsonl")
val_data_raw = py_io.read_jsonl("/content/tasks/curriculum/ner_complete/val.jsonl")

In [None]:
# Sub-sample both splits with get_k_shot_data (defined elsewhere in the notebook).
# NOTE(review): k_p / k_n are presumably the numbers of positive and negative
# examples to keep — confirm against the helper's definition.
train_data = get_k_shot_data(train_data_raw, k_p=10000, k_n=10000)
val_data = get_k_shot_data(val_data_raw, k_p=4250, k_n=4250)

In [26]:
task_name = "entailment_tree"

# Locations of the two data splits; reused for writing and inside the task config.
train_path = f"/content/tasks/curriculum/{task_name}/train.jsonl"
val_path = f"/content/tasks/curriculum/{task_name}/val.jsonl"

os.makedirs("/content/tasks/configs/", exist_ok=True)
os.makedirs(f"/content/tasks/curriculum/{task_name}", exist_ok=True)

# Persist the current train_data / val_data as JSONL.
py_io.write_jsonl(data=train_data, path=train_path)
py_io.write_jsonl(data=val_data, path=val_path)

# Task config pointing jiant at the files written above.
task_config = {
  "task": f"{task_name}",
  "paths": {
    "train": train_path,
    "val": val_path,
  },
  "name": f"{task_name}"
}
py_io.write_json(task_config, f"/content/tasks/configs/{task_name}_config.json")

#### Download model

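Next, we will download the pretrained model used in this notebook (in the cell below, the RoBERTa-large NLI checkpoint `ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli`). This also includes the tokenizer.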

In [None]:
import jiant.proj.main.export_model as export_model

# Export the Hugging Face NLI checkpoint named below into ./models/anli-mix-roberta.
export_model.export_model(
    hf_pretrained_model_name_or_path="ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli",
    output_base_path="./models/anli-mix-roberta",
)

#### Tokenize and cache

With the model and data ready, we can now tokenize and cache the input features for our task. This converts the input examples to tokenized features ready to be consumed by the model, and saves them to disk in chunks.

In [None]:
import jiant.proj.main.tokenize_and_cache as tokenize_and_cache

# Hugging Face identifiers of the encoders used in this notebook.
bert1 = "bert-base-uncased"
bert2 = "bert-large-uncased"
roberta1 = "roberta-base"
roberta2 = "roberta-large"
deberta = "microsoft/deberta-base"

task_name = "factuality"

# Tokenize and cache the train/val phases once per encoder; each encoder gets
# its own cache directory under ./cache/<model_name>/<task_name>.
for model_name in (bert1, roberta1, deberta):
    tokenize_and_cache.main(tokenize_and_cache.RunConfiguration(
        task_config_path=f"/content/tasks/configs/{task_name}_config.json",
        hf_pretrained_model_name_or_path=model_name,
        output_dir=f"./cache/{model_name}/{task_name}",
        phases=["train", "val"],
    ))

In [None]:
import torch
# Load the exported DeBERTa-MNLI weights file and list its keys
# (presumably parameter names of a state dict — confirm).
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model = torch.load("./models/microsoft/deberta-base-mnli/model/model.p")
model.keys()

#### Writing a run config

Here we are going to write what we call a `jiant_task_container_config`. This configuration file defines many of the subtleties of our training pipeline, such as which tasks we will train on, which tasks we will evaluate on, and the batch size for each task. The new version of `jiant` leans heavily toward explicitly specifying everything, for the purpose of inspectability and leaving minimal surprises for the user, even at the cost of being more verbose.

We use a helper "Configurator" to write out a `jiant_task_container_config`, since most of our setup is pretty standard. 

**Depending on what GPU your Colab session is assigned to, you may need to lower the train batch size.**

In [None]:
def train_configuration(task_name, model_name):
    """Create, save and display the jiant run config for a single task.

    The config trains and validates on ``task_name`` only, reads cached
    features from ``./cache/{model_name}/`` and is written to
    ``./run_configs/{task_name}_run_config.json``.

    Args:
        task_name: name of the task to train and evaluate on.
        model_name: model identifier used to locate the feature cache.
    """
    task_configurator = configurator.SimpleAPIMultiTaskConfigurator(
        task_config_base_path="/content/tasks/configs/",
        task_cache_base_path=f"./cache/{model_name}/",
        train_task_name_list=[task_name],
        val_task_name_list=[task_name],
        train_batch_size=8,
        eval_batch_size=16,
        epochs=5,
        num_gpus=1,
    )
    jiant_run_config = task_configurator.create_config()

    os.makedirs("./run_configs/", exist_ok=True)
    run_config_path = f"./run_configs/{task_name}_run_config.json"
    py_io.write_json(jiant_run_config, run_config_path)
    display.show_json(jiant_run_config)

def train(task_name, model_name):
    """Train and validate ``model_name`` on ``task_name`` with the jiant run loop.

    Reads the run config from ``./run_configs/{task_name}_run_config.json``
    and the exported model from ``./models/{model_name}/model/``; results go
    to ``./runs/{task_name}`` (overwritten if present).

    Args:
        task_name: task whose run config is used.
        model_name: Hugging Face model identifier, also used as the
            sub-directory of ``./models`` holding the exported weights.
    """
    run_config_path = f"./run_configs/{task_name}_run_config.json"
    weights_path = f"./models/{model_name}/model/model.p"
    model_config_path = f"./models/{model_name}/model/config.json"

    run_args = main_runscript.RunConfiguration(
        jiant_task_container_config_path=run_config_path,
        output_dir=f"./runs/{task_name}",
        hf_pretrained_model_name_or_path=model_name,
        model_path=weights_path,
        model_config_path=model_config_path,
        learning_rate=1e-5,
        eval_every_steps=500,
        do_train=True,
        do_val=True,
        do_save=True,
        write_val_preds=True,
        # NOTE(review): the notebook introduction says the encoder is not
        # frozen for training runs, yet it is frozen here — confirm intent.
        freeze_encoder=True,
        force_overwrite=True,
        no_cuda=False
    )
    main_runscript.run_loop(run_args)

In [None]:
from transformers import RobertaTokenizer, RobertaForMaskedLM
import torch

# Load roberta-base with its masked-LM head.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMaskedLM.from_pretrained('roberta-base')

# The same sentence twice: once with the number masked, once filled in.
masked_sentence = "There are <mask> people walking, a man, a woman, and two childern."
target_sentence = "There are 4 people walking, a man, a woman, and two childern."

inputs = tokenizer(masked_sentence, return_tensors="pt")
labels = tokenizer(target_sentence, return_tensors="pt")["input_ids"]

# Forward pass with labels so the output carries a loss next to the logits.
outputs = model(**inputs, labels=labels)
loss, logits = outputs.loss, outputs.logits

In [None]:
# Print the three most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("A person who diagnose illnesses works at the hospital. The hospital is where a doctor works. Based on the context, the word illnesses is a <mask>", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_3_tokens = torch.topk(mask_token_logits, 3, dim=1).indices[0].tolist()
for token in top_3_tokens:
  print(tokenizer.decode([token]))

In [None]:
# Short verb-phrase descriptions per profession; each list describes what a
# member of that profession does.
doctor_def = ["helps a sick person", "treats a patient", "cures a person's disease",
              "diagnoses illnesses", "prescribes medicine"]
lawyer_def = ['represents you in court', 'objects in court', 'questions a witness',
              'rests the case', 'settles a law suit']
teacher_def = ['teaches students', 'assigns homework', 'works in a school',
               'dares you to learn', 'explains a fact']
# Five short descriptions of what an engineer does. Every phrase must be
# followed by a comma: adjacent string literals without one are silently
# concatenated into a single element.
engineer_def = ['invents and designs structures', 'analyzes, builds, and tests complex systems',
                'forms the link between scientific discoveries to human and business needs',
                'develops new technological solutions',
                'applies engineering analysis in testing, production, or maintenance']
# More per-profession phrase lists (what a member of the profession does).
# NOTE(review): several phrases contain misspellings (e.g. 'resbonsible',
# 'bulding', 'cabninets', 'normaly'). They are model input text, so confirm
# whether they are intentional before correcting them.
entrepreneur_def = ['creates a new business', 'takes on the risks of creating a startup',
                    'organizes and operates a business', 'needs to ensure funding',
                    'brings good new ideas to market']
# NOTE(review): this list has six phrases; the others in this cell have five.
firefighter_def = ['is resbonsible for fire and rescure',
                   'rushes to a burning building', 'pulls a hoes',
                   'rescues people from dangerous situations',
                   'carries out a ladder slider', 'extinguish fires']
sailor_def = ['captains a ship', 'sails a ship', 'ties a knot',
              'sounds the depth of the sea', 'boards a boat']
actor_def = ['stars in a movie or TV show', 'acts scene in a play',
             'puts on a costume', 'appears at the theater', 'pictures a scene']
architect_def = ['positions walls and plumbing with exactness', 'completes a floor plan',
                 'plans, designs and oversees the construction of buildings',
                 'pictures a structural design',
                 'illustrates and generates bulding design proposals']
nurse_def = ['cares for a patient', 'pages the doctor',
             'checks vital signs', 'hands the instrument to the doctor', 'takes a pulse']
journalist_def = ['reads a press release', 'reports news', 'covers a story',
                  'interviews strangers with a microphone', 'notes a discrepancy in the facts']
policeman_def = ["is in a police force", 'prevents and detects crimes',
                 'protects and assists the general public', 'arrests criminals', 'investigates a crime']
writer_def = ['write a novel', 'words things for impact', 'writes poems',
              'words a phrase carefully', 'tells a story in a book']
programmer_def = ['step through a computer program', 'writes a software',
                  'sets up a website', 'understands programming language', 'fix a software bug']
carpenter_def = ['builds a house', 'hammers a nail', 'plains a door',
                 'nail furniture together', 'build roof, cabninets, and shelves']
baker_def = ['rolls dough', 'makes a cake', 'bake a wedding cake', 'flower a table',
             'coat a cake with lemon-flavored frosting']
chef_def = ['prepares food', 'works at restaurants and hotels', 'cooks a gourmet meal',
            'cooks very well', 'can bread chicken']
accountant_def = ['is a practitioner of accounting',
                  "has the ability to certify an organization's financial statements",
                  'complete a tax return', 'audits books for different companies', 'cooks the books']
farmer_def = ['farms the land', 'seeds the field', 'waters and gather the crops',
              'normaly owns a barn', 'feeds many farm animals']
scientist_def = ['conducts scientific research to advance knowledge',
                 'has an advanced degrees in an area of science',
                 'exhibits a strong curiosity about reality',
                 'applies scientific knowledge for the benefit of people',
                 'performs experiments to test hypotheses']

In [None]:
# Placeholder lists for everyday objects; only dictionary_def has an entry so far.
# NOTE(review): naming is inconsistent — most names here lack the `_def`
# suffix used by the other lists, and `baterry` looks like a misspelling of
# "battery". Confirm nothing depends on these names before renaming.
computer_def = []
pencil_def = []
watch = []
phone = []
baterry = []
car = []
drills = []
dictionary_def = ['A tool that people use to look up word definition']
soap = []
lamp = []
mirror = []
cooker = []


In [None]:
# Phrase lists describing places; each phrase is written to follow the place
# name as its subject ("is ...", "has ...", "can ...").
# TODO: airport_def ends with two empty-string placeholders, church_def has a
# single phrase, and the lists from court_def onward are still empty.
airport_def = ['is where people get on airplanes',
               'has an air traffic control tower',
               'is used for airplane landing and taking off',
               '',
               '']
hotel_def = ['is a place to stay when people travel',
             'is where people can book a room',
             'is used for a bed away from home',
             'is an establishment providing accommodations for travelers',
             'can provide paid lodging on a short-term basis']
bank_def = ['is a financial institution that accepts deposits from the public',
            'can create a demand deposit',
            'plays an important role in financial stability and the economy of a country',
            'is a subject to minimum capital requirements',
            'is a place where people can apply for credit cards']
hospital_def = ['is used for healing sick people',
                'is a place to have surgery',
                'is for treating seriously injured people',
                'is a workplace for doctors and nurses',
                'is able to handle medical emergencies']
school_def = ['is a place for learning and teaching the students',
              'is where people can get an education',
              'provides learning spaces and learning environments',
              'is where teachers work',
              'typically has classrooms, a library, and a cafeteria']
church_def = ['is where people pray to God']
court_def = []
cinema_def = []
gym_def = []
farm_def = []
gallery_def = []

In [None]:
# Placeholder: no phrases have been written for "mountain" yet.
mountain_def = []

In [None]:
# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("A tool that people use to look up word definition is a <mask>", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:
# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("Lava and volcanic ash comes from <mask>", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:
# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("A professional who applies engineering analysis in testing, production, or maintenance is an <mask>", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:

# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("A person who performs experiments to test hypotheses is a <mask>", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:
# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("A man, a woman, and two children are walking on the beach. <mask> people are walking on the beach.", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:
# Print the five most likely fillers for the <mask> slot of the prompt.
# Assumes `tokenizer` and `model` are the masked-LM pair loaded in an earlier cell.
# The ids are stored in `input_ids` so the built-in input() is not shadowed.
input_ids = tokenizer.encode("Bert ate 2 <mask>, cake, ice cream, mochi, cotton candy, spinach, and cabbage.", return_tensors="pt")
# Second index of the match = position of the mask token inside the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
token_logits = model(input_ids).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
  print(tokenizer.decode([token]))

In [None]:
import torch

# Load BART-base through torch.hub from the 'pytorch/fairseq' hub repository.
bart = torch.hub.load('pytorch/fairseq', 'bart.base')
# Move the model to the GPU only when one is present; an unconditional
# .cuda() raises on CPU-only runtimes.
if torch.cuda.is_available():
  bart.cuda()
bart.eval()
bart.fill_mask(['The cat <mask> on the <mask>.'], topk=3, beam=10, match_source_len=False)

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig

# Load the CNN-summarization BART checkpoint with its tokenizer.
# Note that this rebinds `model` and `tokenizer` for all later cells.
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

ARTICLE_TO_SUMMARIZE = "While at Skidmore , Smith also designed an even taller mixed-use skyscraper , the Burj Dubai , now under construction in the United Arab Emirates ."
inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')

# Generate Summary
summary_ids = model.generate(
    inputs['input_ids'],
    num_beams=4,
    max_length=5,
    early_stopping=True,
)
# Decode every generated sequence back to text and print them as one list.
decoded_summaries = [
    tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    for ids in summary_ids
]
print(decoded_summaries)

In [None]:
# Same sentence as in the previous cell, generated with a wider beam and a
# longer output limit. Relies on the `model` / `tokenizer` loaded there.
ARTICLE_TO_SUMMARIZE = "While at Skidmore , Smith also designed an even taller mixed-use skyscraper , the Burj Dubai , now under construction in the United Arab Emirates ."
inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')

# Generate Summary
summary_ids = model.generate(
    inputs['input_ids'],
    num_beams=5,
    max_length=10,
    early_stopping=True,
)
# Decode every generated sequence back to text and print them as one list.
decoded_summaries = [
    tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    for ids in summary_ids
]
print(decoded_summaries)

Since we're training and evaluating on the same (duplicated) example, we should get perfect performance. The numbers themselves are therefore not meaningful, but the notebook should still illustrate the workflow for edge-probing tasks.