In [None]:
from google.colab import drive

# Mount Google Drive at /content/gdrive; later cells save and load the model checkpoint under this mount.
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
!pip install datasets
!pip install transformers
!pip install nlpaug

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Collec

### Imports

In [None]:
import requests
import json
import torch
import os
from tqdm import tqdm
import sys
import random
from copy import deepcopy
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw

### Download Dataset


In [None]:
from datasets import load_dataset

# Load the SQuAD dataset; later cells read its 'train' and 'validation' splits.
squad = load_dataset("squad")

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading and preparing dataset squad/plain_text to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Dataset squad downloaded and prepared to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

### Read Data, Adding Noise and Preprocessing

In [None]:
def read_data(squad, split):
  """Collect the context, question and answers fields of one dataset split.

  Args:
    squad: Mapping from split name to an iterable of examples; each example
      is a mapping with 'context', 'question' and 'answers' keys.
    split: Name of the split to read (e.g. 'train' or 'validation').

  Returns:
    Three parallel lists: contexts, questions, answers.
  """
  rows = [(example['context'], example['question'], example['answers'])
          for example in squad[split]]
  if not rows:
    # zip(*[]) yields nothing to unpack, so an empty split is handled explicitly
    return [], [], []

  contexts, questions, answers = (list(column) for column in zip(*rows))
  return contexts, questions, answers

In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw

# Noise funcs

# FOR QA ONLY: character-level edit operations that can be applied to a question
char_action = ['insert', 'substitute', 'delete', 'swap']

def get_action(type):
    """Return one entry of `char_action`, chosen at random.

    The `type` argument is accepted but not used.
    """
    picked = random.choice(char_action)
    return picked

def add_noise(question, p=0.7):
    """
    Apply one randomly chosen nlpaug augmentation to a question.

    With probability `p` a character-level augmenter is used (keyboard typos
    or a random character edit); otherwise a word-level augmenter is used
    (misspelling or synonym replacement).

    Args:
        question (str): The original question.
        p (float): The probability of applying the char level augmentation.

    Returns:
        str: The augmented question, or the original question when the
        augmenter returns an empty result.
    """
    # Only the augmenter that is actually picked gets constructed; building
    # all four on every call is wasted work.
    if random.random() < p:
        make_augmenter = random.choice([
            lambda: nac.KeyboardAug(aug_char_p=0.2, aug_word_p=0.2, include_special_char=False, include_numeric=False),
            lambda: nac.RandomCharAug(action=get_action("char"), aug_char_p=0.1, aug_word_p=0.1),
        ])
    else:
        make_augmenter = random.choice([naw.SpellingAug, naw.SynonymAug])

    noisy_text = make_augmenter().augment(question)

    # NOTE(review): augment() returns a list in current nlpaug releases; older
    # releases are believed to return a plain string, where [0] would yield a
    # single character. Both shapes are handled here.
    if isinstance(noisy_text, str):
        return noisy_text
    return noisy_text[0] if noisy_text else question


In [None]:
# train validation splits
train_contexts, train_questions, train_answers = read_data(squad, 'train')
valid_contexts, valid_questions, valid_answers = read_data(squad, 'validation')


# Make False if no noise to be added
noisy = True
noise_percent = 0.15
if noisy:
  # Each question is replaced by a noisy version with probability noise_percent.
  # The lists are updated in place: train first, then validation.
  for questions in (train_questions, valid_questions):
    for position in range(len(questions)):
      if random.random() < noise_percent:
        questions[position] = add_noise(questions[position])

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
import random

# Train on a random 20% subset of the SQuAD training split.
train_size = int(0.2 * len(squad['train']))
train_indices = random.sample(range(len(squad['train'])), train_size)
train_contexts = []
train_questions = []
train_answers = []

for idx in train_indices:
  group = squad['train'][idx]
  question = group['question']
  # The examples are re-read from `squad`, so noise added to the earlier
  # full-length train_questions list is not carried over. Apply it to the
  # sampled question here so the `noisy` setting actually reaches training.
  if noisy and random.random() < noise_percent:
    question = add_noise(question)
  train_contexts.append(group['context'])
  train_questions.append(question)
  train_answers.append(group['answers'])

In [None]:
import random

# Validate during training on a random 20% subset of the SQuAD validation split.
valid_size = int(0.2 * len(squad['validation']))
valid_indices = random.sample(range(len(squad['validation'])), valid_size)
valid_contexts = []
valid_questions = []
valid_answers = []

for idx in valid_indices:
  group = squad['validation'][idx]
  question = group['question']
  # The examples are re-read from `squad`, so noise added to the earlier
  # full-length valid_questions list is not carried over. Apply it to the
  # sampled question here so validation sees the same noise rate as training.
  if noisy and random.random() < noise_percent:
    question = add_noise(question)
  valid_contexts.append(group['context'])
  valid_questions.append(question)
  valid_answers.append(group['answers'])

In [None]:
def add_end_idx(answers, contexts):
  """Add an 'answer_end' character offset to every answer, in place.

  Only the first answer of each example is used (answer['text'][0] and
  answer['answer_start'][0]). The end offset is exclusive:
  context[start:end] is the answer text.

  SQuAD offsets are sometimes one or two characters too large. When the text
  is found one or two characters earlier, the first entry of 'answer_start'
  is corrected and 'answer_end' is shifted with it. When the text is not
  found at any of the three offsets, the annotated offsets are kept so that
  'answer_end' is always present.

  Args:
    answers: List of dicts with list-valued 'text' and 'answer_start'.
    contexts: List of context strings, parallel to `answers`.

  Returns:
    None. Each dict in `answers` gains 'answer_end' as a one-element list.
  """
  for answer, context in zip(answers, contexts):
    gold_text = answer['text'][0]
    start_idx = answer['answer_start'][0]
    end_idx = start_idx + len(gold_text)

    # Fallback when no offset matches: keep the annotated span
    answer['answer_end'] = [end_idx]

    for shift in (0, 1, 2):
      if shift > start_idx:
        # shifting further would move the start before the context begins
        break
      if context[start_idx - shift:end_idx - shift] == gold_text:
        if shift:
          # keep 'answer_start' a list; only its first entry is corrected
          answer['answer_start'] = [start_idx - shift, *answer['answer_start'][1:]]
        answer['answer_end'] = [end_idx - shift]
        break


In [None]:

# Attach 'answer_end' to the answer dicts of both splits (the dicts are modified in place)
for split_answers, split_contexts in ((train_answers, train_contexts), (valid_answers, valid_contexts)):
  add_end_idx(split_answers, split_contexts)

### Tokenization


In [None]:
from transformers import BertTokenizerFast
from transformers import ElectraTokenizerFast, ElectraForQuestionAnswering

# Load the tokenizer from the same checkpoint the model cell loads
# ('google/electra-base-discriminator') so tokenizer and model stay in step.
tokenizer = ElectraTokenizerFast.from_pretrained('google/electra-base-discriminator')



Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [None]:
# Encode (context, question) pairs: the context is the first sequence, the question the second.
# truncation=True cuts over-long pairs; padding=True pads every pair in the call to a common length.
train_encodings = tokenizer(train_contexts, train_questions, truncation=True, padding=True)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)

In [None]:
train_encodings.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [None]:
tokenizer.decode(train_encodings['input_ids'][0])

"[CLS] conversely, dst can adversely affect farmers, parents of young children, and others whose hours are set by the sun and they have traditionally opposed the practice, although some farmers are neutral. one reason why farmers oppose dst is that grain is best harvested after dew evaporates, so when field hands arrive and leave earlier in summer their labor is less valuable. dairy farmers are another group who complain of the change. their cows are sensitive to the timing of milking, so delivering milk earlier disrupts their systems. today some farmers'groups are in favor of dst. [SEP] what animal on dairy farms is affected by timing? [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [

In [None]:
def add_token_positions(encodings, answers):
  """Convert character-level answer spans to token indices and store them on `encodings`.

  For example i the start token is looked up from answers[i]['answer_start'][0]
  and the end token from the last answer character,
  answers[i]['answer_end'][0] - 1. A lookup that returns None is replaced by
  tokenizer.model_max_length (the module-level `tokenizer`).

  Args:
    encodings: Tokenizer output providing char_to_token(example, char) and update().
    answers: List of answer dicts with 'answer_start' and 'answer_end' lists.

  Returns:
    None. `encodings` gains 'start_positions' and 'end_positions'.
  """
  def to_token(example_idx, char_idx):
    token_idx = encodings.char_to_token(example_idx, char_idx)
    # if the position is None, the answer passage has been truncated
    return tokenizer.model_max_length if token_idx is None else token_idx

  start_positions, end_positions = [], []
  for i, answer in enumerate(answers):
    start_positions.append(to_token(i, answer['answer_start'][0]))
    end_positions.append(to_token(i, answer['answer_end'][0] - 1))

  encodings.update({'start_positions': start_positions, 'end_positions': end_positions})



In [None]:
# Add 'start_positions' / 'end_positions' to the encodings of both splits
for split_encodings, split_answers in ((train_encodings, train_answers), (valid_encodings, valid_answers)):
  add_token_positions(split_encodings, split_answers)

In [None]:
train_encodings['start_positions'][:10]

[145, 11, 7, 30, 33, 57, 11, 1, 76, 99]

In [None]:
class SQuAD_Dataset(torch.utils.data.Dataset):
  """Dataset over tokenizer encodings; each item is a dict of tensors for one example."""

  def __init__(self, encodings):
    # Mapping of field name -> per-example values; must also expose `.input_ids`
    self.encodings = encodings

  def __len__(self):
    # One example per row of input_ids
    return len(self.encodings.input_ids)

  def __getitem__(self, idx):
    item = {}
    for key, val in self.encodings.items():
      item[key] = torch.tensor(val[idx])
    return item

In [None]:
# Wrap both sets of encodings so they can be fed to a DataLoader
train_dataset, valid_dataset = (
  SQuAD_Dataset(train_encodings),
  SQuAD_Dataset(valid_encodings),
)

In [None]:
from torch.utils.data import DataLoader

# Define the dataloaders: training batches are shuffled, validation keeps dataset order
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=16)

### Model definition

In [None]:
from transformers import BertForQuestionAnswering, AutoModel

# ELECTRA-base discriminator with a question-answering head. The load log reports the
# qa_outputs weights as newly initialized, so the head is trained from scratch here.
model = ElectraForQuestionAnswering.from_pretrained('google/electra-base-discriminator')


Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForQuestionAnswering: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['qa_outputs.bias', 'qa_outputs.we

### Training

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Working on {device}')

Working on cuda


In [None]:
# Fine-tune the QA model. After every epoch the mean validation loss is computed
# and the weights are saved whenever it improves on the best value so far.
N_EPOCHS = 5
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and weight_decay
# are set to that class's defaults so the optimisation itself is unchanged.
optim = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)
best_val_loss = float("inf")
path = '/content/gdrive/MyDrive/NLP Project/models/ELECTRA_QA_NOISY15'
os.makedirs(path, exist_ok=True)  # torch.save does not create missing directories
model.to(device)
model.train()

for epoch in range(N_EPOCHS):
  loop = tqdm(train_loader, leave=True)
  for batch in loop:
    optim.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_positions = batch['start_positions'].to(device)
    end_positions = batch['end_positions'].to(device)
    # Passing the gold positions makes the model return the loss as the first output
    outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
    loss = outputs[0]
    loss.backward()
    optim.step()

    loop.set_description(f'Epoch {epoch+1}')
    loop.set_postfix(loss=loss.item())

  # Evaluate the model on the validation set
  model.eval()
  val_loss = 0
  with torch.no_grad():
    for batch in valid_loader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      start_positions = batch['start_positions'].to(device)
      end_positions = batch['end_positions'].to(device)
      outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
      val_loss += outputs[0].item()
  # Mean of the per-batch losses
  val_loss /= len(valid_loader)

  # Keep only the weights of the best epoch so far
  if val_loss < best_val_loss:
    best_val_loss = val_loss
    torch.save(model.state_dict(), path+'/model_parameters.pth')

  print(val_loss,best_val_loss)

  # Set the model back to training mode
  model.train()


Epoch 1: 100%|██████████| 1095/1095 [26:35<00:00,  1.46s/it, loss=0.944]


1.0570257049530072 1.0570257049530072


Epoch 2: 100%|██████████| 1095/1095 [26:39<00:00,  1.46s/it, loss=0.915]


1.0138209057145549 1.0138209057145549


Epoch 3: 100%|██████████| 1095/1095 [26:38<00:00,  1.46s/it, loss=0.585]


1.1501048481077851 1.0138209057145549


Epoch 4: 100%|██████████| 1095/1095 [26:37<00:00,  1.46s/it, loss=0.724]


1.328776044487477 1.0138209057145549


Epoch 5: 100%|██████████| 1095/1095 [26:37<00:00,  1.46s/it, loss=0.198]


1.4298871775018576 1.0138209057145549


### Evaluation

In [None]:
import random
from datasets import load_dataset
from torch.utils.data import DataLoader
from collections import Counter

# Build the clean evaluation set: a random 20% sample of the SQuAD validation split.
squad_test = load_dataset("squad")

valid_size = int(0.2 * len(squad_test['validation']))
# Indices are drawn from the same dataset object they are used to index
valid_indices = random.sample(range(len(squad_test['validation'])), valid_size)
valid_contexts = []
valid_questions = []
valid_answers = []

for idx in valid_indices:
  group = squad_test['validation'][idx]
  valid_contexts.append(group['context'])
  valid_questions.append(group['question'])
  valid_answers.append(group['answers'])

# Same preprocessing as for training: answer end offsets, tokenisation, token positions
add_end_idx(valid_answers, valid_contexts)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)
add_token_positions(valid_encodings, valid_answers)
valid_dataset = SQuAD_Dataset(valid_encodings)
valid_loader = DataLoader(valid_dataset, batch_size=16)



  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from collections import Counter

# Evaluate the saved checkpoint on `valid_loader`: position accuracy, exact match
# and token-overlap F1 between the decoded predicted and true answer spans.
path = '/content/gdrive/MyDrive/NLP Project/models/ELECTRA_QA_NOISY15'
# map_location loads the checkpoint onto whichever device is in use
model.load_state_dict(torch.load(path+'/model_parameters.pth', map_location=device))

model = model.to(device)
model.eval()

# Accuracy is counted over individual start/end positions so that a short last
# batch does not get the same weight as a full one.
correct_positions = 0
total_positions = 0
em_score = []
f1_score = []

for batch in tqdm(valid_loader):
  with torch.no_grad():
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_true = batch['start_positions'].to(device)
    end_true = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask)

    start_pred = torch.argmax(outputs['start_logits'], dim=1)
    end_pred = torch.argmax(outputs['end_logits'], dim=1)

    correct_positions += (start_pred == start_true).sum().item()
    correct_positions += (end_pred == end_true).sum().item()
    total_positions += 2 * len(start_true)

    # Calculate EM score and F1 score
    for i in range(len(start_true)):
        pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i] + 1], skip_special_tokens=True)
        true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i] + 1], skip_special_tokens=True)
        em_score.append(int(pred_span == true_span))

        # Compute F1 score over whitespace-separated tokens of the two spans
        common = Counter(pred_span.split()) & Counter(true_span.split())
        num_common = sum(common.values())
        precision = num_common / max(len(pred_span.split()), 1)
        recall = num_common / max(len(true_span.split()), 1)
        f1 = (2 * precision * recall) / max((precision + recall), 1e-8)
        f1_score.append(f1)


acc = correct_positions / total_positions
em_score = sum(em_score) / len(em_score)
f1_score = sum(f1_score) / len(f1_score)

print("\n\nAccuracy:", acc)
print("EM score:", em_score)
print("F1 score:", f1_score)

# Show true vs predicted spans for the last batch as a sanity check
print("\n\nT/P\tanswer_start\tanswer_end\n")
for i in range(len(start_true)):
  pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i]+1], skip_special_tokens=True)
  true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i]+1], skip_special_tokens=True)
  print(f"true\t{start_true[i]}\t{end_true[i]}\t{true_span}\n"
        f"pred\t{start_pred[i]}\t{end_pred[i]}\t{pred_span}\n")


100%|██████████| 133/133 [01:05<00:00,  2.03it/s]



Accuracy: 0.7168703007518797
EM score: 0.6017029328287606
F1 score: 0.790053641855477


T/P	answer_start	answer_end

true	66	70	the view and the chew
pred	66	76	the view and the chew and the soap opera general hospital

true	25	29	survivable communications networks
pred	7	11	distributed adaptive message block switching



T/P	answer_start	answer_end

true	66	70	the view and the chew
pred	66	76	the view and the chew and the soap opera general hospital

true	25	29	survivable communications networks
pred	7	11	distributed adaptive message block switching






### Evaluation on Noisy Data

In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw

# Noise funcs
# These redefine the helpers from the preprocessing section; running this cell rebinds the same names.

# FOR QA ONLY
char_action = ['insert',
        'substitute',
        'delete',
        'swap',
]

def get_action(type):
    """Return one entry of `char_action`, chosen at random. `type` is not used."""
    return random.choice(char_action)

def add_noise(question, p=0.7):
    """
    Apply one randomly chosen nlpaug augmentation to a question.

    With probability `p` a character-level augmenter is used (keyboard typos
    or a random character edit); otherwise a word-level augmenter is used
    (misspelling or synonym replacement).

    Args:
        question (str): The original question.
        p (float): The probability of applying the char level augmentation.

    Returns:
        str: The augmented question, or the original question when the
        augmenter returns an empty result.
    """
    # Only the augmenter that is actually picked gets constructed; building
    # all four on every call is wasted work.
    if random.random() < p:
        make_augmenter = random.choice([
            lambda: nac.KeyboardAug(aug_char_p=0.2, aug_word_p=0.2, include_special_char=False, include_numeric=False),
            lambda: nac.RandomCharAug(action=get_action("char"), aug_char_p=0.1, aug_word_p=0.1),
        ])
    else:
        make_augmenter = random.choice([naw.SpellingAug, naw.SynonymAug])

    noisy_text = make_augmenter().augment(question)

    # NOTE(review): augment() returns a list in current nlpaug releases; older
    # releases are believed to return a plain string, where [0] would yield a
    # single character. Both shapes are handled here.
    if isinstance(noisy_text, str):
        return noisy_text
    return noisy_text[0] if noisy_text else question


In [None]:
import random
from datasets import load_dataset
from torch.utils.data import DataLoader

# Build the noisy evaluation set: a random 20% sample of the SQuAD validation
# split in which each question is noised with probability `noise_percent`.
squad_test = load_dataset("squad")

random.seed(42)
random_noise = random.uniform(0.05, 0.15)
print(random_noise)

noisy = True
noise_percent = random_noise

valid_size = int(0.2 * len(squad_test['validation']))
# Indices are drawn from the same dataset object they are used to index
valid_indices = random.sample(range(len(squad_test['validation'])), valid_size)
valid_contexts = []
valid_questions = []
valid_answers = []

for idx in valid_indices:
  group = squad_test['validation'][idx]
  question = group['question']
  # Noise is applied to the sampled question itself. Noising a separate list
  # first and then re-reading clean questions from the dataset would evaluate
  # on clean data.
  if noisy and random.random() < noise_percent:
    question = add_noise(question)
  valid_contexts.append(group['context'])
  valid_questions.append(question)
  valid_answers.append(group['answers'])

if noisy:
  print('added noise')

# Same preprocessing as for training: answer end offsets, tokenisation, token positions
add_end_idx(valid_answers, valid_contexts)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)
add_token_positions(valid_encodings, valid_answers)
valid_dataset = SQuAD_Dataset(valid_encodings)
valid_loader = DataLoader(valid_dataset, batch_size=16)



  0%|          | 0/2 [00:00<?, ?it/s]

0.11394267984578837
added noise


In [None]:
from collections import Counter

# Evaluate the saved checkpoint on `valid_loader`: position accuracy, exact match
# and token-overlap F1 between the decoded predicted and true answer spans.
path = '/content/gdrive/MyDrive/NLP Project/models/ELECTRA_QA_NOISY15'
# map_location loads the checkpoint onto whichever device is in use
model.load_state_dict(torch.load(path+'/model_parameters.pth', map_location=device))

model = model.to(device)
model.eval()

# Accuracy is counted over individual start/end positions so that a short last
# batch does not get the same weight as a full one.
correct_positions = 0
total_positions = 0
em_score = []
f1_score = []

for batch in tqdm(valid_loader):
  with torch.no_grad():
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_true = batch['start_positions'].to(device)
    end_true = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask)

    start_pred = torch.argmax(outputs['start_logits'], dim=1)
    end_pred = torch.argmax(outputs['end_logits'], dim=1)

    correct_positions += (start_pred == start_true).sum().item()
    correct_positions += (end_pred == end_true).sum().item()
    total_positions += 2 * len(start_true)

    # Calculate EM score and F1 score
    for i in range(len(start_true)):
        pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i] + 1], skip_special_tokens=True)
        true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i] + 1], skip_special_tokens=True)
        em_score.append(int(pred_span == true_span))

        # Compute F1 score over whitespace-separated tokens of the two spans
        common = Counter(pred_span.split()) & Counter(true_span.split())
        num_common = sum(common.values())
        precision = num_common / max(len(pred_span.split()), 1)
        recall = num_common / max(len(true_span.split()), 1)
        f1 = (2 * precision * recall) / max((precision + recall), 1e-8)
        f1_score.append(f1)


acc = correct_positions / total_positions
em_score = sum(em_score) / len(em_score)
f1_score = sum(f1_score) / len(f1_score)

print("\n\nAccuracy:", acc)
print("EM score:", em_score)
print("F1 score:", f1_score)

# Show true vs predicted spans for the last batch as a sanity check
print("\n\nT/P\tanswer_start\tanswer_end\n")
for i in range(len(start_true)):
  pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i]+1], skip_special_tokens=True)
  true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i]+1], skip_special_tokens=True)
  print(f"true\t{start_true[i]}\t{end_true[i]}\t{true_span}\n"
        f"pred\t{start_pred[i]}\t{end_pred[i]}\t{pred_span}\n")


100%|██████████| 133/133 [01:10<00:00,  1.89it/s]



Accuracy: 0.7119360902255639
EM score: 0.5789971617786187
F1 score: 0.7738961041193271


T/P	answer_start	answer_end

true	42	45	february 1, 2016
pred	42	45	february 1, 2016

true	144	144	1992
pred	144	144	1992






### **GROUND TRUTH vs MODEL PREDICTIONS**

In [None]:
[['entire length', 'along the entire length'],
 ['james lofton', 'james lofton'],
 ['self - determined', 'students may become more intrinsically motivated'],
 ['whiteness', 'whiteness'],
 ['likelihood of repeating', ''],
 ['by theme',
  'by theme, tomb sculpture, portraiture, garden sculpture and mythology'],
 ['lower rhine', 'lower rhine'],
 ['front islamique de salut', ''],
 ['7, 200', '7, 200'],
 ['complexity resources', 'complexity'],
 ['tulku', 'tulku'],
 ['cholecalciferol', 'cholecalciferol'],
 ['sultan muhammad', 'sultan muhammad'],
 ['high rhine', 'the high rhine'],
 ['three', 'the next three'],
 ['veni redemptor gentium', 'veni redemptor gentium'],
 ['1997', '1997'],
 ['about 500', 'about 500'],
 ['at the end of the 19th century', 'end of the 19th century'],
 ['due to the death of elisabeth sladen', 'death of elisabeth sladen'],
 ['killed in a horse - riding accident', 'dane was killed'],
 ['sky', 'sky'],
 ['brookhaven', 'brookhaven'],
 ['thomson', 'thomson'],
 ['kokochu', 'kokochu'],
 ['cytotoxic natural killer cells and ctls ( cytotoxic t lymphocytes )',
  'cytotoxic natural killer cells'],
 ['april 1, 1963', 'april 1, 1963'],
 ['bills', 'bills'],
 ['of the united states, britain, germany and france',
  'the governments of the united states, britain, germany and france'],
 ['kublai khan', 'kublai khan'],
 ['tugh temur', 'tugh temur'],
 ['suspended sentences', 'suspended'],
 ['beroe', 'beroe'],
 ['happy days', 'happy days'],
 ['actual sea level rise was above the top of the range', ''],
 ['stanford university', 'stanford university'],
 ["his mother's genetics and influence", 'western serbia'],
 ['the lack of reliable statistics from this period',
  'lack of reliable statistics'],
 ['youngstown', 'youngstown'],
 ['kony ealy', 'kony ealy'],
 ['17, 000', '17, 000'],
 ['1906', '1906'],
 ['heterokontophyte', 'heterokontophyte'],
 ['donald davies', 'donald davies'],
 ['phycobilisomes', 'chlorophyll b'],
 ['sequential', 'sequential hermaphrodites'],
 ['the pauli exclusion principle', 'pauli exclusion principle'],
 ['sea gooseberry', 'sea gooseberry'],
 ['captain francis fowke', 'captain francis fowke'],
 ['circuit switching is characterized by a fee per unit of connection time',
  'fee per unit of connection time'],
 ['ring of integers of quadratic number fields', 'quadratic number fields'],
 ['harvard yard', 'harvard yard'],
 ['productivity gap', 'productivity gap'],
 ['christ', 'christ'],
 ['nine', 'at least three'],
 ['twin prime conjecture', "polignac's conjecture"],
 ['the tuesday afternoon prior to the game', 'tuesday afternoon'],
 ['1887', '1887'],
 ['8', '8'],
 ['beyonce and bruno mars', 'beyonce and bruno mars'],
 ["one of his wife's ladies - in - waiting",
  "one of his wife's ladies - in - waiting"],
 ['democracy', 'democracy'],
 ['teachers', 'teachers'],
 ['mongke khan', 'mongke khan'],
 ['two', 'two'],
 ['college', 'college sports'],
 ["nasa's calipso satellite", 'calipso satellite'],
 ['cloud storage', 'cloud storage'],
 ['feo', 'feo'],
 ['thomas murphy', 'thomas murphy'],
 ['boom - and - bust cycles', 'boom - and - bust cycles'],
 ['commercial, scientific, and cultural',
  'commercial, scientific, and cultural'],
 ['padlocking the gates', ''],
 ['dioxygen', 'dioxygen'],
 ['considerable impact', 'experience, ideology, and weapons'],
 ['17 february 1546', '17 february 1546'],
 ['sfx magazine', 'sfx magazine'],
 ['the privy council', 'privy council'],
 ['the 1970s', 'the 1970s'],
 ['sundays', 'sundays'],
 ['wide sidewalks', 'wide sidewalks'],
 ['np - complete', 'np - complete problems'],
 ['helmeted honeyeater', 'helmeted honeyeater'],
 ['11, 600 bp', 'the end of the pleistocene'],
 ['increase',
  'an increase in skilled workers, which led to a decrease in the price of skilled labor'],
 ['because it is a waste of resources', 'it is a waste of resources'],
 ['a net', 'a net'],
 ['two', 'two'],
 ['all other animals',
  'bilateria, sister to the cnidaria, sister to cnidaria, placozoa and bilateria, and sister to all other animal phyla. a series of studies that looked at the presence and absence of members of gene families and signalling pathways ( e. g., homeoboxes, nuclear receptors, the wnt signaling pathway, and sodium channels ) showed evidence congruent with the latter two scenarios, that ctenophores are either sister to cnidaria, placozoa and bilateria or sister to all other animal phyla. several more recent studies comparing complete sequenced genomes of ctenophores with other sequenced animal genomes have also supported ctenophores as the sister lineage to all other animals'],
 ['cengiz han', 'in variety of ways'],
 ['certification by a recognized body', 'certification'],
 ["luther's disappearance", "luther's disappearance"],
 ['channels through which inequality may affect economic growth',
  'the channels through which inequality may affect economic growth'],
 ['radial', 'radial'],
 ['march 1896', 'march 1896'],
 ['january 1979', 'january 1979'],
 ['ctenophores,', 'ctenophores'],
 ['exercise', 'for exercise'],
 ['city road', 'city road'],
 ['ogedei', 'ogedei'],
 ['the eighteenth century', 'eighteenth century'],
 ['$ 400, 000 – $ 450, 000', '$ 400, 000 – $ 450, 000'],
 ['proteolysis', 'proteolysis'],
 ['summa cum laude', 'degrees summa cum laude'],
 ['einstein', 'albert einstein'],
 ['three', 'three'],
 ['articles 106 and 107', 'articles 106 and 107'],
 ['fresno', 'the city of fresno'],
 ['signals from the chloroplast that regulate gene expression in the nucleus',
  'signals from the chloroplast that regulate gene expression in the nucleus'],
 ['mike carey', 'mike carey'],
 ['supply and demand', 'law of supply and demand'],
 ['the 1960s and 1970s', 'first 15'],
 ['warraghiggey, meaning " he who does great things. "', 'warraghiggey'],
 ['neither making maximum effort nor achieving results necessary',
  '" we are neither making maximum effort nor achieving results necessary if this country is to reach a position of leadership. "'],
 ['public high schools lost their accreditation', 'lower taxes'],
 ['four', 'four'],
 ['transpac', 'transpacific yacht race, or transpac'],
 ['budapest telephone exchange', 'budapest telephone exchange'],
 ['buena vista television', 'buena vista television'],
 ['school functions', 'school functions'],
 ['1886', 'late 1886'],
 ['71 %', '71 %'],
 ['the packets may be delivered according to a multiple access scheme',
  'multiple access scheme'],
 ['violence', 'violence'],
 ['liquid oxygen', 'liquid oxygen'],
 ['chagatai', 'chagatai'],
 ['increased', 'was increased'],
 ["accessory pigments that override the chlorophylls'green colors",
  "accessory pigments that override the chlorophylls'green colors"],
 ['public pad service telepad ( using the dnic 2049',
  'public pad service telepad'],
 ['presiding officer', 'presiding officer'],
 ['1870', '1870'],
 ['the convecting mantle', 'convecting mantle'],
 ['rookies', 'rookies'],
 ['1855 colonial constitution', '1855 colonial constitution'],
 ['russell t davies', 'russell t davies'],
 ['mnemiopsis', 'mnemiopsis'],
 ['ships', 'ships'],
 ['1884', 'june 1884'],
 ['october 16, 2012', 'october 16, 2012'],
 ['0. 2 inhabitants per square kilometre', '0. 2 inhabitants'],
 ['adolf galland', 'adolf galland'],
 ['composite numbers ( the carmichael numbers )', 'carmichael numbers'],
 ['tracks, signalling and overhead wires', 'most stations'],
 ['it was never affiliated with any particular denomination', 'never'],
 ['drug choice, dose, route, frequency, and duration of therapy',
  'creating a comprehensive drug therapy plan for patient - specific problems, identifying goals of therapy, and reviewing all prescribed medications prior to dispensing and administration to the patient. the review process often involves an evaluation of the appropriateness of the drug therapy ( e. g., drug choice, dose, route, frequency, and duration of therapy'],
 ['coldplay', 'beyonce and bruno mars'],
 ['nbc affiliate ksee', 'nbc affiliate ksee'],
 ['1890', '1890'],
 ['when they would be married', 'when they would be married'],
 ['explaining their actions', 'defiant'],
 ['henry plitt', 'henry plitt'],
 ['new england patriots', 'new england patriots'],
 ['multiplying two integers', ''],
 ['more wealth', 'wealth and income'],
 ['arizona cardinals', 'arizona cardinals'],
 ['ctenes', 'ctenes'],
 ['oxides', 'oxides'],
 ['june 1978', 'june 1978'],
 ['societies', 'societies'],
 ['charged particle beam weapons', 'charged particle beam'],
 ['st. lawrence and mississippi watersheds, did business with local tribes, and often married indian women',
  'throughout the st. lawrence and mississippi watersheds'],
 ['the great yuan', 'great yuan'],
 ["punish the miami people of pickawillany for not following celoron's orders to cease trading with the british",
  "to punish the miami people of pickawillany for not following celoron's orders to cease trading with the british"],
 ['rudesheim am rhein', 'rudesheim am rhein'],
 ['yin - yang and wuxing', 'yin - yang and wuxing'],
 ['the qara khitai, caucasus, khwarezmid empire, western xia and jin dynasties',
  'western xia and jin dynasties'],
 ['1421', '1421'],
 ['flung to the heedless winds',
  'ein neues lied wir heben an " ( " a new song we raise " ), which is generally known in english by john c. messenger\'s translation by the title and first line " flung to the heedless winds "'],
 ['time or space', 'time or space'],
 ['p ⊆ np ⊆ pp ⊆ pspace', ''],
 ['$ 960 billion', '$ 960 billion'],
 ['tidal currents', 'tidal currents'],
 ['captured enemies', 'body shields'],
 ['stratigraphers', 'stratigraphers'],
 ['george westinghouse', 'george westinghouse'],
 ['187 feet', '187 feet'],
 ['50 kilopascals', 'more than 50 kilopascals ( kpa )'],
 ['academy', 'outdoors'],
 ['can produce both eggs and sperm, meaning it can fertilize its own egg',
  'a single animal can produce both eggs and sperm, meaning it can fertilize its own egg, not needing a mate'],
 ['britain', 'britain'],
 ['europe, north america, asia and north africa',
  'europe, north america, asia and north africa'],
 ['yinchuan', 'yinchuan'],
 ['nine', 'nine'],
 ['royal shakespeare', 'royal shakespeare company'],
 ['1562', '1562'],
 ['italian renaissance', 'italian renaissance'],
 ['high humidity', 'extremely high humidity'],
 ['han chinese', 'han chinese'],
 ['mork & mindy', 'mork & mindy'],
 ['major general james abercrombie', 'major general james abercrombie'],
 ['four', 'four'],
 ['universities', 'newcastle and northumbria universities'],
 ['an increase in the input size', ''],
 ['50th anniversary special',
  'the programme is listed in guinness world records as the longest - running science fiction television show in the world, the " most successful " science fiction series of all time — based on its over - all broadcast ratings, dvd and book sales, and itunes traffic — and for the largest ever simulcast of a tv drama with its 50th anniversary special'],
 ['a recurring decimal', 'recurring'],
 ['miniature cydippids', 'miniature cydippids'],
 ['cretaceous – paleogene extinction event',
  'cretaceous – paleogene extinction event'],
 ['otrar', 'otrar'],
 ['continental edison company', ''],
 ['women', 'queen bees'],
 ['jones et al. 1998, pollack, huang & shen 1998, crowley & lowery 2000 and briffa 2000',
  'crowley & lowery 2000 and briffa 2000'],
 ['dreadnought battleships', ''],
 ['ideological', 'ideological'],
 ['health care professional', 'health care'],
 ['the financial crisis of 2007 – 08', 'the financial crisis of 2007 – 08'],
 ['the university of chicago college bowl team',
  'university of chicago college bowl team'],
 ['3600 revolutions per minute', '3600 revolutions per minute'],
 ['one', 'one'],
 ['1945', '1945'],
 ['the three doctors', 'the three doctors'],
 ['$ 60, 000 in cash and stock and a royalty of $ 2. 50 per ac horsepower produced by each motor',
  '$ 60, 000'],
 ['city hall', 'city hall'],
 ['higher oxygen content', 'their higher oxygen content'],
 ['was not covered in any newspapers', 'not covered in any newspapers'],
 ['over 60 percent', 'over 60 percent'],
 ['shen kuo', 'shen kuo'],
 ['make detailed plans and maintain careful oversight',
  'make detailed plans and maintain careful oversight'],
 ['artisans and farmers', 'artisans and farmers'],
 ['the dinophyte nucleus', 'dinophyte nucleus'],
 ['chastity', 'chastity'],
 ['general and complete disarmament', 'general and complete disarmament'],
 ['the neutral zone', ''],
 ['anthropological', 'anthropological'],
 ['battle of dalan balzhut', 'dalan balzhut'],
 ['john b. goodenough', 'john b. goodenough'],
 ['to avoid trivialization', 'to avoid trivialization'],
 ['orbital scientific instrument package',
  'an orbital scientific instrument package'],
 ['achtliederbuch', 'achtliederbuch'],
 ["privately in the principal's office", "in the principal's office"],
 ['become utterly debased', 'utterly debased'],
 ['singing of german hymns', 'german hymns'],
 ['epoxides', 'epoxides'],
 ['june 30, 1951', 'june 30, 1951'],
 ['only justified against governmental entities',
  'that civil disobedience is only justified against governmental entities'],
 ['pepsi', 'pepsi'],
 ['hamas', 'hamas'],
 ["st. george's united methodist church",
  "st. george's united methodist church"],
 ['2001', '2001'],
 ['a small portion of the population lives off unearned property income',
  'a small portion of the population lives off unearned property income'],
 ['toregene khatun', 'toregene khatun'],
 ['new guinea', 'german new guinea'],
 ['same - gender marriages with resolutions', 'same - gender marriages'],
 ['oriental courts', 'owen jones'],
 ['propaganda', 'propaganda'],
 ['dallas', 'dallas'],
 ['they lost money from the beginning, and sinback, a high - level marketing manager, was given the job of turning the business around',
  'could be profitable'],
 ['japan', 'japan'],
 ['the referendum in france and the referendum in the netherlands',
  'referendum in france and the referendum in the netherlands'],
 ['the city council', 'city council'],
 ['1st century bc', '1st century bc'],
 ['league of augsburg', 'league of augsburg'],
 ['1969', '1969'],
 ['fighting horsemen', 'fighting horsemen'],
 ['normal',
  'pauli repulsion ( due to fermionic nature of electrons ) follows resulting in the force that acts in a direction normal to the surface interface between two objects. : 93 the normal force'],
 ['los angeles',
  'los angeles, orange, san diego, san bernardino, and riverside'],
 ['interventionism', 'interventionism'],
 ['17', '17'],
 ['sports night', 'sports night'],
 ["nasa's langley research center", 'houston, texas'],
 ['to conduct photosynthesis', 'to conduct photosynthesis'],
 ['a committee of independent experts', 'a committee of independent experts'],
 ['" right ", " just ", or " true "', '" right ", " just ", or " true "'],
 ['forced tesla out', 'forced tesla out'],
 ["as a means to help the state's educational and economic development",
  "to explore computer networking between three of michigan's public universities as a means to help the state's educational and economic development"],
 ['association of american universities',
  'association of american universities'],
 ['unfair',
  'high levels of inequality, outcomes that are widely viewed as unfair'],
 ['well logs', 'well logs'],
 ['( / ˈfrɛznoʊ / frez - noh )', 'frez - noh'],
 ['ivf', 'ivf'],
 ['2003', '2003'],
 ['france', 'germany'],
 ['christmas eve', 'christmas eve'],
 ['9', '9'],
 ['attempted to enter the test site',
  'the committee for non - violent action sponsored a protest in august 1957, at the camp mercury nuclear test site near las vegas, nevada, 13 of the protesters attempted to enter the test site knowing that they faced arrest'],
 ['theatres', 'theatres'],
 ['hamas', 'hamas'],
 ['over $ 40 million', 'over $ 40 million'],
 ['abolish the state of israel', 'abolish the state of israel'],
 ['theology and philosophy', 'theology and philosophy'],
 ['taoism', 'taoism'],
 ['shaping ideas about the free market',
  'shaping ideas about the free market'],
 ['graham gano', 'graham gano'],
 ['nfl owners', 'nfl owners'],
 ['eight original series serials', 'eight original series serials'],
 ['intracellular pathogenesis', 'intracellular pathogenesis'],
 ['1724 to 1725', '1724 to 1725'],
 ['robert lane and benjamin vail', 'robert lane and benjamin vail'],
 ['egyptian islamic jihad organization',
  'egyptian islamic jihad organization'],
 ['the ease with which people, youth in particular, can obtain controlled substances',
  'the ease with which people, youth in particular, can obtain controlled substances'],
 ['zheng', 'zheng'],
 ['indulgences for the living', 'indulgences for the living'],
 ['the tax rate', 'tax rate'],
 ['warsaw', 'warsaw'],
 ['the revolution', 'the revolution'],
 ['love radio', 'love radio'],
 ['igg', 'igg'],
 ['australian broadcasting corporation ( abc )',
  'australian broadcasting corporation'],
 ['aare', 'river aare'],
 ['tesla would be killed through overwork',
  'tesla would be killed through overwork'],
 ['genghis khan', 'genghis khan'],
 ['the most rigorous, intense', 'rigorous, intense'],
 ['" informal " imperialism', 'uneven trade agreements'],
 ["he explored the mountains in hunter's garb", 'read many books'],
 ['its root word pharma', 'pharma'],
 ['spain', 'spain'],
 ['gerhard. lessing', 'franz pieper'],
 ['scottish parliament building', 'holyrood area of edinburgh'],
 ['13 years and 48 days', '39'],
 ['the owner', 'the owner'],
 ['number of gates in a circuit', 'number of gates'],
 ['2003', '2003'],
 ['1905', '1905'],
 ['one way', 'one way streets'],
 ['66', '33 lb ( 15 kg )'],
 ['1977', '1977'],
 ['the " social chapter "', 'a protocol'],
 ['sophocles', 'sophocles'],
 ['johann gerhard', 'franz pieper'],
 ['alga', 'glaucophyte'],
 ['tracy wolfson and evan washburn', 'tracy wolfson and evan washburn'],
 ['expulsion', 'expulsion'],
 ['effect',
  "it may be possible that another kuznets'cycle is occurring, specifically the move from the manufacturing sector to the service sector"],
 ['edgar', ''],
 ['placebo', 'placebo effect'],
 ['twelve', 'twelve'],
 ['negotiations', 'negotiations for a settlement'],
 ['east african community', 'east african community'],
 ['all angles remain the same', 'orogenic wedge'],
 ['2014', '2014'],
 ['6 miles', '6 miles'],
 ['sri lanka',
  "korea, the himalayan kingdoms and south east asia. korean displays include green - glazed ceramics, silk embroideries from officials'robes and gleaming boxes inlaid with mother - of - pearl made between 500 ad and 2000. himalayan items include important early nepalese bronze sculptures, repousse work and embroidery. tibetan art from the 14th to the 19th century is represented by notable 14th - and 15th - century religious images in wood and bronze, scroll paintings and ritual objects. art from thailand, burma, cambodia, indonesia and sri lanka"],
 ['inputs', 'best, worst and average case complexity'],
 ['494, 665', '494, 665'],
 ['constant flooding',
  'to counteract the constant flooding and strong sedimentation in the western rhine delta'],
 ['99. 4', '99. 4'],
 ['newcastle diamonds', 'newcastle diamonds'],
 ['rheinbrech', 'near the surface'],
 ['home viewers who made tape recordings of the show', 'home viewers'],
 ['cortisol and catecholamines', 'cortisol and catecholamines'],
 ['water flow through the body cavity', 'water flow through the body cavity'],
 ['carrots, turnips, new varieties of lemons, eggplants, and melons, high - quality granulated sugar, and cotton',
  'carrots, turnips, new varieties of lemons, eggplants, and melons, high - quality granulated sugar, and cotton'],
 ['editor of electrical world magazine', 'a friend and publicist'],
 ['toghrul', 'toghrul'],
 ['triumphing by a brave defence', '" triumphing by a brave defence'],
 ['ghazan khan', 'ghazan khan'],
 ['germany and the united kingdom', 'germany and the united kingdom'],
 ['warner bros. presents', 'warner bros. presents'],
 ['the mother', 'the mother'],
 ['j. i. pontanus', 'gasquet'],
 ['united kingdom', 'united kingdom'],
 ['photooxidative damage', 'photooxidative damage'],
 ['eadweard muybridge', 'eadweard muybridge'],
 ['harvey martin', 'harvey martin'],
 ['elected msps', 'elected msps'],
 ['courts of member states and the court of justice of the european union',
  'the courts of member states and the court of justice of the european union'],
 ['canadian radio - television and telecommunications commission',
  'canadian radio - television and telecommunications commission'],
 ['nypd blue', 'nypd blue'],
 ['piston', 'partial vacuum'],
 ['director', 'director'],
 ['additional membranes outside of the original two', 'additional membranes'],
 ['accountants', 'accountants'],
 ['riches of croesus', 'my poverty for the riches of croesus'],
 ['allegations of professional misconduct', 'complaints involving members'],
 ['a german nazi colonial administration',
  'german nazi colonial administration'],
 ["the city's residents fled to the north", 'the north'],
 ['nuda', 'nuda'],
 ['valleys', 'transverse and peninsular ranges'],
 ['the christmas invasion', '" the christmas invasion'],
 ['southern china withstood and fought to the last',
  'because southern china withstood and fought to the last before caving in'],
 ['28', '28 floor riverplace tower'],
 ['the parish church of st andrew', 'the parish church of st andrew'],
 ['british colonists would not be safe as long as the french were present',
  'very badly disposed towards the french, and are entirely devoted to the english. i don\'t know in what way they could be brought back. " even before his return to montreal, reports on the situation in the ohio country were making their way to london and paris, each side proposing that action be taken. william shirley, the expansionist governor of the province of massachusetts bay, was particularly forceful, stating that british colonists would not be safe'],
 ['august 1992', 'august 1992'],
 ['prince frederick iii', 'prince frederick iii, elector of saxony'],
 ['some paintings', 'paintings'],
 ['juris hartmanis and richard stearns',
  'juris hartmanis and richard stearns'],
 ['6, 000 square kilometres', '6, 000 square kilometres'],
 ['700', '700'],
 ['daewoo', 'daewoo'],
 ['government officials and climate change experts',
  'governmental organizations'],
 ['duisburg', 'duisburg'],
 ['host interface to x. 25 and the terminal interface to x. 29', 'x. 25'],
 ['religion from politics', 'religion from politics'],
 ["president of abc's broadcasting division",
  "president of abc's broadcasting division"],
 ['five', 'five'],
 ['1. 4 times normal', '1. 4 times'],
 ['independent', 'independent schools'],
 ['market', 'market forces'],
 ['mayor ed lee', 'ed lee'],
 ['general conference', 'general conference'],
 ['telenet was incorporated in 1973 and started operations in 1975. it went public in 1979 and was then sold to gte',
  "telenet was the first fcc - licensed public data network in the united states. it was founded by former arpa ipto director larry roberts as a means of making arpanet technology public. he had tried to interest at & t in buying the technology, but the monopoly's reaction was that this was incompatible with their future. bolt, beranack and newman ( bbn ) provided the financing. it initially used arpanet technology but changed the host interface to x. 25 and the terminal interface to x. 29. telenet designed these protocols and helped standardize them in the ccitt. telenet was incorporated in 1973 and started operations in 1975. it went public in 1979 and was then sold to gte"],
 ['von miller', 'von miller'],
 ['parachutes', 'parachutes'],
 ['by the end of 1350', '1350'],
 ['150 nobel laureates', '150'],
 ['queen victoria', 'queen victoria'],
 ['alberto calderon', 'alberto calderon'],
 ['foundational constitutional questions affecting democracy and human rights',
  'democracy and human rights'],
 ['17', '1979 – 80'],
 ['the roads', 'the roads'],
 ['28 days', '28'],
 ['108', '108'],
 ['jingshi dadian', 'jingshi dadian'],
 ['hermaphroditism and early reproduction',
  'hermaphroditism and early reproduction'],
 ['" the comprehensive institutions of the great yuan "',
  'the comprehensive institutions of the great yuan'],
 ['one', 'one'],
 ['evolution of the german language',
  'evolution of the german language and literature'],
 ['mathematical models', 'mathematical models'],
 ['endowments', 'religious organizations or private individuals'],
 ['a tatar chieftain, temujin - uge, whom his father had just captured',
  'temujin - uge'],
 ['inertia', 'inertia'],
 ['peyton manning', 'peyton manning'],
 ["quality of a country's institutions",
  "the quality of a country's institutions and high levels of education"],
 ['elway', 'elway'],
 ['1985', '1985'],
 ['mars', 'mars'],
 ['south', 'the south'],
 ['february 2015', 'february 2015'],
 ['attention - seeking and disruptive students',
  'attention - seeking and disruptive students'],
 ['misguided', 'misguided'],
 ['yassa', 'the yassa'],
 ['inertia', 'inertia'],
 ['fled',
  'immediately set about defeating small fractions of the khwarzemi forces'],
 ['strange odor in their spacesuits', 'a strange odor in their spacesuits'],
 ['denver broncos', 'denver broncos'],
 ['apollo 13', 'apollo 13'],
 ['the mitochondrial double membrane', 'mitochondrial double membrane'],
 ['zhu yuanzhang', 'zhu yuanzhang'],
 ['lunar module pilot', 'lunar module pilot'],
 ['larry ellison', 'larry ellison'],
 ['mercuric oxide', 'mercuric oxide'],
 ['ten times their own weight', 'ten times their own weight'],
 ['1331', '1331'],
 ['specialized mushroom - shaped cells in the outer layer of the epidermis',
  'specialized mushroom - shaped cells in the outer layer of the epidermis'],
 ['carolina panthers', 'carolina panthers'],
 ['velamen parallelum', 'velamen parallelum'],
 ['1973 – 1974', '1973 – 1974'],
 ['as much as 50 %', '50 %'],
 ["the conservative european people's party", "european people's party"],
 ['nothing', 'nothing'],
 ['" variations of snow and ice in the past and at present on a global and regional scale "',
  'wwf report'],
 ['voters approved the plan', 'voters approved the plan'],
 ['tana river, as well as the turkwel gorge dam', 'upper tana river'],
 ['jean cauvin ( john calvin )', 'jean cauvin'],
 ['low latitude', 'low latitude and coastal location'],
 ['modern hatred of the jews', 'modern hatred of the jews'],
 ['glaucophyte', 'alga'],
 ['antichrist', 'the antichrist'],
 ['" southwest fresno "', 'southwest fresno'],
 ['residential and non - residential', 'residential and non - residential'],
 ['daniel andrews', 'daniel andrews'],
 ['boats', 'keels'],
 ['both kenia and kegnia', 'kenia and kegnia'],
 ['2015', '2015'],
 ['stress tensor', 'forces'],
 ['rutherford grammar school', 'rutherford grammar school'],
 ['jeronimo de ayanz y beaumont', 'jeronimo de ayanz y beaumont'],
 ['the rainforest was reduced to small, isolated refugia separated by open forest and grassland',
  'that the rainforest was reduced to small, isolated refugia separated by open forest and grassland ; other scientists argue that the rainforest remained largely intact but extended less far to the north, south, and east'],
 ['arizona cardinals', 'arizona cardinals'],
 ['micrometeoroid impact craters', 'highly brecciated'],
 ['the hostmen', 'the hostmen'],
 ['possibly another 25', '25'],
 ['constant factors and smaller terms', 'constant factors and smaller terms'],
 ['actions - oriented', 'the five pillars of islam'],
 ['a restaurant',
  'a half - timbered house by the river, was the site of a weaving school from the late 16th century to about 1830. ( it has been adapted as a restaurant'],
 ['john harvard', 'john harvard'],
 ['clergyman', 'british clergyman'],
 ['15 % – 16 %', '15 % – 16 %'],
 ['november 3, 1975', 'november 3, 1975'],
 ['quadruple expansion engines', 'triple and quadruple expansion engines'],
 ["meeting of the church's general assembly",
  "meeting of the church's general assembly"],
 ['11', '11'],
 ['tolerant', 'tolerant'],
 ['821, 784', '821, 784'],
 ['the most popular show at the time', 'the most popular'],
 ['early twentieth century homes', 'early twentieth century'],
 ['wide world of sports', 'wide world of sports'],
 ["st. george's united methodist church",
  "st. george's united methodist church"],
 ['the father of the house', 'father of the house'],
 ['1523 adaptation of the latin mass', ''],
 ['94', '94 pounds'],
 ['1935', '1930'],
 ['working versions of 3d - printing building technology are already printing',
  '2014'],
 ['temujin and his brother khasar', 'khasar'],
 ['cbs', 'cbs'],
 ['complexity class p', 'p'],
 ['friendly and supportive', 'friendly and supportive'],
 ['kalka river', 'kalka river'],
 ['saint nicolas', 'nicolas'],
 ['the formation of starch - storing amyloplasts', 'amyloplasts'],
 ['inherited from the jin dynasty', 'the jin dynasty'],
 ['when it is single stranded', 'single stranded'],
 ['arizona cardinals', 'arizona cardinals'],
 ['mechanical energy', 'mechanical energy'],
 ['greek', 'greek'],
 ['cevennes', 'cevennes mountain region'],
 ['" a machine to end war "', 'a machine to end war'],
 ['three times', 'three'],
 ['the aauw study', 'the aauw study'],
 ['amsterdam and the area of west frisia',
  'amsterdam and the area of west frisia'],
 ['cytotoxic or immunosuppressive drugs',
  'cytotoxic or immunosuppressive drugs'],
 ['up to a thousand times', ''],
 ['blurring of theological and confessional differences in the interests of unity',
  'the " blurring of theological and confessional differences in the interests of unity'],
 ['because he or she can then sell more medications to the patient',
  'he or she can then sell more medications to the patient'],
 ['‘ combs ’ – groups of cilia', 'combs ’ – groups of cilia'],
 ['teaching', 'teaching'],
 ['combustion', 'combustion'],
 ['force', 'force'],
 ['dongshan dafo dian', 'dongshan dafo dian'],
 ['the plague was present somewhere in europe in every year between 1346 and 1671.',
  'was present somewhere in europe in every year between 1346 and 1671'],
 ['1873', '1873'],
 ['thames river', 'thames river'],
 ['philo of byzantium', 'philo of byzantium'],
 ['post - world war i', 'post - world war i'],
 ['coptic', 'coptic cathedral'],
 ['welsh', 'welsh'],
 ['the sovereign', 'the sovereign'],
 ['high fuel prices and new competition from low - cost air services',
  'high fuel prices and new competition from low - cost air services'],
 ['ethernet attached hosts, and eventually tcp / ip and additional public universities in michigan join the network',
  "all of this set the stage for merit's role in the nsfnet project starting in the mid - 1980s"],
 ['0. 5', '7. 8 %'],
 ['prima scriptura', 'prima scriptura'],
 ['proteins',
  'carbohydrates contain the largest proportion by mass of oxygen. all fats, fatty acids, amino acids, and proteins'],
 ['asymptotic distribution', 'asymptotic'],
 ['economic', 'economic'],
 ['abc', 'abc'],
 ['anarchists', 'anarchists'],
 ['lymphocytes', 'lymphocytes'],
 ['danube', 'the rhone and danube'],
 ['1943', '1856'],
 ['cam newton', 'cam newton'],
 ['a construction manager, design engineer, construction engineer or project manager',
  'a construction manager, design engineer, construction engineer or project manager'],
 ['5 million', '5 million'],
 ['physically imposing',
  'printed images of luther that emphasized his monumental size'],
 ['see school corporal punishment.', 'school corporal punishment'],
 ['antagonistic', 'antagonistic'],
 ['manning', 'manning and newton'],
 ['monitoring of atmospheric oxygen levels show a global downward trend',
  'downward'],
 ['thesis 86', 'thesis 86'],
 ['aided', "aided'schools"],
 ['increasing importance of human capital in development',
  'the increasing importance of human capital'],
 ['oswego', 'oswego'],
 ['the portuguese', 'the portuguese'],
 ['250, 000 feet', '250, 000'],
 ['$ 40, 000', '$ 40, 000'],
 ['south kensington', 'south kensington'],
 ['new england patriots', 'new england patriots'],
 ['caris & co.', 'caris & co.'],
 ['july 6, 2009', 'july 6, 2009'],
 ['the greatest good', 'the greatest good'],
 ['use the arrest as an opportunity',
  'lack of understanding of the legal ramifications, or due to a fear of seeming rude'],
 ["much of the city's tax base dissipated",
  "much of the city's tax base dissipated"],
 ['the fundamental theorem of arithmetic',
  'fundamental theorem of arithmetic'],
 ['philipp melanchthon', 'philipp melanchthon'],
 ['eleven', 'eleven'],
 ['the thylakoid network', 'thylakoid network'],
 ['within the last 5 – 10 million years', '5 – 10 million years'],
 ['co - operation', 'co - operation'],
 ['1505', '1505'],
 ['brad nortman', 'brad nortman'],
 ['visitation of the electorate',
  'their visitation of the electorate of saxony'],
 ['$ 759, 900', '$ 759, 900'],
 ['the bound on the complexity of reductions',
  'based on the method of reduction, such as cook reductions, karp reductions and levin reductions, and the bound on the complexity of reductions'],
 ['1759 - 60', '1759 - 60'],
 ['extra pay', 'extra pay'],
 ['after 1279', '1279'],
 ['juveniles will luminesce more brightly', 'juveniles'],
 ['one in five', 'one in five'],
 ['recover the latent heat of vaporisation',
  'recover the latent heat of vaporisation'],
 ['adaptive and innate immune responses', 'adaptive and innate'],
 ['loss of biodiversity', 'biodiversity'],
 ['1 a. m', '1 a. m'],
 ['moselle', 'the neckar, the main and, later, the moselle'],
 ['cambrian period.', 'mid - cambrian'],
 ['the malaria parasite', 'the malaria parasite'],
 ['june 4, 2014', 'june 4, 2014'],
 ['the architect or engineer', 'architect or engineer'],
 ['kmj - tv', 'kmj - tv'],
 ['article 49', '49'],
 ['chlorophyll b', 'chlorophyll b'],
 ['800 ce', ''],
 ['in obtaining cost - effective medication and avoiding the unnecessary use of medication that may have side - effects',
  'in obtaining cost - effective medication'],
 ['ancient egypt', 'ancient egypt'],
 ['huguon', 'huguon'],
 ['the absolute value', 'absolute value'],
 ['time and relative dimension in space', 'mark i type 40'],
 ['man and culture in a counterfeit paradise',
  'amazonia : man and culture in a counterfeit paradise'],
 ['2016', '2016'],
 ['50 - yard line.', '50 - yard line'],
 ['may 21, 2013', 'may 21, 2013'],
 ['15 may 1525', '15 may 1525'],
 ['trial division', 'trial division'],
 ['access to education', 'education'],
 ['eleutherian gunpowder mills', 'eleutherian gunpowder mills'],
 ['the plan that the delegates agreed to was never ratified by the colonial legislatures nor approved of by the crown',
  'never ratified by the colonial legislatures nor approved of by the crown'],
 ['august 2010', '4 august 2010'],
 ['400 m wide', '400 m'],
 ['by department', 'by department'],
 ['1964', '1964'],
 ["constructed the king's road", "constructed the king's road"],
 ['the main contractor', 'the main contractor'],
 ['humans', 'humans'],
 ['2011', '2011'],
 ['mechanical brushes', 'a commutator'],
 ['gateshead council', 'gateshead council'],
 ['plague of athens in 430 bc', 'the plague of athens'],
 ['masaaki shirakawa', 'masaaki shirakawa'],
 ['1735', '1735'],
 ['member state courts', 'member state courts'],
 ['virginia', 'virginia'],
 ['biennial', 'biennial'],
 ['2 july 1505', '2 july 1505'],
 ['democratic', 'democratic process'],
 ['edict of fontainebleau', 'edict of fontainebleau'],
 ['john smeaton', 'john smeaton'],
 ['the mughal state', 'the political weakness of the mughal state'],
 ['pathogens', 'pathogens'],
 ['colonialism', 'colonialism'],
 ['mathematical by - product', 'mathematical by - product'],
 ['k', 'k'],
 ['eukaryotic', 'eukaryotic'],
 ['kelvin benjamin', 'kelvin benjamin'],
 ['can interpret the treaties, but it cannot rule on their validity',
  'can interpret the treaties'],
 ['rhine gutter', 'rhine gutter'],
 ['an official school sport', 'an official school sport'],
 ['a cryptophyte', 'a cryptophyte'],
 ['168, 637', '168, 637'],
 ['the talons of weng - chiang', 'the talons of weng - chiang'],
 ['four', 'four'],
 ['greater tendency to take on debts', 'a greater tendency to take on debts'],
 ['third largest', 'third'],
 ['melts', 'the lead melts'],
 ['supporting function', 'a supporting function'],
 ['stroke', 'a stroke'],
 ['in the shallow crust', 'in the shallow crust'],
 ['avoid prohibitively costly dowry demands',
  "to avoid prohibitively costly dowry demands, legal assistance, sports facilities, and women's groups"],
 ['20 minutes', '20 minutes'],
 ["incorporate their prey's nematocysts ( stinging cells ) into their own tentacles instead of colloblasts",
  "incorporate their prey's nematocysts ( stinging cells ) into their own tentacles"],
 ['yellow fever outbreaks', 'yellow fever outbreaks'],
 ['yersinia pestis', 'yersinia pestis'],
 ['quantitative statements', 'quantitative'],
 ['jewish', 'jewish'],
 ['kelvin benjamin', 'kelvin benjamin'],
 ['recognized student organizations', 'recognized student organizations'],
 ['in the kingdom', 'the kingdom'],
 ['the dating game', 'dating game'],
 ['san diego', 'los angeles area'],
 ['ted heath', 'ted heath'],
 ['thousands', 'thousands'],
 ['passed', 'several university of chicago professors'],
 ['vicious and destructive', 'vicious and destructive'],
 ['europeans who were based in britain', 'british and europeans'],
 ['amazonia : man and culture in a counterfeit paradise', 'amazonia'],
 ['a rock concert', 'a rock concert'],
 ['1080i hd', '1080i hd'],
 ['fears of being labelled a pedophile or hebephile',
  'fears of being labelled a pedophile or hebephile'],
 ['jerome schurf', 'jerome schurf'],
 ['39.', '39'],
 ['" internal colonialism "', ''],
 ['philip segal', 'philip segal'],
 ['2011 and 2012', '2011 and 2012'],
 ['another problem', 'another problem'],
 ['commission v austria', 'commission v austria'],
 ['a pair of tentilla - bearing tentacles',
  'oval bodies that are flattened in the oral - aboral direction, with a pair of tentilla - bearing tentacles'],
 ['13 years and 48 days', '26'],
 ['all the normal forms of parental discipline', 'parental discipline'],
 ['skylab', 'skylab'],
 ["bainbridge's", "bainbridge's"],
 ['1, 160, 000', '1, 160, 000'],
 ['alpha phi omega', 'four'],
 ['louis adamic', 'louis adamic'],
 ['less than $ 1. 25 a day', '$ 1. 25'],
 ['korean', 'korean'],
 ['middle period of classical antiquity',
  'middle period of classical antiquity'],
 ['german', 'german'],
 ['japanese imports', 'japanese'],
 ["new orleans'mercedes - benz superdome, miami's sun life stadium, and the san francisco bay area's levi's stadium",
  "new orleans'mercedes - benz superdome, miami's sun life stadium, and the san francisco bay area's levi's stadium"],
 ['africa', 'africa'],
 ['1288', 'battle of bach đang'],
 ['finsteraarhorn', 'finsteraarhorn'],
 ['niagara falls', 'niagara falls'],
 ['modern cryptographic systems', 'rsa algorithm'],
 ['member states', 'member states'],
 ['san fernando valley', 'san fernando valley'],
 ['deformational events', 'deformational events'],
 ['best drama series', 'best drama series'],
 ['270, 000', '270, 000'],
 ['4. 6 billion years',
  'extremely old compared to rocks found on earth, as measured by radiometric dating techniques. they range in age from about 3. 2 billion years for the basaltic samples derived from the lunar maria, to about 4. 6 billion years'],
 ['strong rivalry against cornell', 'cornell'],
 ['generate atp energy',
  'use the potential energy stored in an h +, or hydrogen ion gradient to generate atp energy'],
 ["10 o'clock tea ( chai ya saa nne ) and 4 pm tea", "10 o'clock"],
 ['five to ten years', 'five to ten years'],
 ['brazil', 'brazil'],
 ['anheuser - busch inbev', 'anheuser - busch inbev'],
 ['go home and change', 'go home and change her dress'],
 ['fresno', 'fresno'],
 ['hero of alexandria', 'hero of alexandria'],
 ["the leaflets will have to be given to the leafleter's own jury as evidence",
  "prosecutors have reasoned ( correctly ) that if they arrest fully informed jury leafleters, the leaflets will have to be given to the leafleter's own jury as evidence"],
 ['eurocities', 'eurocities'],
 ['six', 'six'],
 ['eight', 'eight'],
 ['2100', '2100'],
 ['archbishop albrecht', 'archbishop albrecht of mainz and magdeburg'],
 ['edison', 'edison'],
 ['higher aggregate utility', 'higher aggregate utility'],
 ['transcendentalist unitarian', 'transcendentalist unitarian'],
 ['dave logan', 'dave logan'],
 ['sentences by peter lombard', 'biblical studies'],
 ['slave craton in northwestern canada', 'northwestern canada'],
 ['carbon monoxide', 'carbon monoxide'],
 ['reversed', 'reversed'],
 ['quarterback', 'quarterback'],
 ['ten', 'ten'],
 ['disastrous financial situation',
  'because of their disastrous financial situation'],
 ['12 %', '12 %'],
 ['alvaro martin and raul allegre', 'alvaro martin and raul allegre'],
 ['middleton railway', 'the edge railed rack and pinion middleton railway'],
 ['basic channels', 'basic'],
 ['zhongdu', 'zhongdu'],
 ['infidels', 'infidels'],
 ['armed', 'armed'],
 ['seven days to the river rhine', 'seven days to the river rhine'],
 ['in the castle church', 'castle church in wittenberg'],
 ['japan', 'japan'],
 ['over $ 40 million', 'over $ 40 million'],
 ['( exodus 1 : 15 - 19 )', 'book of exodus'],
 ['convergent boundaries',
  'arcs of volcanoes and earthquakes were explained as convergent boundaries'],
 ['the romantic rhine', 'the romantic rhine'],
 ['67. 9', '67. 9'],
 ['22, 000 – 14, 000 yr bp', 'ca. 22, 000 – 14, 000 yr bp'],
 ['british superintendent for indian affairs in the new york region and beyond',
  'superintendent for indian affairs'],
 ['isaac newton', 'humphry davy'],
 ['ladner', 'ladner'],
 ['by technique', 'technique'],
 ['elementary school education certificate', 'elementary school education'],
 ['krakow', 'krakow'],
 ['turning the whole climate science assessment process into a moderated " living " wikipedia - ipcc',
  'tightening the selection of lead authors and contributors, to dumping it in favor of a small permanent body'],
 ['t. j. ward.', 't. j. ward'],
 ['five', 'five'],
 ['oxygen', 'oxygen'],
 ['sap center', 'sap center'],
 ['immunodeficiencies', 'immunodeficiencies'],
 ['second', 'second - most'],
 ['a two - membraned chloroplast', 'a two - membraned chloroplast'],
 ['super bowl xxxiii', 'super bowl xxxiii'],
 ['13. 34 %', '13. 34 %'],
 ['song', 'song dynasty and the ming dynasty'],
 ['39', '39'],
 ['diploblastic', 'diploblastic'],
 ['45 minutes', '45 minutes'],
 ['performance', 'performance'],
 ['2011', '2011'],
 ['american civil rights movement', 'singing revolution'],
 ['isotope ratios of radioactive elements', 'isotope ratios'],
 ['sweden v. russia and allies', 'sweden v. russia and allies'],
 ['constant pressure', 'constant pressure'],
 ['a background check and psychiatric evaluation',
  'background check and psychiatric evaluation'],
 ['feed water', 'feed water'],
 ['within the borders of warsaw', 'within the borders of warsaw'],
 ['dutch cape colony', 'dutch cape colony'],
 ['two hymns', 'two'],
 ['disease', 'domestic social reforms'],
 ['1755', '1755'],
 ['organisms', 'organisms'],
 ['thoreau', ''],
 ['times square',
  "abc has generally aired dick clark's new year's rockin'eve on new year's eve"],
 ['antigens', 'antigens'],
 ['chloroplasts and other plastids', 'chloroplasts and other plastids'],
 ['pre - game and halftime coverage.', 'pre - game and halftime coverage'],
 ['gateshead', 'gateshead'],
 ['rest', 'rest'],
 ['manning', 'manning'],
 ['5, 984', '5, 984'],
 ['oliver', 'oliver'],
 ["' bucks point '", 'bucks point'],
 ['v8 and six cylinder engines', 'v8 and six cylinder'],
 ['various locations throughout the world',
  'various locations throughout the world'],
 ['12 million', 'over 12 million'],
 ['radiography', 'radiography'],
 ['the amazon rainforest', 'amazon rainforest'],
 ['dwight d. eisenhower', 'john f. kennedy'],
 ['10, 000', '10, 000'],
 ['sediment deposits', 'sediment deposits from amazon basin paleolakes'],
 ['seven', 'seven'],
 ['new orleans', 'new orleans'],
 ['first world war.', 'first world war'],
 ['12 december 1963', '12 december 1963'],
 ['charter schools', 'charter schools'],
 ['season 11', 'season 11'],
 ['westward', 'westward'],
 ['isel', 'isel'],
 ['martin sekulic', 'martin sekulic'],
 ['complicated definitions', 'complicated'],
 ['archangel michael', 'monte gargano'],
 ['germania', 'germania'],
 ['reflective',
  'reflective of individual contributions to the social product.'],
 ['home improvement', 'home improvement'],
 ['late 1980s', 'late 1980s'],
 ['2006', '2006'],
 ['multi - stage centrifugal', 'centrifugal'],
 ['1988', '1988'],
 ['futureplan', 'futureplan'],
 ['electric current', 'electric current'],
 ['september 1944', 'september 1944'],
 ['the saxon garden', 'the saxon garden'],
 ['treaty on the functioning of the european union',
  'treaty on the functioning of the european union'],
 ['a green algal derived chloroplast', 'green algal derived chloroplast'],
 ['more than 48 hours', '48 hours'],
 ['nederrijn', 'nederrijn'],
 ['british', 'british'],
 ['atmospheric engine', 'atmospheric engine'],
 ['1830', '1830'],
 ["miami's sun life stadium",
  "new orleans'mercedes - benz superdome, miami's sun life stadium"],
 ['pleurobrachia', 'cydippid pleurobrachia'],
 ['defensins', 'defensins'],
 ['steal the invention', 'steal'],
 ["cannot initiate legislation against the commission's wishes",
  "initiate legislation against the commission's wishes"],
 ['mortgage bankers, accountants, and cost engineers',
  'mortgage bankers, accountants, and cost engineers'],
 ['1850s', '1850s'],
 ['opportunity - based entrepreneurship', 'opportunity - based'],
 ['2 metres ( 6 ft 7 in )', '2 metres'],
 ['kingdoms',
  'kingdoms of francia on the lower rhine, burgundy on the upper rhine and alemannia on the high rhine'],
 ['sun life stadium', 'sun life stadium'],
 ['david graeber and donald johanson', 'david graeber and donald johanson'],
 ['the guanabara confession of faith', 'guanabara confession of faith'],
 ['$ 100, 000', '$ 100, 000'],
 ['regulations and directives', 'regulations and directives'],
 ["gandhi's", 'gandhi'],
 ['60', 'over 60'],
 ['matthew 16 : 18', 'matthew 16 : 18'],
 ['they arranged for israel to pull back from the sinai peninsula and the golan heights.',
  'multilateral'],
 ['after the sixth sermon', 'after the sixth sermon'],
 ['about seven - eighths', 'seven - eighth'],
 ['organic molecules', 'common organic molecules'],
 ['1560', '1560'],
 ['the european court of justice', 'european court of justice'],
 ['after dropping to the sea - floor',
  'by a more radical metamorphosis, after dropping to the sea - floor'],
 ['trust in christ', "god's grace"],
 ['13th', '13th - century'],
 ['1944', 'in the aftermath of the warsaw uprising of 1944'],
 ['santa clara university', 'santa clara university'],
 ['yes', 'yes'],
 ['hearst television', 'hearst television'],
 ['robert underwood johnson', 'robert underwood johnson'],
 ['by having colloblasts', 'colloblasts'],
 ['basel', 'basel'],
 ['between 1859 and 1865', 'between 1859 and 1865'],
 ['legal equality of all individuals, including women',
  'a civil state under the great yassa'],
 ['location of warsaw', 'the location of warsaw within the border region'],
 ['the national anthem', 'the national anthem'],
 ['huguenots furnished two new regiments',
  'furnished two new regiments of his army'],
 ['arm', 'arm'],
 ['the eleventh', ''],
 ['500, 000', 'roughly 500, 000'],
 ['587, 000', '587, 000'],
 ['louis agassiz', 'louis agassiz'],
 ['children of earth', 'children of earth'],
 ['1731', '1731'],
 ['bryan davies', 'bryan davies'],
 ['parliament of the united kingdom', 'parliament of the united kingdom'],
 ['the end itself', 'the end itself'],
 ['christopher hay and douglas coyne', 'aston webb'],
 ['hasar, hachiun, and temuge', 'hasar, hachiun, and temuge'],
 ['1884', '1883 – 84'],
 ['oxygen', 'oxygen'],
 ['approximately 1015 kelvins', '1015 kelvins'],
 ['lothar de maiziere', 'lothar de maiziere'],
 ['direct effect or indirect effect', 'direct effect or indirect effect'],
 ['upper lake', 'upper lake'],
 ['the uprights', 'the uprights'],
 ['radio network', 'radio network'],
 ['edward teller', 'edward teller'],
 ['american institute of electrical engineers',
  'american institute of electrical engineers'],
 ['1957', '1957'],
 ['below 0 °c', '0 °c'],
 ['manhattan', '89 liberty street'],
 ['ticonderoga point,', 'ticonderoga point'],
 ['significant new evidence or events that change our understanding of climate science',
  'any significant new evidence or events that change our understanding of climate science'],
 ['1875', '1875'],
 ['the solvability of quadratic equations',
  'the solvability of quadratic equations'],
 ['bones', 'bones as calcium phosphate and hydroxylapatite'],
 ['how much time the best algorithm requires to solve the problem',
  'how much time'],
 ['ought to live', 'how the christian ought to live'],
 ['the 2005 version', 'the 2005'],
 ['large areas', 'large areas'],
 ['credible claims of corruption were made with regard to recruitment and procurement of armoured personnel carriers',
  'recruitment and procurement of armoured personnel carriers'],
 ['regeneration', 'regeneration'],
 ['14', '14'],
 ['inform the jury and the public of the political circumstances',
  'use the proceedings as a forum to inform the jury and the public of the political circumstances surrounding the case and their reasons for breaking the law via civil disobedience. " a technical defense may enhance the chances for acquittal but make for more boring proceedings and reduced press coverage. during the vietnam war era, the chicago eight used a political defense, while benjamin spock used a technical defense. in countries such as the united states whose laws guarantee the right to a jury trial but do not excuse lawbreaking for political purposes, some civil disobedients seek jury nullification'],
 ['third', 'third'],
 ['dephlogisticated air', 'dephlogisticated air'],
 ['william ii', 'duke william ii of normandy'],
 ['2011', '2011'],
 ['afternoon of may 2.', 'may 2'],
 ['the king', 'the king'],
 ['apollo 5', 'apollo 5'],
 ['store', 'central pacific railroad'],
 ['arbitrary graph', 'an arbitrary graph'],
 ['mongol and turkic tribes', 'mongol and turkic'],
 ['khorasan', 'khorasan'],
 ['before kublai in 1285', '1285'],
 ['increasingly expected to be compensated for their patient care skills',
  'integral'],
 ['lake george', 'lake george'],
 ['the space museum', 'the space museum'],
 ["al - gama'a al - islamiyya", "al - gama'a al - islamiyya"],
 ['mercantilism', 'mercantilism'],
 ['carbon dioxide', 'carbon dioxide'],
 ['former king of thebes', 'oedipus'],
 ['1580', '1580'],
 ['prestige', 'german prestige'],
 ['materials melted near an impact crater.',
  'materials melted near an impact crater'],
 ['missing self', 'missing self'],
 ['british', 'british'],
 ['cyclades packet switching network', 'cyclades packet switching network'],
 ['x - ray imaging', 'x - ray imaging'],
 ['carolina panthers', 'the carolina panthers'],
 ['women', 'women'],
 ['five', 'five'],
 ['propulsion, electrical power and life support', 'life support'],
 ['the " hugues hypothesis "', 'hugues hypothesis'],
 ['hundreds', 'hundreds'],
 ['independent components', 'independent components'],
 ['a modern context', 'a modern context'],
 ['threatened " old briton " with severe consequences if he continued to trade with the british',
  'threatened " old briton " with severe consequences if he continued to trade with the british. " old briton " ignored the warning'],
 ['pedagogy', 'pedagogy'],
 ['lake uberlingen', 'lake uberlingen'],
 ['1493 – 1500', '1493 – 1500'],
 ['large - scale regeneration', 'large - scale regeneration'],
 ['abc television center', 'abc television center'],
 ['a diatom ( heterokontophyte ) derived chloroplast', 'diatom'],
 ['1, 230 kilometres ( 764 miles )', '1, 230 kilometres'],
 ['between 1. 4 and 5. 8 °c above 1990 levels', 'between 1. 4 and 5. 8 °c'],
 ['the jews', 'the jews'],
 ['178', '178'],
 ['france', 'france'],
 ['eu law', 'eu law'],
 ['william h. maxwell', 'william h. maxwell'],
 ['ten', 'ten'],
 ['d & b contractors', 'd & b contractors'],
 ['5 – 8 μm in diameter', '5 – 8 μm'],
 ['1 or 0', 'yes or no'],
 ['power', 'power'],
 ['xbox one', 'tablets, windows 10, xbox one'],
 ['vice president agnew', ''],
 ['i feel i did the right thing by violating this particular law',
  "it is a civil disobedient's duty to submit to the punishment prescribed by law"],
 ['afranji', 'afranji'],
 ['tiffany & co.', 'tiffany & co'],
 ['computational resource', 'computational'],
 ['arizona cardinals', 'arizona cardinals'],
 ['may 21, 2013', 'may 21, 2013'],
 ['weakness in school discipline', 'weakness in school discipline'],
 ['complex silicates',
  "silicon ( silica sio 2, as found in granite and quartz ), aluminium ( aluminium oxide al 2o 3, in bauxite and corundum ), iron ( iron ( iii ) oxide fe 2o 3, in hematite and rust ), and calcium carbonate ( in limestone ). the rest of the earth's crust is also made of oxygen compounds, in particular various complex silicates"],
 ['homeschooling', 'homeschooling'],
 ['construction', 'construction'],
 ['1965', '1965'],
 ["france's claim to the region was superior to that of the british",
  'since rene - robert cavelier, sieur de la salle had explored the ohio country nearly a century earlier'],
 ['number one', ''],
 ['soviet union', 'soviet union'],
 ['1275', '1275'],
 ['chum salmon', 'chum salmon, oncorhynchus keta'],
 ['the machine oscillated at the resonance frequency of his own building',
  'the police arrived'],
 ['delivery of these messages by store and forward switching',
  'store and forward switching'],
 ['higher economic inequality', 'higher economic inequality'],
 ['socialism in one country', "socialism in one country'for the soviet union"],
 ['germany and switzerland', 'germany and switzerland'],
 ['synthetic aperture radar ( sar )', 'synthetic aperture radar'],
 ['high wages', 'high wages'],
 ['bing crosby', 'bing crosby'],
 ['the unfair commercial practices directive',
  'unfair commercial practices directive'],
 ['fabricating evidence or committing perjury',
  'assisting in fabricating evidence or committing perjury'],
 ['one msp', 'one msp'],
 ['george b. storer', 'george b. storer'],
 ['rationing', 'rationing'],
 ['in areas that are being actively deformed',
  'in areas that are being actively deformed'],
 ['consumer prices', 'consumer prices'],
 ['1961', '1961'],
 ["the steps of nairobi's harambee house", 'harambee house'],
 ['ablative heat shield', 'ablative heat shield'],
 ['malnutrition', 'malnutrition'],
 ['private confession and absolution',
  'whether the pope has the power to require it rejected compulsory confession and encouraged private confession and absolution'],
 ['us', 'us'],
 ['quasiturbine', 'quasiturbine'],
 ['mark i type 40 tardis', 'tardis'],
 ['university of wittenberg', 'the university of wittenberg'],
 ['less than a year', 'less than a year'],
 ["manhattan's lower east side", "manhattan's lower east side"],
 ['civil, military, and censorial offices',
  'central government administration'],
 ['500', '500'],
 ['abilene', 'abilene'],
 ['international organizations and foreign governments',
  'governmental entities'],
 ['427, 652', '427, 652'],
 ['the 50 fund', 'the 50 fund'],
 ['1755', '1755'],
 ['the netherlands', 'the netherlands'],
 ['active', 'more active and lived longer'],
 ['general electric', 'general electric'],
 ['collingwood street', 'collingwood street'],
 ['the principle of inclusions and components',
  'the principle of inclusions and components'],
 ['2016', '2016'],
 ['1959', '1959'],
 ['1903', '1903'],
 ...]