# **Question Answering with BERT**
on SQuAD 1.0.  


In [None]:
import requests
import json
import torch
import os
from tqdm import tqdm
import sys
import random
from copy import deepcopy
import random
import numpy as np
from collections import Counter
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install datasets
!pip install transformers
!pip install nlpaug

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nlpaug
Successfully installed nlpaug-1.1.11


In [None]:
def set_seed(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)


set_seed(42)

### **Download SQuAD 1.0 ⬇️**

SQuAD consists of two json files.

* train dataset 
* validation dataset

In [None]:
from datasets import load_dataset

squad = load_dataset("squad")

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading and preparing dataset squad/plain_text to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Dataset squad downloaded and prepared to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
len(squad['train']), len(squad['validation'])

(87599, 10570)

### **Get data 📁** 

After we got a taste of the jsons files data format let's extract our data and store them into some data structures.

In [None]:
def read_data(squad, split):

  size = int(0.2 * len(squad[split]))
  indices = random.sample(range(len(squad[split])), size)

  contexts = []
  questions = []
  answers = []

  for idx in indices:
    data = squad[split][idx]
    contexts.append(data['context'])
    questions.append(data['question'])
    answers.append(data['answers'])

  return contexts, questions, answers

### Adding Noise

In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw

# Noise funcs 

# FOR QA ONLY
char_action = ['insert',
        'substitute',
        'delete',
        'swap',
]

def get_action(type):
    return random.choice(char_action)

def add_noise(question, p=0.7):
    """
    Augment a tweet with character-level and word-level noise.

    Args:
        tweet (str): The original tweet.
        p (float): The probability of applying the char level augmentation.

    Returns:
        str: The augmented question.
    """
    # Define a list of character-level augmentation techniques
    char_augmenters = [
        nac.KeyboardAug(aug_char_p=0.2, aug_word_p=0.2, include_special_char=False, include_numeric=False),
        nac.RandomCharAug(action=get_action("char"), aug_char_p=0.1, aug_word_p=0.1),
    ]

    # Define a list of word-level augmentation techniques
    word_augmenters = [
        naw.SpellingAug(),
        naw.SynonymAug(),
    ]

    # Randomly apply a character-level or word-level augmentation with probability p
    if random.random() < p:
        aug = random.choice(char_augmenters)
        noisy_text = aug.augment(question)
    else:
        aug = random.choice(word_augmenters)
        noisy_text = aug.augment(question)
      
    return noisy_text[0]


In [None]:
# train validation splits
train_contexts, train_questions, train_answers = read_data(squad, 'train')
valid_contexts, valid_questions, valid_answers = read_data(squad, 'validation')


# Make False if no noise to be added
noisy = True
noise_percent = 0.1
if noisy:

  for i, question in enumerate(train_questions):

    if random.random() < noise_percent:
      noisy_question = add_noise(question)
      train_questions[i] = noisy_question

  for i, question in enumerate(valid_questions):

    if random.random() < noise_percent:
      noisy_question = add_noise(question)
      valid_questions[i] = noisy_question

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
len(train_contexts)

17519

In [None]:
# print a random question and answer
print(f'There are {len(train_questions)} questions')
print(train_questions[0])
print(train_answers[0])

There are 17519 questions
What poet wrote a long poem describing Roman religious holidays?
{'text': ['Ovid'], 'answer_start': [346]}


In [None]:
print(train_answers[0])

{'text': ['Ovid'], 'answer_start': [346]}


In [None]:
def add_end_idx(answers, contexts):
  for answer, context in zip(answers, contexts):
    gold_text = answer['text'][0]
    start_idx = answer['answer_start'][0]
    end_idx = start_idx + len(gold_text)

    # sometimes squad answers are off by a character or two so we fix this
    if context[start_idx:end_idx] == gold_text:
      answer['answer_end'] = [end_idx]
    elif context[start_idx-1:end_idx-1] == gold_text:
      answer['answer_start'] = start_idx - 1
      answer['answer_end'] = [end_idx - 1]     # When the gold label is off by one character
    elif context[start_idx-2:end_idx-2] == gold_text:
      answer['answer_start'] = start_idx - 2
      answer['answer_end'] = [end_idx - 2]     # When the gold label is off by two characters

In [None]:
add_end_idx(train_answers, train_contexts)
add_end_idx(valid_answers, valid_contexts)

In [None]:
# You can see that now we get the answer_end also
print(train_questions[0])
print(train_answers[0])

What poet wrote a long poem describing Roman religious holidays?
{'text': ['Ovid'], 'answer_start': [346], 'answer_end': [350]}


### **Tokenization 🔢**

As we know we have to tokenize our data in form that is acceptable for the BERT model. We are going to use the `BertTokenizerFast` instead of `BertTokenizer` as the first one is much faster. Since we are going to train our model in batches we need to set `padding=True`.

In [None]:
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
train_encodings = tokenizer(train_contexts, train_questions, truncation=True, padding=True)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)

In [None]:
def add_token_positions(encodings, answers):
  start_positions = []
  end_positions = []
  for i in range(len(answers)):
    start_positions.append(encodings.char_to_token(i, answers[i]['answer_start'][0]))
    end_positions.append(encodings.char_to_token(i, answers[i]['answer_end'][0] - 1))

    # if start position is None, the answer passage has been truncated
    if start_positions[-1] is None:
      start_positions[-1] = tokenizer.model_max_length
    if end_positions[-1] is None:
      end_positions[-1] = tokenizer.model_max_length

  encodings.update({'start_positions': start_positions, 'end_positions': end_positions})



In [None]:
add_token_positions(train_encodings, train_answers)
add_token_positions(valid_encodings, valid_answers)

In [None]:
train_encodings['start_positions'][:10]

[71, 12, 44, 144, 139, 127, 49, 87, 21, 39]

### **Dataset definition 🗄️**

We have to define our dataset using the PyTorch Dataset class from `torch.utils` in order create our dataloaders after that.

In [None]:
class SQuAD_Dataset(torch.utils.data.Dataset):
  def __init__(self, encodings):
    self.encodings = encodings
  def __getitem__(self, idx):
    return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  def __len__(self):
    return len(self.encodings.input_ids)

In [None]:
train_dataset = SQuAD_Dataset(train_encodings)
valid_dataset = SQuAD_Dataset(valid_encodings)

### **Dataloaders 🔁**

In [None]:
from torch.utils.data import DataLoader

# Define the dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32)

In [None]:
len(train_loader), len(valid_loader)

(1095, 67)

In [None]:
len(train_contexts), len(valid_contexts)

(17519, 2114)

## **Fine-Tuning ⚙️**

### **Model definition 🤖**

We are going to use the `bert-case-uncased` from the huggingface transformers.

In [None]:
from transformers import BertForQuestionAnswering, AutoModel

model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")
# model = AutoModel.from_pretrained('bert-base-uncased')

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased a

### **Training 🏋️‍♂️**

Μy choices for some parameters:

* Use of `AdamW` which is a stochastic optimization method that modifies the typical implementation of weight decay in Adam, by decoupling weight decay from the gradient update. This helps to avoid overfitting which is necessary in this case were the model is very complex.

* Set the `lr=5e-5` as I read that this is the best value for the learning rate for this task.

In [None]:
# Check on the available device - use GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Working on {device}')

Working on cuda


In [None]:
# from sklearn.metrics import classification_report, f1_score
# import torch
# import numpy as np

# from transformers import AdamW

# N_EPOCHS = 3
# optim = AdamW(model.parameters(), lr=1e-5)

# model.to(device)
# model.train()
# best_epoch = 0
# best_val_loss = float("inf")
# for epoch in range(N_EPOCHS):

#   loop = tqdm(train_loader, leave=True)
#   for batch in loop:
#     optim.zero_grad()
#     input_ids = batch['input_ids'].to(device)
#     attention_mask = batch['attention_mask'].to(device)
#     start_positions = batch['start_positions'].to(device)
#     end_positions = batch['end_positions'].to(device)
#     outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
#     loss = outputs[0]
#     loss.backward()
#     optim.step()

#     loop.set_description(f'Epoch {epoch+1}')
#     loop.set_postfix(loss=loss.item())
  

#   model.eval()
#   predictions, true_labels = [], []
#   val_loss = 0

#   with torch.no_grad():
#     for batch in valid_loader:
#       input_ids = batch['input_ids'].to(device)
#       attention_mask = batch['attention_mask'].to(device)
#       start_positions = batch['start_positions'].to(device)
#       end_positions = batch['end_positions'].to(device)

#       outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
      
#       loss = outputs[0]
#       val_loss += loss.item()

#       start_logits, end_logits = outputs[1], outputs[2]
#       start_logits = torch.argmax(start_logits, dim=1)
#       end_logits = torch.argmax(end_logits, dim=1)

#       predictions.extend([start_logits[i].item(), end_logits[i].item()] for i in range(len(start_logits)))
#       true_labels.extend([start_positions[i].item(), end_positions[i].item()] for i in range(len(start_positions)))

#     val_loss /= len(valid_loader)

#   # # Flatten the true labels and predictions
#   # true_labels_flat = np.array(true_labels).reshape(-1, 2)
#   # predictions_flat = np.array(predictions).reshape(-1, 2)

#   # report = classification_report(true_labels, predictions, digits=4, output_dict=True)
#   # f1 = report['weighted avg']['f1-score']
#   # precision = report['weighted avg']['precision']
#   # recall = report['weighted avg']['recall']
#   # support = report['weighted avg']['support']

#   # print(f'Epoch {epoch+1} evaluation report:')
#   # print(classification_report(true_labels, predictions, digits=4))

# #   if val_loss < best_val_loss:
# #     best_val_loss = val_loss
# #     path = '/content/gdrive/MyDrive/NLP Project/models/BERT_QA_clean20'
# #     torch.save(model.state_dict(), path+'/model_parameters.pth')

# # print(f'Best F1 score: {best_f1}')

#   if val_loss < best_val_loss:
#     best_epoch = epoch + 1
#     best_val_loss = val_loss
#     # torch.save(model.state_dict(), "t5_sentiment_model.pt")
#     path = '/content/gdrive/MyDrive/NLP Project/models/BERT_QA_noise10'
#     torch.save(model.state_dict(), path+'/model_parameters.pth')
  
#   print(f"Epoch {epoch + 1} - Val Loss: {val_loss:.4f}. \t Current best epoch is {best_epoch} with val loss - {best_val_loss:.4f}")


Epoch 1: 100%|██████████| 1095/1095 [26:31<00:00,  1.45s/it, loss=1.5]


Epoch 1 - Val Loss: 1.5910. 	 Current best epoch is 1 with val loss - 1.5910


Epoch 2: 100%|██████████| 1095/1095 [25:32<00:00,  1.40s/it, loss=0.764]


Epoch 2 - Val Loss: 1.3559. 	 Current best epoch is 2 with val loss - 1.3559


Epoch 3: 100%|██████████| 1095/1095 [25:31<00:00,  1.40s/it, loss=0.718]


Epoch 3 - Val Loss: 1.4873. 	 Current best epoch is 2 with val loss - 1.3559


### Testing

In [None]:
# Load model for evaluations

path = '/content/gdrive/MyDrive/NLP Project/models/BERT_QA_noise10'
model.load_state_dict(torch.load(path+'/model_parameters.pth'))

model = model.to(device)

In [None]:
import random
from datasets import load_dataset
from torch.utils.data import DataLoader

squad_test = load_dataset("squad")
valid_contexts, valid_questions, valid_answers = read_data(squad_test, 'validation')

valid_size = int(0.2 * len(squad_test['validation']))
valid_indices = random.sample(range(len(squad['validation'])), valid_size)
valid_contexts = []
valid_questions = []
valid_answers = []

for idx in valid_indices:
  group = squad_test['validation'][idx]
  valid_contexts.append(group['context'])
  valid_questions.append(group['question'])
  valid_answers.append(group['answers'])

add_end_idx(valid_answers, valid_contexts)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)
add_token_positions(valid_encodings, valid_answers)
valid_dataset = SQuAD_Dataset(valid_encodings)
valid_loader = DataLoader(valid_dataset, batch_size=16)



  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
model.eval()

acc = []
em_score = []
f1_score = []

for batch in tqdm(valid_loader):
  with torch.no_grad():
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_true = batch['start_positions'].to(device)
    end_true = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask)

    start_pred = torch.argmax(outputs['start_logits'], dim=1)
    end_pred = torch.argmax(outputs['end_logits'], dim=1)

    acc.append(((start_pred == start_true).sum()/len(start_pred)).item())
    acc.append(((end_pred == end_true).sum()/len(end_pred)).item())

    # Calculate EM score and F1 score
    for i in range(len(start_true)):
        pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i] + 1], skip_special_tokens=True)
        true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i] + 1], skip_special_tokens=True)
        em_score.append(int(pred_span == true_span))

        # Compute F1 score
        common = Counter(pred_span.split()) & Counter(true_span.split())
        num_common = sum(common.values())
        precision = num_common / max(len(pred_span.split()), 1)
        recall = num_common / max(len(true_span.split()), 1)
        f1 = (2 * precision * recall) / max((precision + recall), 1e-8)
        f1_score.append(f1)


acc = sum(acc) / len(acc)
em_score = sum(em_score) / len(em_score)
f1_score = sum(f1_score) / len(f1_score)

print("\n\nAccuracy:", acc)
print("EM score:", em_score)
print("F1 score:", f1_score)

print("\n\nT/P\tanswer_start\tanswer_end\n")
for i in range(len(start_true)):
  pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i]+1], skip_special_tokens=True)
  true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i]+1], skip_special_tokens=True)
  print(f"true\t{start_true[i]}\t{end_true[i]}\t{true_span}\n"
        f"pred\t{start_pred[i]}\t{end_pred[i]}\t{pred_span}\n")


100%|██████████| 133/133 [01:06<00:00,  2.00it/s]



Accuracy: 0.6306390977443609
EM score: 0.5056764427625354
F1 score: 0.6788178082372972


T/P	answer_start	answer_end

true	29	29	four
pred	29	29	four

true	27	40	from nova scotia and newfoundland in the north, to georgia in the south
pred	21	25	eastern coast of the continent






### Evaluation on Noisy Data

In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw

# Noise funcs 

# FOR QA ONLY
char_action = ['insert',
        'substitute',
        'delete',
        'swap',
]

def get_action(type):
    return random.choice(char_action)

def add_noise(question, p=0.7):
    """
    Augment a tweet with character-level and word-level noise.

    Args:
        tweet (str): The original tweet.
        p (float): The probability of applying the char level augmentation.

    Returns:
        str: The augmented question.
    """
    # Define a list of character-level augmentation techniques
    char_augmenters = [
        nac.KeyboardAug(aug_char_p=0.2, aug_word_p=0.2, include_special_char=False, include_numeric=False),
        nac.RandomCharAug(action=get_action("char"), aug_char_p=0.1, aug_word_p=0.1),
    ]

    # Define a list of word-level augmentation techniques
    word_augmenters = [
        naw.SpellingAug(),
        naw.SynonymAug(),
    ]

    # Randomly apply a character-level or word-level augmentation with probability p
    if random.random() < p:
        aug = random.choice(char_augmenters)
        noisy_text = aug.augment(question)
    else:
        aug = random.choice(word_augmenters)
        noisy_text = aug.augment(question)
      
    return noisy_text[0]


In [None]:
import random
from datasets import load_dataset
from torch.utils.data import DataLoader

squad_test = load_dataset("squad")
valid_contexts, valid_questions, valid_answers = read_data(squad_test, 'validation')

import random
random.seed(42)
random_noise = random.uniform(0.05, 0.15)
print(random_noise)

noisy = True
noise_percent = random_noise
if noisy:

  for i, question in enumerate(valid_questions):

    if random.random() < noise_percent:
      noisy_question = add_noise(question)
      valid_questions[i] = noisy_question
      
  print('added noise')

valid_size = int(0.2 * len(squad_test['validation']))
valid_indices = random.sample(range(len(squad['validation'])), valid_size)
valid_contexts = []
valid_questions = []
valid_answers = []

for idx in valid_indices:
  group = squad_test['validation'][idx]
  valid_contexts.append(group['context'])
  valid_questions.append(group['question'])
  valid_answers.append(group['answers'])

add_end_idx(valid_answers, valid_contexts)
valid_encodings = tokenizer(valid_contexts, valid_questions, truncation=True, padding=True)
add_token_positions(valid_encodings, valid_answers)
valid_dataset = SQuAD_Dataset(valid_encodings)
valid_loader = DataLoader(valid_dataset, batch_size=16)



  0%|          | 0/2 [00:00<?, ?it/s]

0.11394267984578837
added noise


In [None]:
path = '/content/gdrive/MyDrive/NLP Project/models/BERT_QA_noise10'
model.load_state_dict(torch.load(path+'/model_parameters.pth'))

model = model.to(device)
model.eval()

acc = []
em_score = []
f1_score = []

for batch in tqdm(valid_loader):
  with torch.no_grad():
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_true = batch['start_positions'].to(device)
    end_true = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask)

    start_pred = torch.argmax(outputs['start_logits'], dim=1)
    end_pred = torch.argmax(outputs['end_logits'], dim=1)

    acc.append(((start_pred == start_true).sum()/len(start_pred)).item())
    acc.append(((end_pred == end_true).sum()/len(end_pred)).item())

    # Calculate EM score and F1 score
    for i in range(len(start_true)):
        pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i] + 1], skip_special_tokens=True)
        true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i] + 1], skip_special_tokens=True)
        em_score.append(int(pred_span == true_span))

        # Compute F1 score
        common = Counter(pred_span.split()) & Counter(true_span.split())
        num_common = sum(common.values())
        precision = num_common / max(len(pred_span.split()), 1)
        recall = num_common / max(len(true_span.split()), 1)
        f1 = (2 * precision * recall) / max((precision + recall), 1e-8)
        f1_score.append(f1)


acc = sum(acc) / len(acc)
em_score = sum(em_score) / len(em_score)
f1_score = sum(f1_score) / len(f1_score)

print("\n\nAccuracy:", acc)
print("EM score:", em_score)
print("F1 score:", f1_score)

print("\n\nT/P\tanswer_start\tanswer_end\n")
for i in range(len(start_true)):
  pred_span = tokenizer.decode(input_ids[i][start_pred[i]:end_pred[i]+1], skip_special_tokens=True)
  true_span = tokenizer.decode(input_ids[i][start_true[i]:end_true[i]+1], skip_special_tokens=True)
  print(f"true\t{start_true[i]}\t{end_true[i]}\t{true_span}\n"
        f"pred\t{start_pred[i]}\t{end_pred[i]}\t{pred_span}\n")


100%|██████████| 133/133 [01:07<00:00,  1.97it/s]



Accuracy: 0.6257048872180451
EM score: 0.4981078524124882
F1 score: 0.6734449315454004


T/P	answer_start	answer_end

true	21	30	british blockade of the french coastline limited french shipping.
pred	100	100	smallpox

true	62	65	235 additional television stations
pred	47	53	eight owned - and - operated stations






### EXTRA STUFF


In [None]:
def get_prediction(context, question):
  inputs = tokenizer.encode_plus(question, context, return_tensors='pt').to(device)
  outputs = model(**inputs)
  
  answer_start = torch.argmax(outputs[0])  
  answer_end = torch.argmax(outputs[1]) + 1 
  
  answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))
  
  return answer

def normalize_text(s):
  """Removing articles and punctuation, and standardizing whitespace are all typical text processing steps."""
  import string, re
  def remove_articles(text):
    regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
    return re.sub(regex, " ", text)
  def white_space_fix(text):
    return " ".join(text.split())
  def remove_punc(text):
    exclude = set(string.punctuation)
    return "".join(ch for ch in text if ch not in exclude)
  def lower(text):
    return text.lower()

  return white_space_fix(remove_articles(remove_punc(lower(s))))

def exact_match(prediction, truth):
    return bool(normalize_text(prediction) == normalize_text(truth))

def compute_f1(prediction, truth):
  pred_tokens = normalize_text(prediction).split()
  truth_tokens = normalize_text(truth).split()
  
  # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
  if len(pred_tokens) == 0 or len(truth_tokens) == 0:
    return int(pred_tokens == truth_tokens)
  
  common_tokens = set(pred_tokens) & set(truth_tokens)
  
  # if there are no common tokens then f1 = 0
  if len(common_tokens) == 0:
    return 0
  
  prec = len(common_tokens) / len(pred_tokens)
  rec = len(common_tokens) / len(truth_tokens)
  
  return round(2 * (prec * rec) / (prec + rec), 2)
  
def question_answer(context, question,answer):
  prediction = get_prediction(context,question)
  em_score = exact_match(prediction, answer)
  f1_score = compute_f1(prediction, answer)

  print(f'Question: {question}')
  print(f'Prediction: {prediction}')
  print(f'True Answer: {answer}')
  print(f'Exact match: {em_score}')
  print(f'F1 score: {f1_score}\n')

**Beyoncé**

In [None]:
context = """Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, 
          songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing 
          and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. 
          Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. 
          Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, 
          earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy"."""


questions = ["For whom the passage is talking about?",
             "When did Beyonce born?",
             "Where did Beyonce born?",
             "What is Beyonce's nationality?",
             "Who was the Destiny's group manager?",
             "What name has the Beyoncé's debut album?",
             "How many Grammy Awards did Beyonce earn?",
             "When did the Beyoncé's debut album release?",
             "Who was the lead singer of R&B girl-group Destiny's Child?"]

answers = ["Beyonce Giselle Knowles - Carter", "September 4, 1981", "Houston, Texas", 
           "American", "Mathew Knowles", "Dangerously in Love", "five", "2003", 
           "Beyonce Giselle Knowles - Carter"]

for question, answer in zip(questions, answers):
  question_answer(context, question, answer)

Question: For whom the passage is talking about?
Prediction: destiny ' s child
True Answer: Beyonce Giselle Knowles - Carter
Exact match: False
F1 score: 0

Question: When did Beyonce born?
Prediction: 1981
True Answer: September 4, 1981
Exact match: False
F1 score: 0.5

Question: Where did Beyonce born?
Prediction: houston, texas
True Answer: Houston, Texas
Exact match: True
F1 score: 1.0

Question: What is Beyonce's nationality?
Prediction: 
True Answer: American
Exact match: False
F1 score: 0

Question: Who was the Destiny's group manager?
Prediction: mathew knowles
True Answer: Mathew Knowles
Exact match: True
F1 score: 1.0

Question: What name has the Beyoncé's debut album?
Prediction: destiny ' s child
True Answer: Dangerously in Love
Exact match: False
F1 score: 0

Question: How many Grammy Awards did Beyonce earn?
Prediction: destiny ' s child
True Answer: five
Exact match: False
F1 score: 0

Question: When did the Beyoncé's debut album release?
Prediction: 1981
True Answer: 20

**Athens**

In [None]:
context = """Athens is the capital and largest city of Greece. Athens dominates the Attica region and is one of the world's oldest cities, 
             with its recorded history spanning over 3,400 years and its earliest human presence starting somewhere between the 11th and 7th millennium BC.
             Classical Athens was a powerful city-state. It was a center for the arts, learning and philosophy, and the home of Plato's Academy and Aristotle's Lyceum.
             It is widely referred to as the cradle of Western civilization and the birthplace of democracy, largely because of its cultural and political impact on the European continent—particularly Ancient Rome.
             In modern times, Athens is a large cosmopolitan metropolis and central to economic, financial, industrial, maritime, political and cultural life in Greece. 
             In 2021, Athens' urban area hosted more than three and a half million people, which is around 35% of the entire population of Greece.
             Athens is a Beta global city according to the Globalization and World Cities Research Network, and is one of the biggest economic centers in Southeastern Europe. 
             It also has a large financial sector, and its port Piraeus is both the largest passenger port in Europe, and the second largest in the world."""

questions = ["Which is the largest city in Greece?",
             "For what was the Athens center?",
             "Which city was the home of Plato's Academy?"]

answers = ["Athens", "center for the arts, learning and philosophy", "Athens"]

for question, answer in zip(questions, answers):
  question_answer(context, question, answer)

Question: Which is the largest city in Greece?
Prediction: athens
True Answer: Athens
Exact match: True
F1 score: 1.0

Question: For what was the Athens center?
Prediction: arts, learning and philosophy
True Answer: center for the arts, learning and philosophy
Exact match: False
F1 score: 0.8

Question: Which city was the home of Plato's Academy?
Prediction: athens is the capital and largest city of greece
True Answer: Athens
Exact match: False
F1 score: 0.22



**Angelos**

In [None]:
context = """Angelos Poulis was born on 8 April 2001 in Nicosia, Cyprus. He is half Cypriot and half Greek. 
            He is currently studying at the Department of Informatics and Telecommunications of the University of Athens in Greece. 
            His scientific interests are in the broad field of Artificial Intelligence and he loves to train neural networks! 
            Okay, I'm Angelos and I'll stop talking about me right now."""

questions = ["When did Angelos born?",
             "In what university is Angelos studying now?",
             "What is Angelos' nationality?",
             "What are his scientific interests?",
             "What I will do right now?"]

answers = ["8 April 2001", "University of Athens", 
           "half Cypriot and half Greek", "Artificial Intelligence", 
           "stop talking about me"]

for question, answer in zip(questions, answers):
  question_answer(context, question, answer)

Question: When did Angelos born?
Prediction: 8 april 2001
True Answer: 8 April 2001
Exact match: True
F1 score: 1.0

Question: In what university is Angelos studying now?
Prediction: 
True Answer: University of Athens
Exact match: False
F1 score: 0

Question: What is Angelos' nationality?
Prediction: cypriot
True Answer: half Cypriot and half Greek
Exact match: False
F1 score: 0.33

Question: What are his scientific interests?
Prediction: 
True Answer: Artificial Intelligence
Exact match: False
F1 score: 0

Question: What I will do right now?
Prediction: half cypriot and half greek. he is currently studying at the department of informatics and telecommunications of the university of athens in greece. his scientific interests are in the broad field of artificial intelligence
True Answer: stop talking about me
Exact match: False
F1 score: 0



## **Summary (and some Questions & Answers) 🧐**

**Technical details:**
* **Model used:** `bert-base-uncased`
* **Dataset:** The Stanford Question Answering Dataset (SQuAD)  
* **Run time:** ~ 4 hours on the Tesla P100 GPU for `N_EPOCHS = 3`. Each epoch took about 1 hour and 15 minutes for training. I think if we run the model for at least `N_EPOCHS = 5` we can get even better results, but what we got for 3 epochs is already very good!

**Conclusion:** We can say that training the model for just 3 epochs, which took about 4 hours on the Tesla P100 GPU, gives us pretty good results. The model can also answer quite well to questions about contents it hasn't seen before and I can say this because I gave it a passage for myself!

Some *example questions and answers* we get are the following:

**About Athens:**

> **Question:** Which is the largest city in Greece?  
  **Prediction:** athens  
  **True Answer:** Athens  
  **Exact match:** True  
  **F1 score:** 1.0  

> **Question:** For what was the Athens center?  
  **Prediction:** center for the arts, learning and philosophy  
  **True Answer:** center for the arts, learning and philosophy  
  **Exact match:** True  
  **F1 score:** 1.0  

**About Beyoncé:**

> **Question:** When did Beyonce born?  
  **Prediction:** september 4, 1981  
  **True Answer:** September 4, 1981  
  **Exact Match:** True	 
  **F1 score:** 1.0

> **Question:** What name has the Beyoncé's debut album?  
  **Prediction:** dangerously in love  
  **True Answer:** Dangerously in Love   
  **Exact Match:** True  
  **F1 score:** 1.0

> **Question:** How many Grammy Awards did Beyonce earn?  
  **Prediction:** five  
  **True Answer:** five  
  **Exact Match:** True  
  **F1 score:** 1.0


> **Question:** When did the Beyoncé's debut album release?  
  **Prediction:** 2003  
  **True Answer:** 2003  
  **Exact Match:** True  
  **F1 score:** 1.0


> **Question:** Who was the lead singer of R&B girl-group Destiny's Child?  
  **Prediction:** beyonce giselle knowles - carter  
  **True Answer:** Beyonce Giselle Knowles - Carter  
  **Exact Match:** True  
  **F1 score:** 1.0


**About Angelos:**

> **Question:** When did Angelos born?  
  **Prediction:** 8 april 2001  
  **True Answer:** 8 April 2001  
  **Exact match:** True  
  **F1 score:** 1.0

> **Question:** In what university is Angelos studying now?  
  **Prediction:** university of athens  
  **True Answer:** University of Athens  
  **Exact match:** True    
  **F1 score:** 1.0

> **Question:** What is Angelos' nationality?  
  **Prediction:** half cypriot and half greek.  
  **True Answer:** half Cypriot and half Greek   
  **Exact match:** True  
  **F1 score:** 0.8

> **Question:** What are his scientific interests?  
  **Prediction:** artificial intelligence  
  **True Answer:** Artificial Intelligence    
  **Exact match:** True  
  **F1 score:** 1.0

> **Question:** What I will do right now?  
  **Prediction:** stop talking about me  
  **True Answer:** stop talking about me  
  **Exact match:** True  
  **F1 score:** 1.0


### GROUND TRUTH VS MODEL PREDICTIONS

In [None]:
[['lack of net force', 'lack of net force'],
 ['gymnosperms', 'gymnosperms'],
 ['patents', ''],
 ['green', 'john and benjamin green'],
 ['declines', 'gdp growth actually declines'],
 ['camp pendleton', 'camp pendleton'],
 ['beroids', 'beroids'],
 ['1869', '1869'],
 ['tuesday', 'tuesday'],
 ['5k resolution', '5k resolution'],
 ['number one', ''],
 ['li.', ''],
 ['a new playing surface',
  're - sodded the field with a new playing surface ; a hybrid bermuda 419 turf'],
 ['sap center in san jose.', 'sap center in san jose'],
 ['lighting systems', 'rival lighting systems'],
 ['temperance', 'temperance movement'],
 ['mediterranean', 'mediterranean'],
 ['isaac newton', 'isaac newton'],
 ['mental health ( care and treatment ) ( scotland ) act 2003',
  'the mental health ( care and treatment ) ( scotland ) act 2003'],
 ['april 1887', 'april 1887'],
 ['physicist', 'italian physicist'],
 ['1696', '1696'],
 ['torchwood : miracle day', 'torchwood'],
 ['142 pounds', '142 pounds ( 64 kg )'],
 ['four - course rate average', 'four'],
 ['stroma - containing tubule', 'tubule'],
 ['new york city o & o wabc - tv and philadelphia o & o wpvi - tv',
  'wabc - tv and philadelphia o & o wpvi - tv'],
 ['1920', '1920'],
 ['18, 000 regulars, militia and native american allies', '3, 600'],
 ['environmental determinism', 'environmental determinism'],
 ['stanford university', 'stanford university'],
 ['j. s. bach', 'j. s. bach'],
 ['the paramount building', '1501 broadway in manhattan'],
 ['1795', '1795'],
 ['fled',
  'defeating small fractions of the khwarzemi forces instead of facing a unified defense. the mongol army quickly seized the town of otrar, relying on superior strategy and tactics. genghis khan ordered the wholesale massacre of many of the civilians'],
 ['family member', 'family member'],
 ['tang, song, as well as khitan liao and jurchen jin dynasties',
  'tang, song, as well as khitan liao and jurchen jin dynasties'],
 ['between the 1960s and 1990s', 'between the 1960s and 1990s'],
 ['8. 4 %', '8. 4 %'],
 ['the unfair commercial practices directive',
  'unfair commercial practices directive'],
 ['in the castle church', 'the castle church in wittenberg'],
 ['1893', '1893'],
 ['when the oxygen concentration is too high',
  'when the oxygen concentration is too high.'],
 ['lute',
  'singing of german hymns in connection with worship, school, home, and the public arena. he often accompanied the sung hymns with a lute'],
 ['person or group of people', ''],
 ['failed',
  'the 1979 scottish devolution referendum to establish a devolved scottish assembly failed'],
 ['45 – 60 nanometers across', 'about 45 – 60 nanometers across'],
 ["doctorin'the tardis", "doctorin'the tardis"],
 ['1964', '1964'],
 ['several thousand', 'several thousand'],
 ['the kenya certificate of secondary education',
  'the kenya certificate of primary education'],
 ['1879', '1879'],
 ['sleep in peace', ''],
 ['£34m', '£34m per year'],
 ['huntington boulevard', 'huntington boulevard'],
 ['jingshi dadian', 'jingshi dadian'],
 ['texas', 'texas'],
 ['a deficit.', 'a deficit'],
 ['the great north run', 'great north run'],
 ['reneged',
  'reneged on his surrender terms and executed every soldier that had taken arms against him at samarkand'],
 ['a series of power blackouts across the country', 'the assassination'],
 ['peyton manning', 'peyton manning'],
 ['wang zhen', 'wang zhen'],
 ['groton school', 'groton school'],
 ['terry nation', 'terry nation'],
 ['stanford university',
  'san jose marriott. the broncos practiced at stanford university'],
 ['jordan norwood', ''],
 ['technologies and ideas', 'technologies and ideas'],
 ['manning', 'gary kubiak'],
 ['cam newton', 'cam newton'],
 ['jean - marc bosman', 'jean - marc bosman'],
 ['april 1970', 'april 1970'],
 ['the arrow', 'the arrow'],
 ['the sculptor', ''],
 ['applied', 'applied force'],
 ['2002', '2002'],
 ['tesla coil', 'tesla coil'],
 ['abc entertainment',
  'disney – abc television group merged abc entertainment and abc studios into a new division, abc entertainment group, which would be responsible for both its production and broadcasting operations. during this reorganization, the group announced that it would lay off 5 % of its workforce. on april 2, 2009, citadel communications announced that it would rebrand abc radio as citadel media ; however, abc news continued to provide news content for citadel. on december 22, disney – abc television group announced a partnership with apple inc.'],
 ['in peacekeeping missions around the world', 'peacekeeping missions'],
 ['tentilla', 'tentilla'],
 ['turning the whole climate science assessment process into a moderated " living " wikipedia - ipcc',
  'fourth assessment report'],
 ['four', 'four of eight songs'],
 ['immunization', 'immunization'],
 ['the liao dynasty', 'jin dynasty'],
 ['300 km long', '300 km long'],
 ['interacting and working directly with students',
  'interacting and working directly'],
 ['houston, texas', ''],
 ['to the state and its laws', 'relation to the state and its laws'],
 ['nun komm, der heiden heiland', '" nun komm, der heiden heiland'],
 ['2. 5 million', '2. 5 million'],
 ['the swahili', 'swahili'],
 ['on october 6, 1973', 'october 6, 1973'],
 ['local talent', 'local talent'],
 ['$ 3. 5 billion', "$ 3. 5 billion and $ 118 for each of abc's shares"],
 ['11', '21 to 11'],
 ['daniel b. burke', 'daniel b. burke'],
 ['continual motion along the fault', 'continual motion'],
 ['los angeles kings', 'los angeles kings'],
 ['enric miralles', 'enric miralles'],
 ['jamukha', 'jamukha'],
 ['bruno mars', 'bruno mars'],
 ['chebyshev', 'chebyshev'],
 ['bbc radio 5', 'bbc radio 5 live and 5 live sports extra'],
 ['threatened " old briton " with severe consequences if he continued to trade with the british',
  'threatened " old briton " with severe consequences'],
 ['the metropolitan police authority', 'the metropolitan police authority'],
 ['arms', 'the history of arms'],
 ['defensins', 'defensins'],
 ['jeronimo de ayanz y beaumont', 'jeronimo de ayanz y beaumont'],
 ['burned liquid hydrogen', 'restart the engine for translunar injection'],
 ['egyptians', 'egyptians'],
 ['inferior', 'a negative vision of itself'],
 ['book of discipline', 'the book of discipline'],
 ['gaulish name renos', 'gaulish name renos'],
 ['decreases', 'decreases'],
 ['nine', 'nine'],
 ['paul rand', 'paul rand'],
 ['demaryius thomas', 'demaryius thomas'],
 ['western portions of the great lakes region',
  'western portions of the great lakes region'],
 ['independent components',
  'vector addition yields the original force. resolving force vectors into components of a set of basis vectors is often a more mathematically clean way to describe forces than using magnitudes and directions. this is because, for orthogonal components, the components of the vector sum are uniquely determined by the scalar addition of the components of the individual vectors. orthogonal components are independent of each other because forces acting at ninety degrees to each other have no effect on the magnitude or direction of the other'],
 ['2001', 'in 2001'],
 ['17 %', '17 % to $ 8. 7 billion'],
 ['universities and / or tafe colleges',
  'universities and / or tafe colleges'],
 ['france', 'france'],
 ['many places', 'statue of little insurgent'],
 ['0. 2', ''],
 ['anderson', 'bennie fowler'],
 ['emotional contagion', 'emotional contagion'],
 ['brick - and - mortar community pharmacies that serve consumers online and those that walk in their door',
  'their physicians'],
 ['the european parliament and the council of the european union',
  'council of the european union'],
 ['14', '14'],
 ['a shortage of male teachers', 'a shortage of male teachers'],
 ['commutative ring r', ''],
 ['concurring, smaller assessments of special problems', 'consensus'],
 ['$ 5 million', '$ 5 million'],
 ['commission v austria', 'commission v austria'],
 ['mouth of the monongahela river ( the site of present - day pittsburgh, pennsylvania )',
  'the mouth of the monongahela river'],
 ['11 of the then 12 member states', '11 of the then 12'],
 ['£30m', '£30m'],
 ['two', 'two'],
 ['2005', '2005'],
 ['salt and iron', 'salt and iron'],
 ['the european court of justice', 'the european court of justice'],
 ['with international criminal court trial dates in 2013 for both president kenyatta and deputy president william ruto',
  'president kenyatta and deputy president william ruto related to the 2007 election aftermath, us president barack obama chose not to visit the country during his mid - 2013 african trip.'],
 ['390', '390'],
 ['lake balkhash', 'the caspian sea'],
 ['1672', '1672'],
 ['2001', 'in 2007'],
 ['9th', '9th'],
 ['increased blood flow into tissue', 'increased blood flow into tissue'],
 ['kinetic friction', 'kinetic friction'],
 ['" tiger team "', '" tiger team "'],
 ['expelled jews', 'expelled'],
 ['fighting horsemen', 'fighting horsemen'],
 ['invocavit sermons', 'invocavit sermons'],
 ['phagosome', 'phagosome'],
 ['john wesley', 'john wesley'],
 ['independently developed the same message routing methodology as developed by baran',
  'message routing methodology as developed by baran. he called it packet switching'],
 ['pac - 12', 'pac - 12'],
 ['increased by about 70 %.', '70 %'],
 ['7500 yr ago', '7500 yr ago'],
 ['the mantle', ''],
 ['soluble components ( molecules )', 'phagocytes'],
 ['an interactive host to host connection was made between the ibm mainframe computer systems at the university of michigan in ann arbor and wayne state',
  'connections to the cdc mainframe'],
 ['late 1545', 'late 1545'],
 ['arm', 'arm'],
 ['january 27, 1967', 'january 27, 1967'],
 ['nolo contendere', 'nolo contendere'],
 ['often many times', 'many times'],
 ['74', '14'],
 ['parallelogram',
  'parallelogram rule of vector addition : the addition of two vectors represented by sides of a parallelogram, gives an equivalent resultant vector that is equal in magnitude and direction to the transversal of the parallelogram'],
 ['1957', '1957'],
 ['mercuric oxide', 'mercuric oxide'],
 ['decision problems', 'decision'],
 ['anticlines and synclines', 'anticlines and synclines'],
 ['federica mogherini', 'jean - claude juncker'],
 ['american institute of electrical engineers',
  'american institute of electrical engineers'],
 ['politically and socially unstable', 'politically and socially unstable'],
 ['lady gaga', 'lady gaga'],
 ['within the premises of the hospital',
  'within the premises of the hospital'],
 ['30. 0 %', '30. 0 %'],
 ['signing of the treaty of paris on 10 february 1763', '10 february 1763'],
 ['storage conditions, compulsory texts, equipment, etc.',
  'storage conditions'],
 ['religious and in no respect racial',
  'his position was entirely religious and in no respect racial'],
 ['1995 – 96 season', '1993 – 94'],
 ['begter', 'begter'],
 ['sierra sky park', 'sierra sky park'],
 ['shaq thompson', 'linebacker shaq thompson'],
 ['cape town', 'cape of good hope'],
 ['passive immunity', 'passive'],
 ['automobiles', 'automobiles'],
 ['hanna - barbera', 'hanna - barbera'],
 ['josiah wedgwood, william de morgan and bernard leach',
  'josiah wedgwood, william de morgan and bernard leach'],
 ['under intense light', 'turning sideways'],
 ["l'eglise francaise a la nouvelle - amsterdam",
  "the french church in new amsterdam ). this parish continues today as l'eglise du saint - esprit"],
 ['circuit switching', ''],
 ['afrikaans', 'afrikaans'],
 ['elway', 'elway'],
 ['ac power', 'ac power'],
 ['2002', '2002'],
 ['a council of mongol chiefs', 'genghis khan'],
 ['second scale shows the most recent eon with an expanded scale', ''],
 ['nonviolent',
  'modern [ vague ] statement of the principle of nonviolent protest'],
 ['aboral organ', 'aboral organ'],
 ['newton', "newton's"],
 ['kenya african national union ( kanu ) of jomo kenyatta', 'jomo kenyatta'],
 ['first world war.', 'due to the outbreak of the first world war'],
 ['the calvin cycle', 'the calvin cycle'],
 ['zaju', 'zaju'],
 ['manning', 'manning'],
 ['oxygen toxicity',
  'oxygen toxicity to the lungs and central nervous system can also occur in deep scuba diving and surface supplied diving. prolonged breathing of an air mixture with an o 2 partial pressure more than 60 kpa can eventually lead to permanent pulmonary fibrosis'],
 ['complexity classes',
  'complexity classes can be defined by bounding the time or space used by the algorithm'],
 ['uhf tuning', 'uhf'],
 ['traditional visor helmet', 'visor helmet'],
 ['cannot be written as a product of two ring elements that are not units',
  'not a unit'],
 ['shangdu', 'shangdu'],
 ['$ 474 million', '$ 474 million'],
 ['eight', 'eight'],
 ['groups of large, stiffened cilia', 'stiffened cilia'],
 ['roy strong', 'roy strong'],
 ['charity and good works',
  'faith alone, whether fiduciary or dogmatic, cannot justify man ; justification rather depends only on such faith as is active in charity and good works'],
 ['1963', '1963'],
 ['europe', 'the caucasus, north africa, and the horn of africa'],
 ['two', 'two'],
 ['unnatural',
  'unnatural or forced motion, which required continued application of a force'],
 ['captain america : civil war', 'captain america : civil war'],
 ['fresno street and thorne ave', 'fresno street and thorne ave'],
 ['april 1943', '19 april 1943'],
 ['3. 55 inches', '3. 55 inches'],
 ['high - gain s - band antenna', 's - band'],
 ['cylinder volume', 'larger cylinder volume'],
 ['mantle', 'mantle'],
 ['thermal expansion', 'thermal expansion'],
 ['spin', 'the spin'],
 ['foreign protestants naturalization act',
  'foreign protestants naturalization act'],
 ['does not faithfully summarize the full wgi report',
  'does not faithfully summarize the full wgi report'],
 ['fundamental rights recognised and protected in the constitutions of member states',
  'fundamental rights'],
 ['democratic', 'democratic process'],
 ['their families', 'keelmen and their families'],
 ['controlled, experimental studies', 'experimental'],
 ['alberta and british columbia', 'alberta and british columbia'],
 ['liquid nitrogen', 'liquid nitrogen'],
 ['personal presence', "trust god's word"],
 ['several', 'several'],
 ['glowed even when turned off', 'glowed'],
 ['broken', 'three of his ribs'],
 ['the world systems theory', 'the world systems theory'],
 ['university of washington', 'university of washington'],
 ['pressure terms', 'shear terms'],
 ['new personality', 'new personality'],
 ['priests, religious leaders, and case workers as well as teachers',
  'teacher'],
 ['food in the form of sugars', 'food'],
 ['sanders', 'sanders'],
 ['anti - colonial movements', 'anti - colonial movements'],
 ['rankine', 'the rankine cycle'],
 ['265. 7 nautical miles', '492. 1 km )'],
 ['saddam hussein', "saddam hussein's"],
 ['morgan', 'morgan'],
 ['lutheran and reformed', 'lutheran'],
 ['ten', 'ten'],
 ['metals', ''],
 ['luther', 'luther'],
 ['obesity, alcoholism, and drug use', 'obesity, alcoholism, and drug use'],
 ['1971', '1971'],
 ['proteins', 'proteins'],
 ['mick mixon', 'dave logan'],
 ['pattern recognition receptors', 'pattern recognition receptors'],
 ['verizon wireless customers', 'verizon wireless'],
 ['left graz',
  'severed all relations with his family to hide the fact that he dropped out of school'],
 ['thomas edison',
  "thomas edison and george westinghouse that had been simmering since westinghouse's first ac system in 1886 and had reached the point of all - out warfare by 1888. this started out as a competition between rival lighting systems with edison"],
 ['1767', '1767'],
 ['income from the harvests of their chinese tenants',
  'harvests of their chinese tenants'],
 ['mongol and turkic tribes', 'mongol and turkic'],
 ['newcastle college', 'newcastle college'],
 ['1317', '15th century'],
 ['only a little', 'a little time will pass'],
 ['1993', '1993'],
 ['1321 to 1323', '1321 to 1323'],
 ['the restriction modification system', 'restriction modification system'],
 ['20th', '20th'],
 ['representatives appointed by governments and organizations',
  'representatives appointed by governments and organizations'],
 ['2010', '2010'],
 ['may 21, 2013', '2013'],
 ['32, 463', '32, 463'],
 ['advanced steam', 'advanced steam movement'],
 ['super bowl xlv', 'super bowl xlv'],
 ['a professional fundraiser', 'city council'],
 ['eight', 'eight'],
 ['mathematical models', ''],
 ['futureplan', 'futureplan'],
 ['fabricating evidence or committing perjury',
  'assisting in fabricating evidence or committing perjury'],
 ['modern cryptographic systems', 'rsa algorithm'],
 ['1', '1'],
 ['provisional registration', ''],
 ['wahhabi / salafi jihadist extremist militant', 'militant'],
 ['the world meteorological organization ( wmo ) and the united nations environment programme ( unep )',
  'world meteorological organization ( wmo ) and the united nations environment programme ( unep )'],
 ['carolina panthers', 'carolina panthers'],
 ['orbit the moon',
  'lm would not be ready in time. rather than waste the saturn v on another simple earth - orbiting mission, aspo manager george low suggested the bold step of sending apollo 8 to orbit the moon instead, deferring the d mission to the next mission in march 1969, and eliminating the e mission'],
 ['gauge bosons',
  'gauge bosons are the fundamental means by which forces are emitted and absorbed'],
 ['alfred drury', 'alfred drury'],
 ['1996', '1996'],
 ['jin dynasty', 'jin'],
 ['during the oligocene, for example, the rainforest spanned a relatively narrow band.',
  'the oligocene'],
 ['ctenophores', 'ctenophores'],
 ['algeria', 'algeria'],
 ['a specially made wooden paddle', 'a specially made wooden paddle'],
 ['non - violent', 'non - violent'],
 ['new england patriots', 'arizona cardinals'],
 ['united methodist church', 'united methodist church'],
 ['clergyman', 'clergyman'],
 ['online pharmacies', 'online pharmacies'],
 ['january 7, 2014', 'january 7, 2014'],
 ['12 to 15 million', '12 to 15 million'],
 ['europeans who were based in britain', 'british and europeans'],
 ['doctor of theology', 'doctor of theology'],
 ['" bricks for warsaw "', '" bricks for warsaw "'],
 ['austrian polytechnic', 'austrian polytechnic'],
 ['metamorphic processes', 'metamorphic processes'],
 ["the world's economy", "the world's economy"],
 ['odo', 'odo, the bishop of bayeux and first earl of kent'],
 ['in the student', 'teacher may create a spark of excitement in the student'],
 ['science', 'science'],
 ['knaurs lexikon', 'knaurs lexikon'],
 ['shinzen japanese gardens', 'woodward park'],
 ['time or space', 'time or space'],
 ['in the chloroplasts of c4 plants', 'in the chloroplasts of c4 plants'],
 ['4, 097. 9 people per square mile', '4, 097. 9 people per square mile'],
 ['military roads to the area by braddock and forbes',
  'legal and illegal settlement'],
 ['november 22', 'november 22'],
 ['rheumatoid arthritis',
  "hashimoto's thyroiditis, rheumatoid arthritis, diabetes mellitus type 1, and systemic lupus erythematosus"],
 ['ipcc',
  "ipcc does not carry out its own research, it operates on the basis of scientific papers and independently documented results from other scientific bodies, and its schedule for producing reports requires a deadline for submissions prior to the report's final release. in principle, this means that any significant new evidence or events that change our understanding of climate science between this deadline and publication of an ipcc report cannot be included. in an area of science where our scientific understanding is rapidly changing, this has been raised as a serious shortcoming in a body which is widely regarded as the ultimate authority on the science"],
 ['27 july 2008', '27 july 2008'],
 ['over 10, 000', '10, 000'],
 ['june 4, 2014', 'june 4, 2014'],
 ['domestic legislation of the scottish parliament',
  'the domestic legislation'],
 ['luther', 'luther'],
 ['bacteriophages', 'viral pathogens, called bacteriophages'],
 ['dublin, cork, portarlington, lisburn, waterford and youghal',
  'dublin, cork, portarlington, lisburn, waterford and youghal'],
 ['2018', '2018'],
 ['kidney and bladder stones', 'kidney and bladder stones, and arthritis'],
 ['storage vessels', 'allow combustion to proceed rapidly and energetically'],
 ['algeria', 'vietnam in the 1950s. whereas they won the war in algeria'],
 ['an imaginative geography', 'geography'],
 ['1303', '1303'],
 ['singing of german hymns',
  'a mighty fortress is our god " ), based on psalm 46, and " vom himmel hoch, da komm ich her " ( " from heaven above to earth i come " ), based on luke 2 : 11 – 12. luther connected high art and folk music, also all classes, clergy and laity, men, women and children. his tool of choice for this connection was the singing of german hymns'],
 ['eidetic',
  "nikola credited his eidetic memory and creative abilities to his mother's genetics and influence"],
 ['40, 000 pounds', 'over 40, 000 pounds'],
 ['opportunity - based entrepreneurship', 'opportunity - based'],
 ['100 – 150 species', '100 – 150'],
 ['over 600, 000', '600, 000 drawings'],
 ['nonconservative forces', ''],
 ['using sickles to deflate one of the large domes covering two satellite dishes',
  'padlocking the gates and using sickles to deflate one of the large domes covering two satellite dishes'],
 ['tesla would be killed through overwork',
  'tesla would be killed through overwork'],
 ['world today', 'world today'],
 ['eu law', ''],
 ['" small business big game "', 'small business big game'],
 ['by a fee per unit of information transmitted', ''],
 ['the father of the house', 'the father of the house'],
 ['english and swahili', 'english and swahili'],
 ['ctdna, or cpdna', 'ctdna, or cpdna'],
 ['mustered local militia companies, generally ill trained and available only for short periods, to deal with native threats, but did not have any standing forces.',
  ''],
 ['cam newton', 'cam newton'],
 ['wbnd - ld', 'wbnd - ld'],
 ['38', '38'],
 ['leonardo da vinci', 'leonardo da vinci'],
 ['nasa', 'nasa'],
 ['seven', 'seven'],
 ['the innate immune system versus the adaptive immune system',
  'innate immune system versus the adaptive immune system'],
 ['roman catholic', 'roman catholic'],
 ['boston metropolitan area',
  'boston metropolitan area : its 209 - acre ( 85 ha ) main campus is centered on harvard yard in cambridge'],
 ['ealy', 'ealy'],
 ['climate will be a central issue in the renewed medium term plan that will be launched in the coming months',
  'this will create a direct and robust delivery framework for the action plan and ensure climate change is treated as an economy - wide issue'],
 ['maid alice monaghan', 'maid alice monaghan'],
 ['standards of practice', 'standards of practice'],
 ['trade unions', 'trade unions'],
 ['the owner', 'the owner'],
 ['linebacker', 'linebacker'],
 ['brandon mcmanus', 'brandon mcmanus'],
 ['the catholic church in the region', 'catholic church'],
 ['king sigismund iii vasa', 'king sigismund iii vasa'],
 ['mostly christian', 'christian'],
 ['models',
  'models, because of their simplicity, but have also been used in full size working engines, mainly on ships'],
 ['writers guild of america', 'writers guild of america'],
 ['physicians and other healthcare professionals',
  'physicians and other healthcare professionals'],
 ['the innate immune system', 'innate immune system'],
 ['1884',
  '1883 – 84 germany began to build a colonial empire in africa and the south pacific, before losing interest in imperialism. historians have debated exactly why germany made this sudden and short - lived move. [ verification needed ] bismarck was aware that public opinion had started to demand colonies for reasons of german prestige. he was influenced by hamburg merchants and traders, his neighbors at friedrichsruh. the establishment of the german colonial empire proceeded smoothly, starting with german new guinea in 1884'],
 ['1934', '11 july 1934'],
 ['arabic',
  'roman numerals, a practice established at super bowl v, would be temporarily suspended, and that the game would be named using arabic numerals as super bowl 50'],
 ['three - fourths', '10 to 15 million'],
 ['essentially holy people',
  'christians are no longer sinners in themselves and that the church consists only of essentially holy people'],
 ['less willing to travel or relocate',
  'women not taking jobs due to marriage or pregnancy'],
 ['1893', '1893'],
 ['many words', 'words'],
 ['water', ''],
 ['go home and change',
  'directed a subordinate to go home and change her dress'],
 ['ku band', 'lnb'],
 ['energy content',
  'energy content, o2 is used by complex forms of life, such as animals, in cellular respiration'],
 ['a majority', 'qualified majority'],
 ['1994', 'in 1994'],
 ['michael eisner', 'michael eisner'],
 ['warsaw', 'warsaw summer jazz days'],
 ['erganzungsschulen', 'erganzungsschulen'],
 ['the earlier they surrendered to the mongols, the higher they were placed',
  'southern china'],
 ['only the single number 1', ''],
 ['60 %', '60 %'],
 ['samuel webber', 'samuel webber'],
 ['increased flooding and sedimentation', 'flooding'],
 ['complexity class p', 'complexity'],
 ['various academic disciplines',
  'development of various academic disciplines, including : the chicago school of economics, the chicago school of sociology, the law and economics movement in legal analysis'],
 ['four', 'four'],
 ['entirely separate companies', 'entirely separate companies'],
 ['austria', 'austria near the alps'],
 ['tour of thuringia', 'thuringia'],
 ['british', 'british'],
 ['350', '350'],
 ['the wetter climate may have allowed the tropical rainforest to spread out across the continent.',
  'wetter'],
 ['22 miles', '22'],
 ['bart starr', 'bart starr'],
 ['chartered', 'chartered'],
 ['apollo 1 backup crew', 'apollo 1'],
 ['epidemiological account of the plague', 'an epidemiological account'],
 ['pseudorandom number generators', 'pseudorandom'],
 ['gravity', 'force of gravity'],
 ['fred singer', 'fred singer'],
 ['91. 3', '94. 1 males'],
 ['spain', 'spain'],
 ['william and judith bollinger', 'william and judith bollinger'],
 ['a user or host could call a host on a foreign network by including the dnic of the remote network as part of the destination address',
  'the interconnection of national x. 25 networks'],
 ['joseph shea', 'joseph shea'],
 ['dudley simpson', 'dudley simpson'],
 ['baptism', 'baptism'],
 ['galileo', 'aristotle'],
 ['central truths of christianity', 'the central truths of christianity'],
 ['elie metchnikoff', 'elie metchnikoff'],
 ['39', '39'],
 ['diatomic oxygen', 'diatomic oxygen'],
 ['skylab', 'skylab'],
 ["temujin's mother hoelun", "temujin's mother hoelun"],
 ['all women', 'all women'],
 ['chris keates', 'chris keates'],
 ['royal ujazdow castle', 'royal ujazdow castle'],
 ['einstein', 'albert einstein'],
 ['warner bros.',
  'warner bros. - based studio that briefly programmed the entire friday lineup during the 1990 – 91 season ( with going places joining family matters, full house and perfect strangers on the " tgif " schedule ) and through its development deal with paramount television'],
 ['achievement - oriented', 'achievement - oriented motivations'],
 ['defensive ends', 'defensive ends'],
 ['exploitation',
  'political activity caused exploitation of the east india company causing the plundering of the local economy'],
 ['backwards', 'ward'],
 ['chao', 'chao'],
 ['general relativity',
  'unclear as to how or whether this connection is relevant on microscales'],
 ['moselle', 'the moselle'],
 ['principal role',
  'the principal role of committees in the scottish parliament is to take evidence from witnesses, conduct inquiries and scrutinise legislation'],
 ['william the conqueror', 'william the conqueror'],
 ['southwest', 'west and south'],
 ['2. 7 %', '2. 7 %'],
 ['peace of westphalia', 'the peace of westphalia'],
 ['great dividing range', 'great dividing range'],
 ['39', '39'],
 ['increasing access to education', 'increasing access to education'],
 ['nationalisation law was from 1962, and the treaty was in force from 1958',
  'refused to pay his electricity bill to enel, as a protest against the nationalisation of the italian energy corporations. he claimed the italian nationalisation law conflicted with the treaty of rome, and requested a reference be made to both the italian constitutional court and the court of justice under tfeu article 267. the italian constitutional court gave an opinion that because the nationalisation law was from 1962, and the treaty was in force from 1958, costa had no claim. by contrast, the court of justice held that ultimately the treaty of rome in no way prevented energy nationalisation, and in any case under the treaty provisions only the commission could have brought a claim, not mr costa. however, in principle, mr costa was entitled to plead that the treaty conflicted with national law, and the court would have a duty to consider his claim to make a reference if there would be no appeal against its decision. the court of justice, repeating its view in van gend en loos, said member states " albeit within limited spheres, have restricted their sovereign rights and created a body of law applicable both to their nationals and to themselves " on the " basis of reciprocity ". eu law would not " be overridden by domestic legal provisions, however framed... without the legal basis of the community itself being called into question. " this meant any " subsequent unilateral act " of the member state inapplicable'],
 ['introductory', 'introductory stage of the bill'],
 ['small', '10, 000'],
 ['long - lived memory cells', 'long - lived memory cells'],
 ['7th century', '7th century'],
 ['maurus servius honoratus', ''],
 ['acquittal and avoid imprisonment',
  'win an acquittal and avoid imprisonment or a fine'],
 ['6th century bc', '6th century bc'],
 ['his protection',
  'promised civilians and soldiers wealth from future possible war spoils'],
 ['in the virtual call system, the network guarantees sequenced delivery of data to the host',
  'user datagram protocol'],
 ["wilson's theorem", "wilson's theorem"],
 ['the 1950s', 'from the 1950s'],
 ['ottoman', 'ottoman'],
 ['more capital', ''],
 ['five inches', 'five inches'],
 ['those who proceed to secondary school or vocational training',
  'secondary school or vocational training'],
 ['each packet includes complete addressing information',
  'dispatched and may go via different routes'],
 ['john the steadfast', 'john the steadfast'],
 ['a diatom ( heterokontophyte ) derived chloroplast',
  'diatom ( heterokontophyte ) derived chloroplast'],
 ['sea level', 'sea level'],
 ['presiding officer', 'presiding officer'],
 ['muqali', 'muqali'],
 ['1421', '1421'],
 ['regulations and directives', 'treaties establishing the european union'],
 ['the translation', 'translation'],
 ['5, 000', '5, 000'],
 ['drinking water', 'drinking water'],
 ['number of gates in a circuit', 'the number of gates in a circuit'],
 ['the packets may be delivered according to a multiple access scheme',
  'according to a multiple access scheme'],
 ['sainte foy in quebec', 'sainte foy in quebec'],
 ['li meng', 'li meng'],
 ['weightlessness', 'weightlessness'],
 ['tethys sea', 'tethys sea'],
 ['50, 000', '50, 000'],
 ['digital terrestrial', 'digital terrestrial'],
 ['centripetal', 'radial'],
 ['complexity classes', 'complexity classes'],
 ['60 %', '60 %'],
 ['nitrogen', 'the nitrogen'],
 ['connectional table', 'the connectional table'],
 ['town council', 'the town council'],
 ['tuition', 'by charging their students tuition fees'],
 ['antibodies', 'antibodies'],
 ['fell significantly', 'fell significantly'],
 ['alter rhein', 'the alter rhein ( " old rhine " )'],
 ['poet', 'poet'],
 ['cytokine tgf - β', 'cytokine tgf - β'],
 ['26', '13 years and 48 days ( manning was 39, newton was 26 )'],
 ['netherlands',
  'graubunden in the southeastern swiss alps, forms part of the swiss - austrian, swiss - liechtenstein border, swiss - german and then the franco - german border, then flows through the rhineland and eventually empties into the north sea in the netherlands'],
 ['zagreus', 'an audio drama titled zagreus'],
 ['murder of christ', 'murder of christ'],
 ['innate immune systems', 'innate'],
 ['particular skills', 'particular skills'],
 ['wealth concentration', 'wealth concentration'],
 ['criminalized behavior', 'forbidden speech'],
 ['super bowl opening night.', 'super bowl opening night'],
 ['340 miles', 'about 340 miles ( 550 km ) north'],
 ['share recordings', ''],
 ['mantle', "the earth's mantle"],
 ['antiforms', 'synforms'],
 ['lake uberlingen', 'lake uberlingen'],
 ['renewal of hostilities in the arab – israeli conflict', ''],
 ['3 million followers', '3 million'],
 ['scandinavia', 'scandinavia and northern europe'],
 ['photolysis of ozone', 'photolysis'],
 ['smart ticketing', 'smart ticketing'],
 ['unexplored territory', 'unknown or unexplored territory'],
 ['2004', '2004'],
 ['pad 37', 'pad 37'],
 ['world methodist council', 'world methodist council'],
 ['estates of the holy roman empire',
  'a general assembly of the estates of the holy roman empire'],
 ['lake rhine', 'the seerhein ( " lake rhine " )'],
 ['emmanuel sanders', 'emmanuel sanders'],
 ['ediacaran period', 'preceding ediacaran'],
 ['british superintendent for indian affairs in the new york region and beyond',
  'superintendent for indian affairs'],
 ['james lafayette', 'james lafayette'],
 ['phagocytes', 'phagocytes'],
 ['encourage',
  'to pressure the lazy, inspire the bored, deflate the cocky, encourage'],
 ['upper classes', 'upper classes'],
 ['sankt goarshausen', 'lorelei'],
 ['32', '32'],
 ['charter', 'charter status'],
 ['1913', '1913'],
 ['nonconservative forces', 'nonconservative forces'],
 ['algebraic aspects', 'algebraic'],
 ['everyday germans', 'germans'],
 ['melatonin', 'melatonin'],
 ['in a modern context', 'modern context'],
 ['early 1970s', '1970s'],
 ['sugar and oxygen ( o2 )', 'food'],
 ['german vernacular', 'german vernacular'],
 ['issue of laity having a voice and vote in the administration of the church',
  'over the issue of laity having a voice and vote in the administration of the church'],
 ['sediment deposits',
  'sediment deposits from amazon basin paleolakes and from the amazon fan'],
 ['38', '38'],
 ['in 1640', '1640'],
 ['edsen khoroo', 'edsen khoroo'],
 ['danube', 'rhine and upper danube'],
 ['inalchuq', 'inalchuq'],
 ['holy catholic ( or universal ) church', 'holy catholic'],
 ['the lamprey and hagfish', 'lamprey and hagfish'],
 ['wardenclyffe tower project', 'wardenclyffe tower'],
 ['non - combustible', 'coal'],
 ['four', '5½ sacks, four'],
 ['entire length',
  'northern ( german ) shore of the lake, off the island of lindau'],
 ['queer as folk', 'queer as folk'],
 ['poorly drafted contracts', 'poorly drafted contracts'],
 ['toward the center of the curving path',
  'toward the center of the curving path'],
 ['christian education', 'standard of pastoral care and christian education'],
 ['dublin', 'dublin'],
 ['divergence', 'divergence'],
 ['eurocities', 'eurocities'],
 ['rates of sea - level rise', 'ongoing tectonic subsidence'],
 ['ming and qing', 'ming and qing dynasties'],
 ['quietly', 'sit quietly'],
 ['article 17 ( 3 )', 'article 17'],
 ['charleston, south carolina', 'charleston, south carolina'],
 ['calipso', 'calipso'],
 ['microorganisms', 'microorganisms'],
 ['pancake - shaped circular disks', ''],
 ['1523', '1523'],
 ['apollo 17', 'apollo 17'],
 ['democracy', 'democracy'],
 ['modern soils', 'soils, rivers, landscapes, and glaciers'],
 ['southern china withstood and fought to the last',
  'southern china withstood and fought to the last before caving in'],
 ['loss of biodiversity', 'biodiversity'],
 ['the national anthem', 'the national anthem'],
 ['city council', 'the city council'],
 ['tehachapis', 'tehachapis'],
 ['during the itv network strike of 1979', '2005'],
 ['placebo', 'placebo effect'],
 ['2015', '2016'],
 ['roger goodell', 'roger goodell'],
 ['the establishment of a new and independent ethics and anti - corruption commission',
  'curbing corruption from the kenyan government, for instance, the establishment of a new and independent ethics and anti - corruption commission ( eacc )'],
 ['massachusetts', 'massachusetts'],
 ['normal force', ''],
 ['patient care rounds drug product selection',
  'patient care rounds drug product selection'],
 ['garrisons', 'garrisons at oswego, fort bull, and fort williams'],
 ['committees', 'committees'],
 ['frontex', 'frontex'],
 ["teacher's colleges", 'teaching unions'],
 ['chinggis khaan', 'chinggis khaan'],
 ['a period of foreign domination', ''],
 ['steam engine indicator', 'the steam engine indicator'],
 ['cholera', 'cholera'],
 ['manhattan', '89 liberty street'],
 ['1565', '1564'],
 ['elliptical', 'elliptical'],
 ['stanford university', 'stanford university'],
 ['one', '18 of 41 passes for 265 yards'],
 ['meritocratic', 'meritocratic'],
 ['the keraites', 'the jadaran'],
 ['one year at a time', 'annually'],
 ['a village',
  'a village located at the modern - day site of mariensztat neighbourhood'],
 ['swimming - plates', 'swimming - plates'],
 ['installed electrical arc light based illumination systems designed by tesla',
  'the company installed electrical arc light based illumination systems'],
 ['rapidly raising population and traffic in cities along sr 99',
  'the desirability of federal funding'],
 ['vietnam', 'vietnam'],
 ['1943', '1943'],
 ['grey street', 'grey street'],
 ['depths of the oceans and seas', 'the depths of the oceans and seas'],
 ['$ 105 billion', '$ 105 billion'],
 ['religious coalition for reproductive choice',
  'religious coalition for reproductive choice'],
 ['essentials', 'essentials'],
 ['eight years in primary school and four years in high school or secondary school.',
  ''],
 ['university of paris', 'geneva'],
 ['westminster',
  'scottish parliament is unable to legislate on such issues that are reserved to, and dealt with at, westminster'],
 ['february 9, 1832', 'february 9, 1832'],
 ['einstein', 'einstein'],
 ['41 years old', '41 years old'],
 ['to clean them', 'to clean them of plants and sediments'],
 ['west of the appalachian mountains',
  'lands west of the appalachian mountains'],
 ['paleoclimatologists', 'paleoclimatologists'],
 ['nearly three hundred years', 'three hundred years'],
 ['1, 100', '16, 000'],
 ['67. 9', '67. 9'],
 ['alberto calderon', 'alberto calderon'],
 ['three weight rooms', 'three weight rooms'],
 ['ossachite', 'ossachite'],
 ['christian whiton', 'christian whiton'],
 ['nuda', 'nuda'],
 ['all health care settings', 'all health care settings'],
 ['tony hawk', 'tony hawk'],
 ["unesco's world heritage list", "unesco's world heritage list"],
 ['value added by different classifications of workers', ''],
 ['kill luther', 'kill'],
 ['breast milk or colostrum',
  'breast milk or colostrum also contains antibodies that are transferred to the gut of the infant and protect against bacterial infections until the newborn can synthesize its own antibodies. this is passive immunity because the fetus does not actually make any memory cells or antibodies — it only borrows them. this passive immunity is usually short - term, lasting from a few days up to several months. in medicine, protective passive immunity can also be transferred artificially from one individual to another via antibody - rich serum'],
 ['gandhi', 'henry david thoreau'],
 ['much larger conflict between france and great britain',
  'a much larger conflict between france and great britain'],
 ['justice and prosperity', 'justice and prosperity'],
 ['archbishop albrecht', 'archbishop albrecht of mainz and magdeburg'],
 ['781', '781'],
 ['pittsburgh steelers', 'pittsburgh steelers'],
 ['up to 2 % higher', '2 %'],
 ['steam', 'powered'],
 ['graduate and undergraduate students',
  'graduate and undergraduate students'],
 ['female', 'female'],
 ['driving them in front of the army', 'driving them in front of the army'],
 ['five', 'five'],
 ['san diego', ''],
 ['switzerland', 'along the lower rhine'],
 ['the british empire', 'british'],
 ['1082', '1085'],
 ['7, 000, 000 square kilometres ( 2, 700', '7, 000, 000'],
 ['1998', '1998'],
 ['plague of athens in 430 bc', 'the plague of athens in 430 bc'],
 ['greenhouse gas concentrations in the atmosphere',
  'greenhouse gas concentrations in the atmosphere'],
 ['cryptomonads', 'cryptomonads'],
 ['uberseering bv v nordic construction gmbh',
  'uberseering bv v nordic construction gmbh the court of justice held that a german court could not deny a dutch building company the right to enforce a contract in germany on the basis that it was not validly incorporated in germany'],
 ['dyrrachium',
  'dyrrachium — one of the most important naval bases of the adriatic'],
 ['protestant', 'protestant'],
 ['lothar de maiziere', 'lothar de maiziere'],
 ['one', 'one'],
 ['unions',
  'u. s. economy consistently affords a lower level of economic mobility'],
 ['toyota corona mark ii', ''],
 ['marconi', 'guglielmo marconi'],
 ['by experience', 'experience'],
 ['pons aelius', 'pons aelius'],
 ['walt disney', 'walt disney'],
 ['bskyb', 'bskyb'],
 ['two', 'two piston strokes'],
 ['post - classical european', 'post - classical european'],
 ['antigen presentation', 'antigen presentation'],
 ['cobham - edmonds thesis', 'cobham - edmonds'],
 ['the daleks ( a. k. a. the mutants )', 'aliens'],
 ['up to a thousand times', 'diffuse back down their concentration gradient'],
 ['new york city', 'new york city'],
 ['environmental degradation', 'this'],
 ['format of the congress and many specifics of the plan became the prototype for confederation during the war of independence',
  'to formalize a unified front in trade and negotiations with various indians'],
 ['reduction', 'reduction'],
 ['southern', 'southern california region'],
 ['war', 'war'],
 ['chief electrician', 'chief electrician'],
 ['soft power', 'soft power'],
 ['free', 'free state'],
 ['american football conference', 'american football conference'],
 ['1220', '1220'],
 ['l',
  'l ( the set of all problems that can be solved in logarithmic space ) is strictly contained in p or equal to p'],
 ['1895', '1895'],
 ['construction', 'construction'],
 ['grumman', 'grumman'],
 ['agitation for constitutional reform', 'constitutional reform'],
 ['oceanic', 'oceanic'],
 ['rote learning',
  '" a day of rote learning and often wearying spiritual exercises. "'],
 ['relaunch the show',
  'the bbc hoped to find an independent production company to relaunch the show'],
 ['neil shubin and paul sereno', 'neil shubin and paul sereno'],
 ['it has trouble crossing membranes to get to where it is needed',
  'trouble crossing membranes to get to where it is needed'],
 ['5th century', ''],
 ['all', 'all uses of sculpture'],
 ['jamukha', 'jamukha'],
 ['motivated students', 'motivated students'],
 ['capital punishment', 'capital punishment'],
 ['lung tissue', ''],
 ['40 %', ''],
 ['gospic, austrian empire', 'gospic, austrian empire'],
 ['unstable six - carbon molecules that immediately break down',
  'unstable six - carbon molecules'],
 ['a plug - n - play system', 'plug - n - play system'],
 ['divergent boundaries', 'mid - ocean ridges'],
 ['34 million years', '34 million years'],
 ['conspiracy', 'conspiracy against islam'],
 ['21', '21 to 11'],
 ['south coast metro', 'the south coast metro'],
 ['april', 'april and october'],
 ['capture niagara, crown point and duquesne, he proposed attacks on fort frontenac on the north shore of lake ontario',
  'attacks on fort frontenac on the north shore of lake ontario and an expedition through the wilderness of the maine district and down the chaudiere river to attack the city of quebec.'],
 ['zero', 'zero'],
 ['daniel 8 : 9 – 12, 23 – 25',
  'daniel 8 : 9 – 12, 23 – 25. the antichrist of 2 thessalonians 2 was identified as the power of the papacy. so too was the little horn of daniel 7, coming up among the divisions of rome'],
 ['biological structures and processes within an organism',
  "a system of many biological structures and processes within an organism that protects against disease. to function properly, an immune system must detect a wide variety of agents, known as pathogens, from viruses to parasitic worms, and distinguish them from the organism's own healthy tissue. in many species, the immune system can be classified into subsystems, such as the innate immune system versus the adaptive immune system"],
 ['circuit switching', 'circuit switching'],
 ['its safaris, diverse climate and geography, and expansive wildlife reserves and national parks',
  'its safaris'],
 ['malik jackson', 'malik jackson'],
 ['a doctor', 'a doctor'],
 ['aided', "' aided'schools"],
 ['80, 000', '80, 000 primarily french - speaking roman catholic residents'],
 ['all other animals', 'bilateria'],
 ['prestigious',
  'prestigious ones are national museum with a collection of works whose origin ranges in time from antiquity till the present epoch as well as one of the best collections of paintings in the country'],
 ['dr. george e. mueller', 'dr. george e. mueller'],
 ['social spending', 'social spending'],
 ['steam turbines',
  'steam turbines with reduction gearing ( although the turbinia has direct turbines to propellers with no reduction gearbox ) dominated large ship propulsion throughout the late 20th century, being more efficient ( and requiring far less maintenance ) than reciprocating steam engines. in recent decades, reciprocating diesel engines'],
 ['as soon as they enter into force, unless stated otherwise',
  'soon as they enter into force'],
 ['uncertain', 'uncertain'],
 ['non - self molecules', 'non - self molecules'],
 ['2010', '2010'],
 ['apicomplexan - related diseases', 'apicomplexan - related diseases'],
 ['the computational model', 'computational model used'],
 ['after the sixth sermon', 'after the sixth'],
 ['much land and housing', 'land and housing'],
 ['drowned', 'drowned'],
 ['a presidential representative democratic republic',
  'a presidential representative democratic republic'],
 ['la galaxy', 'la galaxy'],
 ['maria de la queillerie', 'maria de la queillerie'],
 ['the best engineering school',
  'promised to send him to the best engineering school'],
 ['45',
  "45 total touchdowns ( 35 passing, 10 rushing ), a career - low 10 interceptions, and a career - best quarterback rating of 99. 4. newton's leading receivers were tight end greg olsen, who caught a career - high 77 passes for 1, 104 yards and seven touchdowns"],
 ['los angeles', 'los angeles and san diego'],
 ['1760', '1760'],
 ["ncaa's division iii", 'division iii'],
 ['conservative', 'conservative saudi - based wahhabism'],
 ['1962', '1962'],
 ['12', '12'],
 ['75th birthday', "on tesla's 75th birthday"],
 ['charlesfort', 'charlesfort'],
 ['steymann v staatssecretaris van justitie',
  'steymann v staatssecretaris van justitie'],
 ['up to 30 %', '30 %'],
 ['a way of continuing their protest',
  'as a way of reminding their countrymen of injustice'],
 ['sky', 'setanta sports'],
 ['three', 'three'],
 ['nineteenth - century maps', 'scramble for africa "'],
 ['from the 16th and 17th centuries', 'the 16th and 17th centuries'],
 ['5, 984', '5, 984'],
 ['1858', '1858'],
 ['prelaunch test', 'manned lunar landing'],
 ['relationship contracting where the emphasis is on a co - operative relationship',
  'relationship contracting'],
 ['january 1979', 'january 1979'],
 ['the chloroplast peripheral reticulum',
  'the chloroplast peripheral reticulum'],
 ['deficiencies',
  'deficiencies existed in command module design, workmanship and quality control'],
 ['numeracy', 'numeracy'],
 ['the second republic', 'second republic'],
 ['the south pacific', 'south pacific'],
 ['on the ground', 'on the ground'],
 ['suburban', 'suburban shopping areas'],
 ['mycobacterium tuberculosis', 'mycobacterium tuberculosis'],
 ['$ 2 million', '$ 2 million'],
 ['factory project', 'the factory project'],
 ['1973', '1973'],
 ['ijsselmeer', 'to the north'],
 ['substitute capital equipment', 'this trend'],
 ['2011.', '2011'],
 ['kuviasungnerk / kangeiko', 'kuviasungnerk / kangeiko'],
 ['leftist / communist / nationalist insurgents / opposition',
  'bulwarks against — what were thought to be at the time — more dangerous leftist / communist / nationalist insurgents / opposition'],
 ['generate atp energy',
  'use the potential energy stored in an h +, or hydrogen ion gradient to generate atp energy'],
 ['fort beausejour', 'fort beausejour'],
 ['lymphokines', 'lymphokines'],
 ['substitute parent', 'substitute parent'],
 ['from 1910 – 1940', 'from 1910 – 1940'],
 ['circa 1964 – 1965', '1964 – 1965'],
 ['new orangery', 'the new orangery'],
 ['b cell', 'b'],
 ['dai on ulus, also rendered as ikh yuan uls or yekhe yuan ulus', ''],
 ['florida', 'florida'],
 ['malnutrition', 'malnutrition'],
 ['2000', '2000'],
 ['small catechism', ''],
 ['san diego chargers', ''],
 ['john bassett', 'john bassett'],
 ['new england patriots',
  'the broncos finished the regular season with a 12 – 4 record, and denied the new england patriots a chance to defend their title from super bowl xlix by defeating them 20 – 18'],
 ['the keraite', 'keraite'],
 ['divide to form new pyrenoids, or be produced " de novo "', 'de novo'],
 ['central europe', 'central europe'],
 ['tesla polyphase system', 'tesla polyphase system'],
 ['division i', 'pac - 12'],
 ['victoria constitution act 1855', 'victoria constitution act 1855'],
 ['carbon monoxide', 'carbon monoxide'],
 ['nbc blue network', 'nbc blue'],
 ['1474', '1474'],
 ['18 february 1546', '2 : 45 a. m. on 18 february 1546'],
 ['american institute of electrical engineers',
  'the institute of radio engineers'],
 ['19', '19 of 28'],
 ['the entertainment channel', 'the entertainment channel'],
 ['four', 'four'],
 ['2004', '2004'],
 ["to increase the chloroplast's surface area for cross - membrane transport",
  "to increase the chloroplast's surface area for cross - membrane transport, because they are often branched and tangled with the endoplasmic reticulum"],
 ['23. 9 %', '23. 9 %'],
 ['decision problems',
  'function problems can be recast as decision problems. for example, the multiplication of two integers can be expressed as the set of triples ( a, b, c ) such that the relation a × b = c holds. deciding whether a given triple is a member of this set corresponds to solving the problem of multiplying two numbers.'],
 ['joining the easterly flow toward the atlantic.', 'the atlantic'],
 ['around 50 years of age', '50'],
 ['zaltbommel', ''],
 ['1724 to 1725',
  'johann sebastian bach included several verses as chorales in his cantatas and based chorale cantatas entirely on them, namely christ lag in todes banden, bwv 4, as early as possibly 1707, in his second annual cycle ( 1724 to 1725'],
 ['1943', '1943'],
 ["africa's most successful nation in the 2008 olympics",
  'won several medals'],
 ['mpeg - 4', 'mpeg - 4'],
 ['boat', 'boat'],
 ['west', 'roughly west'],
 ['archbishop of trier', 'the archbishop of trier'],
 ['the royal grammar school', 'royal grammar school'],
 ['wijk bij duurstede', 'wijk bij duurstede'],
 ['the adaptive immune system', ''],
 ['1970s', 'after the 1970s'],
 ['egyptian islamic jihad organization',
  'egyptian islamic jihad organization responsible for the assassination of anwar sadat in 1981.'],
 ['5 live sports extra', 'bbc radio 5 live and 5 live sports extra'],
 ['swiss canton', 'swiss canton of graubunden'],
 ['journalism', 'journalism'],
 ['isopentenyl pyrophosphate synthesis',
  'isopentenyl pyrophosphate synthesis'],
 ['francisco de orellana', 'francisco de orellana'],
 ['yuri gagarin', 'yuri gagarin'],
 ['religious', 'religious'],
 ['orange - red zeaxanthin', 'zeaxanthin'],
 ['storybook houses', 'storybook houses'],
 ['kip', 'the newton : the kilogram - force ( kgf )'],
 ['four', '13 – 7'],
 ['all german territory', ''],
 ['2006', '2006'],
 ['westwood one', 'westwood one'],
 ['22 june 1857', '22 june 1857'],
 ['spanish', 'spanish'],
 ['germany', 'uk, germany, italy, switzerland and norway'],
 ['it has trouble distinguishing between carbon dioxide and oxygen',
  'it has trouble distinguishing between carbon dioxide and oxygen'],
 ['10, 000',
  '30 schools and a web - based environment, has 700 employees and teaches nearly 10, 000'],
 ['common rules for coal and steel, and then atomic energy',
  'common rules for coal and steel, and then atomic energy'],
 ['1273', '1273'],
 ['1, 388', '1, 388'],
 ['the husband and father', 'husband and father'],
 ['connected via dial - up connections or dedicated async connections',
  'via dial - up connections or dedicated async connections'],
 ['1876', '1876'],
 ['lt col paul von lettow - vorbeck', 'paul von lettow - vorbeck'],
 ['asymptotic distribution', 'asymptotic distribution'],
 ['squillace', 'squillace'],
 ['financial strain',
  "the acquisition of a feasible ac motor gave westinghouse a key patent in building a completely integrated ac system, but the financial strain of buying up patents and hiring the engineers needed to build it meant development of tesla's motor had to be put on hold for a while."],
 ['their low ratio of organic matter to salt and water',
  'low ratio of organic matter to salt and water'],
 ['late 14th - century', 'late 14th - century'],
 ['the tax rate', 'tax rate'],
 ['add o2 instead of co2 to rubp', 'add o2 instead of co2'],
 ['1275', '1275'],
 ['rare and desired', 'rare and desired skills'],
 ['20th', '20th'],
 ['w. e. b. du bois', 'w. e. b. du bois'],
 ['to complete the construction of wardenclyffe.',
  'tesla wrote over 50 letters to morgan, pleading for and demanding additional funding to complete the construction of wardenclyffe.'],
 ['1965 – 66 season', '1965 – 66 season'],
 ['26. 7 %', ''],
 ['soviet union', 'soviet'],
 ['reactive oxygen species', 'reactive oxygen species'],
 ['class ii mhc molecules', 'class ii mhc'],
 ['$ 759, 900', ''],
 ['optimizes the use of medication and promotes health, wellness, and disease prevention',
  ''],
 ['music from the 2008 – 2010 specials',
  'music from the 2008 – 2010 specials'],
 ['william farel', 'william farel'],
 ['in some c3 angiosperms, and even some gymnosperms', 'in the chloroplasts'],
 ['dutch east india company', 'dutch east india company'],
 ['san jose marriott.', 'san jose marriott'],
 ['steeper tax', 'steeper tax progressivity applied to social spending'],
 ['100 – 150', '100 – 150'],
 ['1226', '1226'],
 ['prague', 'prague'],
 ['374', '374'],
 ['evidence',
  'economist joseph stiglitz presented evidence in 2009 that both global inequality and inequality within countries prevent growth by limiting aggregate demand'],
 ['a. a. michelson', 'a. a. michelson'],
 ['a carboxysome', 'carboxysome'],
 ['alpha phi omega', 'alpha phi omega'],
 ['northern europe and the mid - atlantic',
  'northern europe and the mid - atlantic'],
 ['red', 'red algae red'],
 ['albert einstein', 'albert einstein'],
 ['1936', '1936'],
 ['thomas reid and dugald stewart', 'thomas reid and dugald stewart'],
 ['present - day upstate new york and the ohio country',
  'present - day upstate new york and the ohio country'],
 ['3', '4 han tumens and 3'],
 ['everything',
  "everything that is used to work sorrow over sin is called the law, even if it is christ's life, christ's death for sin, or god's goodness experienced in creation. simply refusing to preach the ten commandments among christians – thereby, as it were, removing the three letters l - a - w from the church – does not eliminate the accusing law. claiming that the law – in any form – should not be preached to christians anymore would be tantamount to asserting that christians are no longer sinners in themselves and that the church consists only of essentially holy people"],
 ['teacherspayteachers. com', ''],
 ['strong',
  'geordie dialect has much of its origins in the language spoken by the anglo - saxon populations who migrated to and conquered much of england after the end of roman imperial rule. this language'],
 ['because the operations of the armed forces have been traditionally cloaked by the ubiquitous blanket of “ state security ”',
  'because the operations of the armed forces have been traditionally cloaked by the ubiquitous blanket of “ state security ”'],
 ['isaac newton', 'isaac newton'],
 ['24 august – 3 october 1572', '24 august – 3 october 1572'],
 ['alex seropian', 'katherine dunham'],
 ['kevin harlan', 'kevin harlan'],
 ['glaucophyte chloroplasts', 'glaucophyte'],
 ['by department', 'by department'],
 ['february 2015', 'february 2015'],
 ['february 7, 2016', 'february 7, 2016'],
 ['innate immune system', 'innate immune system'],
 ['villes de surete', '" villes de surete "'],
 ['marshall space flight center', 'manned spacecraft center'],
 ['walt disney presents', 'walt disney presents'],
 ['musical', 'musical venues'],
 ['michael heckenberger and colleagues', 'michael heckenberger'],
 ['the golden gate bridge', 'the golden gate bridge'],
 ['nine seasons', 'nine'],
 ['at rest', ''],
 ['liupanshan', 'liupanshan'],
 ['fashion, architecture, product design, graphic arts and photography',
  "the v & a dundee will be on the city's waterfront and is intended to focus on fashion, architecture, product design, graphic arts and photography"],
 ['1910 to 1940', 'high school movement from 1910 to 1940'],
 ['20th century', 'second half of the 20th century'],
 ['kevin harlan', 'kevin harlan'],
 ['sap center in san jose.', 'sap center in san jose'],
 ['2012', '2012'],
 ['central business districts', 'central business districts'],
 ['osmotic pressure', 'uncertain'],
 ['a bright red - orange carotenoid', 'a bright red - orange carotenoid'],
 ['ofcom', 'ofcom'],
 ['annual conference order of elders', 'order of deacons'],
 ['the merkits', 'the merkits'],
 ['diseases from europe', 'diseases from europe, such as smallpox'],
 ['very similar', 'its substance was very similar'],
 ['legislative council', 'legislative council'],
 ['three meals in a day', 'three'],
 ['medication management system development, deployment and optimization',
  'information technology departments'],
 ['easier credit', 'easier credit to the lower and middle income earners'],
 ['more than 4 kilometers', '1. 6 kilometres'],
 ['secularism and secular nationalism', 'secularism and secular nationalism'],
 ['justice', 'justice resides'],
 ['leonard goldenson', 'leonard goldenson'],
 ['by a fee per unit of connection time, even when no data is transferred',
  ''],
 ['geneva', 'geneva'],
 ['apollo applications program', 'apollo applications program'],
 ['augustinian order', 'the augustinian order'],
 ["accessory pigments that override the chlorophylls'green colors",
  'accessory pigments'],
 ['not benefitting scotland as much as they should',
  'the revenues from the oil were not benefitting scotland'],
 ['xingu tribe', 'xingu'],
 ['nursing homes', 'nursing homes'],
 ['algae', 'algae'],
 ['the doctrine of transubstantiation', 'the pope'],
 ['whether the organelle carries out the last leg of the pathway or if it happens in the cytosol',
  'the chloroplast is known to make the precursors to methionine but it is unclear'],
 ['1st century bc', '1st century bc'],
 ['analysis of algorithms',
  'theoretical computer science are analysis of algorithms and computability theory'],
 ['germany and switzerland', 'turkey'],
 ['the young and the elderly', 'young and the elderly'],
 ['vast areas', 'vast areas that have been left undeveloped'],
 ['cut throat competition',
  'prevent cut throat competition, not to hinder trade'],
 ['fifty thousand dollars',
  'fifty thousand dollars in it for you — if you can do it. " : 54 – 57 : 64 this has been noted as an odd statement from an edison whose company was stingy with pay and who did not have that sort of cash on hand. after months of work, tesla fulfilled the task and inquired about payment. edison, saying that he was only joking, replied, " tesla, you don\'t understand our american humor. " : 64 instead, edison offered a us $ 10 a week raise over tesla\'s us $ 18 per week salary'],
 ['three', 'three'],
 ['galileo', 'galileo'],
 ['a nonphotosynthetic eukaryote engulfed a chloroplast - containing alga but failed to digest it',
  'when a nonphotosynthetic eukaryote'],
 ['beatrix potter', 'charles dickens'],
 ['2005 – 2010', '2005 – 2010'],
 ['fourth', 'fourth'],
 ['bryan davies', 'bryan davies'],
 ['muhammad ibn zakariya razi', 'muhammad ibn zakariya razi'],
 ['citrus', 'cattle and citrus'],
 ['parliament of the united kingdom', 'parliament of the united kingdom'],
 ['three', 'three'],
 ['heart disease, chronic pain, and asthma',
  'heart disease, chronic pain, and asthma'],
 ['2. 5 billion years ago', 'about 2. 5 billion years ago'],
 ['biochemical oxygen demand', 'biochemical oxygen demand'],
 ['dispatched six regiments to new france under the command of baron dieskau in 1755.',
  'dispatched six regiments to new france'],
 ['other scientific bodies', 'scientific papers'],
 ['roone arledge', 'roone arledge'],
 ['british blockade of the french coastline limited french shipping.',
  'a poor harvest in 1757, a difficult winter, and the allegedly corrupt machinations of francois bigot, the intendant of the territory. his schemes to supply the colony inflated prices and were believed by montcalm to line his pockets and those of his associates. a massive outbreak of smallpox'],
 ['recreational', 'mild euphoric'],
 ['1269', '1269'],
 ['time or space', ''],
 ['modern canalized section', ''],
 ['board certified ambulatory care pharmacist',
  'board certified ambulatory care pharmacist'],
 ['itt', 'itt'],
 ['mork & mindy', 'mork & mindy'],
 ['san jose marriott', 'san jose marriott'],
 ['uni in the usa', ''],
 ['base titanium, a subsidiary of base resources of australia',
  'base titanium, a subsidiary of base resources of australia, shipped its first major consignment of minerals to china'],
 ['also known in english as amazonia or the amazon jungle,',
  'amazonia or the amazon jungle'],
 ['42, 000', '42, 000'],
 ['old rhine bridge at constance', ''],
 ['brough park', 'gosforth park'],
 ['gary kubiak', 'gary kubiak'],
 ['dutch', 'dutch gooien, via west frisian'],
 ['david g. booth', 'david g. booth'],
 ['pierre - auguste renoir', 'pierre - auguste renoir'],
 ['higher education', 'higher education'],
 ['oxygen supplementation', 'oxygen supplementation'],
 ['sun life stadium', 'sun life stadium'],
 ...]