In [1]:
import json
import torch
import torch.nn as nn
import random
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [2]:
supporting_texts = json.load(open('../data/supporting.json'))
refuting_texts = json.load(open('../data/refuting.json'))

In [3]:
print(supporting_texts[10])

Evidence:
He composed the scores to the television series Lost, Alias and Fringe, the video ' game series Medal of Honor and Call of Duty and many films such as The Incredibles, Ratatouille, Up, Mission: Impossible - Ghost Protocol, Dawn of the Planet of the Apes, Jurassic World, Inside Out, Star Trek Beyond, Doctor Strange, Rogue One and Spider-Man: Homecoming.

Claim:
Michael Giacchino composed the score for Doctor Strange.


In [4]:
import random
split = 0.8

_prompt = '\n\nThe evidence supports the claim:\n'
all_list = [item + _prompt + 'Y' for item in supporting_texts]
all_list += [item + _prompt + 'N' for item in refuting_texts]
random.shuffle(all_list)
train_list = all_list[:int(len(all_list) * split)]
dev_list = all_list[int(len(all_list) * split):]

In [5]:
del all_list

In [6]:
#json.dump(train_list, open('../data/train_list.json', 'w'))
#json.dump(dev_list, open('../data/dev_list.json', 'w'))

In [17]:
train_list = json.load(open('../data/train_list.json'))
dev_list = json.load(open('../data/dev_list.json'))

In [18]:
print(train_list[0])

Evidence:
Shot in a mockumentary format, it follows the contestants in a beauty pageant called the Sarah Rose Cosmetics Mount Rose American Teen Princess Pageant, held in the small fictional town of Mount Rose, Minnesota, in which various contestants die in suspicious ways.

Claim:
Drop Dead Gorgeous follows the contestants in the Sarah Rose Cosmetics Mount Rose American Child Princess Pageant.

The evidence supports the claim:
N


In [19]:
def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def batchify(data, n):
    len_dict = {}
    for item in data:
        length = item.shape[1]
        try:
            len_dict[length].append(item)
        except:
            len_dict[length] = [item]

    batch_chunks = []
    for k in len_dict.keys():
        vectors = len_dict[k]
        batch_chunks += chunks(vectors, n)

    batches = []
    for chunk in batch_chunks:
        inputs = torch.stack([item[0] for item in chunk])
        batches.append((inputs))

    return batches

In [20]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [None]:
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

In [None]:
_limit = 1024
data = []
total_skipped = 0
for item in train_list:
    tokens = tokenizer.encode(item, return_tensors='pt')
    if tokens.shape[1] > _limit:
        total_skipped += 1
        continue
    data.append(tokens)
print(f'Skipped {total_skipped} out of {len(train_list)}')

In [None]:
train_batches = batchify(data, 2)

In [None]:
def train(train_model, batches, optimizer, criterion):
    total_loss = 0.
    for i, batch in tqdm(enumerate(batches), total=len(batches)):
        model.train()
        inputs = batch
        optimizer.zero_grad()
        loss = train_model(inputs.cuda(), labels=inputs.cuda())[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(train_model.parameters(), 0.5)
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(batches)

In [None]:
from torch.optim.lr_scheduler import StepLR

random.shuffle(train_batches)
scheduler = StepLR(optimizer, step_size=2, gamma=0.8)
for epoch in range(10):
    random.shuffle(train_batches)
    loss = train(model, train_batches, optimizer, criterion)z
    #test(model, dev_list[:2000])
    print('Epoch:', epoch, 'Loss:', loss)
    torch.save({'epoch': epoch,
                'model_state_dict': model.state_dict()},
                'save_fever' + str(epoch))
    scheduler.step()

In [96]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
checkpoint = torch.load('save_fever2')
model.load_state_dict(checkpoint['model_state_dict'])
_ = model.eval()

Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
import sys
import traceback

def test(model, data):
    model.eval()
    tp = 0
    fp = 0
    fn = 0

    skipped = 0

    for item in tqdm(data):
        expected = get_answer_from_text(item)
        predicted = ''
        try:
            predicted = generate_answer(model, item)
        except (IndexError, RuntimeError) as e:
            print(str(e))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print(repr(traceback.extract_tb(exc_traceback)))
            skipped += 1
            continue

        if expected == predicted:
            tp += 1
        if expected == 'N' and predicted == 'Y':
            fp += 1
        if expected == 'Y' and predicted == 'N':
            fn += 1

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    print('Precision:', precision)
    print('Recall:', precision)
    print('F1:', precision)
    print('Skipped:', skipped)

In [14]:
def get_text_up_to_question(text):
    _claim_yn = 'The evidence supports the claim:\n'
    return text[:text.find(_claim_yn) + len(_claim_yn)]

In [15]:
def get_answer_from_text(text):
    _claim_yn = 'The evidence supports the claim:\n'
    pos = text.find(_claim_yn) + len(_claim_yn)
    return text[pos]

In [16]:
def generate_answer(model, text):
    prompt = get_text_up_to_question(text)
    tokens = tokenizer.encode(prompt, return_tensors='pt')
    _length = 1
    tokens_length = tokens.shape[1]
    if tokens_length + _length >= 1024:
        raise RuntimeError('Text is longer than 1024')
    output = model.generate(
             tokens.cuda(),
             max_length=tokens_length + _length, 
             pad_token_id=50256
    )
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return get_answer_from_text(output)

In [101]:
get_answer_from_text(dev_list[0])

'Y'

In [102]:
generate_answer(model, dev_list[0])

'Y'

In [103]:
test(model, dev_list)

  1%|          | 102/19048 [00:01<04:20, 72.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1528 > 1024). Running this sequence through the model will result in indexing errors
  1%|          | 118/19048 [00:01<04:30, 70.10it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


  2%|▏         | 407/19048 [00:05<04:24, 70.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1593 > 1024). Running this sequence through the model will result in indexing errors
  2%|▏         | 424/19048 [00:05<04:14, 73.23it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


  4%|▍         | 741/19048 [00:10<04:28, 68.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1194 > 1024). Running this sequence through the model will result in indexing errors
  4%|▍         | 750/19048 [00:10<04:14, 71.76it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 10%|▉         | 1813/19048 [00:25<03:52, 74.01it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1517 > 1024). Running this sequence through the model will result in indexing errors
 10%|▉         | 1830/19048 [00:26<03:51, 74.32it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 14%|█▍        | 2697/19048 [00:38<03:57, 68.89it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1154 > 1024). Running this sequence through the model will result in indexing errors
 14%|█▍        | 2706/19048 [00:39<03:46, 72.11it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 15%|█▌        | 2925/19048 [00:42<03:49, 70.28it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1383 > 1024). Running this sequence through the model will result in indexing errors
 15%|█▌        | 2941/19048 [00:42<03:46, 70.98it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 18%|█▊        | 3372/19048 [00:48<03:54, 66.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (3966 > 1024). Running this sequence through the model will result in indexing errors
 18%|█▊        | 3388/19048 [00:48<03:44, 69.75it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 22%|██▏       | 4160/19048 [00:59<03:21, 74.06it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1400 > 1024). Running this sequence through the model will result in indexing errors
 22%|██▏       | 4177/19048 [01:00<03:18, 74.94it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 25%|██▍       | 4696/19048 [01:07<03:32, 67.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1132 > 1024). Running this sequence through the model will result in indexing errors
 25%|██▍       | 4705/19048 [01:07<03:20, 71.39it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 29%|██▉       | 5478/19048 [01:18<03:12, 70.51it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1035 > 1024). Running this sequence through the model will result in indexing errors
 29%|██▉       | 5486/19048 [01:18<03:09, 71.60it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 33%|███▎      | 6305/19048 [01:30<03:07, 68.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1288 > 1024). Running this sequence through the model will result in indexing errors
 33%|███▎      | 6322/19048 [01:30<02:52, 73.67it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 35%|███▍      | 6623/19048 [01:34<03:06, 66.50it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2306 > 1024). Running this sequence through the model will result in indexing errors
 35%|███▍      | 6640/19048 [01:35<02:57, 69.99it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 36%|███▌      | 6815/19048 [01:37<02:39, 76.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1115 > 1024). Running this sequence through the model will result in indexing errors
 36%|███▌      | 6831/19048 [01:37<02:37, 77.78it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 38%|███▊      | 7238/19048 [01:43<02:37, 74.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1196 > 1024). Running this sequence through the model will result in indexing errors
 38%|███▊      | 7255/19048 [01:43<02:35, 75.61it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 38%|███▊      | 7286/19048 [01:44<03:03, 63.98it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1095 > 1024). Running this sequence through the model will result in indexing errors
 38%|███▊      | 7304/19048 [01:44<02:44, 71.29it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 41%|████      | 7807/19048 [01:51<02:36, 72.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2348 > 1024). Running this sequence through the model will result in indexing errors
 41%|████      | 7824/19048 [01:51<02:28, 75.45it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 44%|████▎     | 8289/19048 [01:58<02:35, 69.27it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1084 > 1024). Running this sequence through the model will result in indexing errors
 44%|████▎     | 8304/19048 [01:58<02:33, 70.08it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 54%|█████▍    | 10291/19048 [02:27<01:56, 75.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1198 > 1024). Running this sequence through the model will result in indexing errors
 54%|█████▍    | 10307/19048 [02:27<02:03, 70.60it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 54%|█████▍    | 10364/19048 [02:28<02:07, 68.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1154 > 1024). Running this sequence through the model will result in indexing errors
 54%|█████▍    | 10373/19048 [02:28<02:00, 71.76it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 55%|█████▌    | 10517/19048 [02:30<02:00, 70.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1727 > 1024). Running this sequence through the model will result in indexing errors
 55%|█████▌    | 10533/19048 [02:30<01:56, 73.27it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 58%|█████▊    | 10986/19048 [02:37<02:14, 59.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1929 > 1024). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 11001/19048 [02:37<02:02, 65.75it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1101 > 1024). Running this sequence through the model will result in indexing errors


Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]
Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 60%|█████▉    | 11405/19048 [02:43<01:52, 67.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1604 > 1024). Running this sequence through the model will result in indexing errors
 60%|█████▉    | 11420/19048 [02:44<01:53, 67.37it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 61%|██████    | 11532/19048 [02:45<01:58, 63.66it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1036 > 1024). Running this sequence through the model will result in indexing errors
 61%|██████    | 11546/19048 [02:46<02:02, 61.16it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 66%|██████▌   | 12572/19048 [03:01<01:42, 63.10it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1142 > 1024). Running this sequence through the model will result in indexing errors
 66%|██████▌   | 12589/19048 [03:01<01:34, 68.18it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 68%|██████▊   | 12998/19048 [03:07<01:31, 66.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1148 > 1024). Running this sequence through the model will result in indexing errors
 68%|██████▊   | 13014/19048 [03:07<01:28, 67.92it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 70%|███████   | 13418/19048 [03:13<01:21, 69.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1107 > 1024). Running this sequence through the model will result in indexing errors
 71%|███████   | 13436/19048 [03:13<01:14, 75.27it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 71%|███████   | 13496/19048 [03:14<01:31, 60.98it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1461 > 1024). Running this sequence through the model will result in indexing errors
 71%|███████   | 13512/19048 [03:15<01:21, 67.84it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 72%|███████▏  | 13656/19048 [03:17<01:16, 70.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1193 > 1024). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 13672/19048 [03:17<01:13, 72.97it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 72%|███████▏  | 13759/19048 [03:18<01:23, 63.08it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1089 > 1024). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 13774/19048 [03:18<01:21, 64.46it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 74%|███████▍  | 14109/19048 [03:23<01:10, 69.90it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1150 > 1024). Running this sequence through the model will result in indexing errors
 74%|███████▍  | 14126/19048 [03:23<01:05, 74.59it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 77%|███████▋  | 14615/19048 [03:30<01:03, 69.51it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1294 > 1024). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 14631/19048 [03:31<01:01, 71.39it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 78%|███████▊  | 14840/19048 [03:34<01:00, 69.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1954 > 1024). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 14856/19048 [03:34<01:02, 67.03it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 81%|████████▏ | 15483/19048 [03:43<00:49, 71.83it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1344 > 1024). Running this sequence through the model will result in indexing errors
 81%|████████▏ | 15499/19048 [03:43<00:48, 73.85it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 82%|████████▏ | 15549/19048 [03:44<00:47, 73.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1398 > 1024). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 15565/19048 [03:44<00:48, 71.95it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 82%|████████▏ | 15672/19048 [03:45<00:44, 76.07it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2769 > 1024). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 15680/19048 [03:46<00:45, 74.27it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 83%|████████▎ | 15843/19048 [03:48<00:47, 67.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1385 > 1024). Running this sequence through the model will result in indexing errors
 83%|████████▎ | 15859/19048 [03:48<00:45, 69.52it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 86%|████████▋ | 16474/19048 [03:57<00:38, 66.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1087 > 1024). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 16490/19048 [03:58<00:37, 68.16it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 87%|████████▋ | 16570/19048 [03:59<00:40, 60.95it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1059 > 1024). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 16584/19048 [03:59<00:40, 60.81it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 88%|████████▊ | 16750/19048 [04:02<00:34, 66.15it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2016 > 1024). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 16764/19048 [04:02<00:36, 62.79it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 90%|████████▉ | 17054/19048 [04:06<00:28, 70.01it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1548 > 1024). Running this sequence through the model will result in indexing errors
 90%|████████▉ | 17062/19048 [04:06<00:28, 69.43it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 96%|█████████▌| 18224/19048 [04:23<00:12, 65.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1237 > 1024). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 18241/19048 [04:23<00:11, 71.65it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 96%|█████████▌| 18302/19048 [04:24<00:11, 67.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1306 > 1024). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 18318/19048 [04:24<00:10, 71.04it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 97%|█████████▋| 18436/19048 [04:26<00:08, 72.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2182 > 1024). Running this sequence through the model will result in indexing errors
 97%|█████████▋| 18452/19048 [04:26<00:08, 72.89it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


 99%|█████████▉| 18908/19048 [04:32<00:02, 69.12it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1484 > 1024). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 18926/19048 [04:33<00:01, 74.98it/s]

Text is longer than 1024
[<FrameSummary file <ipython-input-97-ccfa008fa64c>, line 16 in test>, <FrameSummary file <ipython-input-100-fe745795e8be>, line 7 in generate_answer>]


100%|██████████| 19048/19048 [04:35<00:00, 69.26it/s]

Precision: 0.962508115126596
Recall: 0.962508115126596
F1: 0.962508115126596
Skipped: 45





Epoch 2:
Precision: 0.962508115126596
Recall: 0.962508115126596
F1: 0.962508115126596
Skipped: 45

Epoch 3:
Precision: 0.9684621220331635
Recall: 0.9684621220331635
F1: 0.9684621220331635
Skipped: 45

Epoch 4:
Precision: 0.967658052982285
Recall: 0.967658052982285
F1: 0.967658052982285
Skipped: 45


Epoch 5:
Precision: 0.9523809523809523
Recall: 0.9523809523809523
F1: 0.9523809523809523
Skipped: 18939
