In [7]:
import json
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR

import random
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [8]:
num_epochs = 8

In [9]:
supporting_texts = json.load(open('../data/supporting.json'))
refuting_texts = json.load(open('../data/refuting.json'))

dev_supporting_texts = json.load(open('../data/dev_supporting.json'))
dev_refuting_texts = json.load(open('../data/dev_refuting.json'))

In [10]:
import random
split = 0.8

_prompt = '\n\n\nThe evidence supports the claim:\n'
train_list = [item + _prompt + 'Yes.' for item in supporting_texts]
train_list += [item + _prompt + 'Nope.' for item in refuting_texts]
random.shuffle(train_list)

dev_list = [item + _prompt + 'Yes.' for item in dev_supporting_texts]
dev_list += [item + _prompt + 'Nope.' for item in dev_refuting_texts]
random.shuffle(dev_list)


In [11]:
json.dump(train_list, open('../data/train_list.json', 'w'))
json.dump(dev_list, open('../data/dev_list.json', 'w'))

In [12]:
train_list = json.load(open('../data/train_list.json'))
dev_list = json.load(open('../data/dev_list.json'))

In [13]:
print(dev_list[0])

Evidence:
Uranium-235 (235U) is an isotope of uranium making up about 0.72 % of natural uranium. Unlike the predominant isotope uranium-238, it is fissile, i.e., it can sustain a fission chain reaction. It is the only fissile isotope that is a primordial nuclide or found in significant quantity in nature. Uranium-235 has a half-life of 703.8 million years. It was discovered in 1935 by Arthur Jeffrey Dempster. Its (fission) nuclear cross section for slow thermal neutrons is about 584.994 barns. For fast neutrons it is on the order of 1 barnArthur Jeffrey Dempster (August 14, 1886 -- March 11, 1950) was a Canadian-American physicist best known for his work in mass spectrometry and his discovery of the uranium isotope 235U.


Claim:
In 1935, Uranium-235 was discovered.


The evidence supports the claim:
Yes.


In [8]:
def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def batchify(data, n):
    len_dict = {}
    for item in data:
        length = item.shape[1]
        try:
            len_dict[length].append(item)
        except:
            len_dict[length] = [item]

    batch_chunks = []
    for k in len_dict.keys():
        vectors = len_dict[k]
        batch_chunks += chunks(vectors, n)

    batches = []
    for chunk in batch_chunks:
        inputs = torch.stack([item[0] for item in chunk])
        batches.append((inputs))

    return batches

In [8]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [10]:
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

In [12]:
_limit = 1024
data = []
total_skipped = 0
for item in train_list:
    tokens = tokenizer.encode(item, return_tensors='pt')
    if tokens.shape[1] > _limit:
        total_skipped += 1
        continue
    data.append(tokens)
print(f'Skipped {total_skipped} out of {len(train_list)}')

Token indices sequence length is longer than the specified maximum sequence length for this model (1043 > 1024). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 

In [11]:
train_batches = batchify(data, 1)

NameError: name 'data' is not defined

In [12]:
def train(train_model, batches, optimizer, criterion):
    total_loss = 0.
    for i, batch in tqdm(enumerate(batches), total=len(batches)):
        model.train()
        inputs = batch
        optimizer.zero_grad()
        loss = train_model(inputs.cuda(), labels=inputs.cuda())[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(train_model.parameters(), 0.5)
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(batches)

In [None]:
len(train_batches)

In [14]:
#train_batches = train_batches[:2000]

In [16]:
random.shuffle(train_batches)
scheduler = StepLR(optimizer, step_size=2, gamma=0.8)
for epoch in range(num_epochs):
    random.shuffle(train_batches)
    loss = train(model, train_batches, optimizer, criterion)
    #test(model, dev_list[:2000])
    print('Epoch:', epoch, 'Loss:', loss)
    torch.save({'epoch': epoch,
                'model_state_dict': model.state_dict()},
                'save_fever' + str(epoch))
    scheduler.step()

100%|██████████| 219832/219832 [4:00:53<00:00, 15.21it/s]  


Epoch: 0 Loss: 0.8349866022318717


100%|██████████| 219832/219832 [4:02:03<00:00, 15.14it/s]  


Epoch: 1 Loss: 0.27282461847403583


100%|██████████| 219832/219832 [3:59:59<00:00, 15.27it/s]  


Epoch: 2 Loss: 0.2118869653484744


100%|██████████| 219832/219832 [3:59:33<00:00, 15.29it/s]  


Epoch: 3 Loss: 0.1911627285202235


100%|██████████| 219832/219832 [4:04:37<00:00, 14.98it/s]    


Epoch: 4 Loss: 0.17449108368149846


100%|██████████| 219832/219832 [3:58:51<00:00, 15.34it/s]  


Epoch: 5 Loss: 0.1654880905636952


100%|██████████| 219832/219832 [4:08:54<00:00, 14.72it/s]  


Epoch: 6 Loss: 0.15662710911507668


100%|██████████| 219832/219832 [4:17:08<00:00, 14.25it/s]  


Epoch: 7 Loss: 0.15177448879509217


## Testing

In [9]:
import sys
import traceback

def test(model, data):
    model.eval()
    tp = 0
    fp = 0
    fn = 0

    skipped = 0

    for item in tqdm(data):
        expected = get_answer_from_text(item)
        predicted = ''
        try:
            predicted = generate_answer(model, item)
        except (IndexError, RuntimeError) as e:
            #print(str(e))
            #exc_type, exc_value, exc_traceback = sys.exc_info()
            #print(repr(traceback.extract_tb(exc_traceback)))
            skipped += 1
            continue

        if expected == predicted:
            tp += 1
        if expected == 'N' and predicted == 'Y':
            fp += 1
        if expected == 'Y' and predicted == 'N':
            fn += 1

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', f1)
    print('Skipped:', skipped)

In [10]:
def get_text_up_to_question(text):
    _claim_yn = 'The evidence supports the claim:\n'
    return text[:text.find(_claim_yn) + len(_claim_yn)]

In [11]:
def get_answer_from_text(text):
    _claim_yn = 'The evidence supports the claim:\n'
    pos = text.find(_claim_yn) + len(_claim_yn)
    return text[pos]

In [12]:
def generate_answer(model, text):
    prompt = get_text_up_to_question(text)
    tokens = tokenizer.encode(prompt, return_tensors='pt')
    _length = 1
    tokens_length = tokens.shape[1]
    if tokens_length + _length >= 1024:
        raise RuntimeError('Text is longer than 1024')
    output = model.generate(
             tokens.cuda(),
             max_length=tokens_length + _length, 
             pad_token_id=50256
    )
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return get_answer_from_text(output)

In [13]:
def generate_full_answer(model, text):
    prompt = get_text_up_to_question(text)
    tokens = tokenizer.encode(prompt, return_tensors='pt')
    _length = 3
    tokens_length = tokens.shape[1]
    if tokens_length + _length >= 1024:
        raise RuntimeError('Text is longer than 1024')
    output = model.generate(
             tokens.cuda(),
             max_length=tokens_length + _length, 
             pad_token_id=50256
    )
    score = model(output, labels=output)[0]
    out_text = tokenizer.decode(output[0][tokens_length:], skip_special_tokens=True)

    return out_text, float(score)

In [20]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
checkpoint = torch.load(f'save_fever5')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [23]:
get_answer_from_text(dev_list[1])

'N'

In [24]:
generate_answer(model, dev_list[1])

'N'

In [21]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever{epoch}')
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    test(model, dev_list)

Epoch 0


  1%|          | 94/12317 [00:01<03:18, 61.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1060 > 1024). Running this sequence through the model will result in indexing errors
100%|██████████| 12317/12317 [03:05<00:00, 66.51it/s]


Precision: 0.9332385175269807
Recall: 0.9704219225750326
F1: 0.9514670760832479
Skipped: 24
Epoch 1


100%|██████████| 12317/12317 [03:07<00:00, 65.74it/s]


Precision: 0.934847728407389
Recall: 0.976107732406603
F1: 0.9550323019381163
Skipped: 24
Epoch 2


100%|██████████| 12317/12317 [03:08<00:00, 65.48it/s]


Precision: 0.9458805870580387
Recall: 0.9741497767090347
F1: 0.9598070739549839
Skipped: 24
Epoch 3


100%|██████████| 12317/12317 [03:07<00:00, 65.84it/s]


Precision: 0.9344887635790696
Recall: 0.9796574806572198
F1: 0.9565401918343095
Skipped: 24
Epoch 4


100%|██████████| 12317/12317 [03:09<00:00, 65.09it/s]


Precision: 0.9274099883855982
Recall: 0.9790681380276756
F1: 0.9525391956373551
Skipped: 24
Epoch 5


100%|██████████| 12317/12317 [03:10<00:00, 64.79it/s]


Precision: 0.9371414317784984
Recall: 0.976943746207853
F1: 0.9566287557290782
Skipped: 24
Epoch 6


100%|██████████| 12317/12317 [03:07<00:00, 65.58it/s]


Precision: 0.9339583506181034
Recall: 0.9791249891275985
F1: 0.9560084925690022
Skipped: 24
Epoch 7


100%|██████████| 12317/12317 [03:08<00:00, 65.31it/s]

Precision: 0.9368394774070068
Recall: 0.9760707473556441
F1: 0.9560528215362405
Skipped: 24





## Fine tune on adversarial examples

In [13]:
supporting_texts = json.load(open('../data/adv_supporting.json'))
refuting_texts = json.load(open('../data/adv_refuting.json'))

dev_supporting_texts = json.load(open('../data/adv_dev_supporting.json'))
dev_refuting_texts = json.load(open('../data/adv_dev_refuting.json'))

In [14]:
import random

train_list = supporting_texts + refuting_texts
random.shuffle(train_list)

dev_list = dev_supporting_texts + dev_refuting_texts
random.shuffle(dev_list)

In [15]:
_limit = 1024
data = []
total_skipped = 0
for item in train_list:
    tokens = tokenizer.encode(item, return_tensors='pt')
    if tokens.shape[1] > _limit:
        total_skipped += 1
        continue
    data.append(tokens)
print(f'Skipped {total_skipped} out of {len(train_list)}')

Skipped 0 out of 13724


In [16]:
len(data)

13724

In [17]:
train_batches = batchify(data, 1)

In [29]:
model = GPT2LMHeadModel.from_pretrained('gpt2')
_ = model.cuda()

In [30]:
#epoch = 5
#checkpoint = torch.load(f'save_fever{epoch}')
#model.load_state_dict(checkpoint['model_state_dict'])

In [31]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

In [32]:
num_epochs = 30

In [33]:
random.shuffle(train_batches)
scheduler = StepLR(optimizer, step_size=2, gamma=0.8)
for epoch in range(num_epochs):
    random.shuffle(train_batches)
    loss = train(model, train_batches, optimizer, criterion)
    #test(model, dev_list[:2000])
    print('Epoch:', epoch, 'Loss:', loss)
    torch.save({'epoch': epoch,
                'model_state_dict': model.state_dict()},
                'save_fever_adv' + str(epoch))
    scheduler.step()

100%|██████████| 13724/13724 [18:48<00:00, 12.16it/s]


Epoch: 0 Loss: 2.8265762857699737


100%|██████████| 13724/13724 [18:40<00:00, 12.24it/s]


Epoch: 1 Loss: 2.398946189952684


100%|██████████| 13724/13724 [18:41<00:00, 12.24it/s]


Epoch: 2 Loss: 2.0794248728950744


100%|██████████| 13724/13724 [18:40<00:00, 12.25it/s]


Epoch: 3 Loss: 1.848062120221666


100%|██████████| 13724/13724 [18:39<00:00, 12.26it/s]


Epoch: 4 Loss: 1.6539967937258713


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 5 Loss: 1.513019484298695


100%|██████████| 13724/13724 [18:46<00:00, 12.19it/s]


Epoch: 6 Loss: 1.3904902372768198


100%|██████████| 13724/13724 [18:39<00:00, 12.26it/s]


Epoch: 7 Loss: 1.300064485630059


100%|██████████| 13724/13724 [18:34<00:00, 12.31it/s]


Epoch: 8 Loss: 1.2185287391775048


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 9 Loss: 1.1582863060133197


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 10 Loss: 1.101385164749951


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 11 Loss: 1.056454688653185


100%|██████████| 13724/13724 [18:45<00:00, 12.20it/s]


Epoch: 12 Loss: 1.0138452767031563


100%|██████████| 13724/13724 [18:44<00:00, 12.20it/s]


Epoch: 13 Loss: 0.987048549780362


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 14 Loss: 0.9519627090963975


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 15 Loss: 0.9310495258080663


100%|██████████| 13724/13724 [18:33<00:00, 12.33it/s]


Epoch: 16 Loss: 0.9055487251582932


100%|██████████| 13724/13724 [18:40<00:00, 12.25it/s]


Epoch: 17 Loss: 0.8882209499927384


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 18 Loss: 0.8701309680753325


100%|██████████| 13724/13724 [18:44<00:00, 12.20it/s]


Epoch: 19 Loss: 0.85805117948585


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 20 Loss: 0.8433181485769219


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 21 Loss: 0.833092354444466


100%|██████████| 13724/13724 [18:38<00:00, 12.27it/s]


Epoch: 22 Loss: 0.8230131825909107


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 23 Loss: 0.8151353167549658


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 24 Loss: 0.8054880833495393


100%|██████████| 13724/13724 [18:44<00:00, 12.21it/s]


Epoch: 25 Loss: 0.7983001110122787


100%|██████████| 13724/13724 [18:44<00:00, 12.21it/s]


Epoch: 26 Loss: 0.7931169445982843


100%|██████████| 13724/13724 [18:37<00:00, 12.28it/s]


Epoch: 27 Loss: 0.7878529269327509


100%|██████████| 13724/13724 [18:32<00:00, 12.34it/s]


Epoch: 28 Loss: 0.7815568830894757


100%|██████████| 13724/13724 [18:21<00:00, 12.46it/s]


Epoch: 29 Loss: 0.7777050177764047


## Testing with adversarial examples

In [63]:
get_answer_from_text(dev_list[12])

'N'

In [64]:
generate_answer(model, dev_list[12])

'N'

In [34]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever_adv{epoch}')
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    test(model, dev_list)

Epoch 0


100%|██████████| 1216/1216 [00:26<00:00, 46.15it/s]


Precision: 0.5761947700631199
Recall: 0.8565683646112601
F1: 0.6889487870619946
Skipped: 0
Epoch 1


100%|██████████| 1216/1216 [00:26<00:00, 46.05it/s]


Precision: 0.5304054054054054
Recall: 0.9515151515151515
F1: 0.6811279826464208
Skipped: 0
Epoch 2


100%|██████████| 1216/1216 [00:26<00:00, 45.85it/s]


Precision: 0.8162031438935913
Recall: 0.6343984962406015
F1: 0.7139079851930196
Skipped: 0
Epoch 3


100%|██████████| 1216/1216 [00:26<00:00, 45.72it/s]


Precision: 0.9070422535211268
Recall: 0.56
F1: 0.6924731182795699
Skipped: 0
Epoch 4


100%|██████████| 1216/1216 [00:26<00:00, 45.87it/s]


Precision: 0.8591549295774648
Recall: 0.6066907775768535
F1: 0.7111817700052995
Skipped: 0
Epoch 5


100%|██████████| 1216/1216 [00:26<00:00, 45.92it/s]


Precision: 0.5585970915312233
Recall: 0.9328571428571428
F1: 0.6987693953986089
Skipped: 0
Epoch 6


100%|██████████| 1216/1216 [00:26<00:00, 45.78it/s]


Precision: 0.6425166825548141
Recall: 0.8014268727705113
F1: 0.7132275132275133
Skipped: 0
Epoch 7


100%|██████████| 1216/1216 [00:26<00:00, 45.84it/s]


Precision: 0.7395048439181916
Recall: 0.7053388090349076
F1: 0.722017866526537
Skipped: 0
Epoch 8


100%|██████████| 1216/1216 [00:26<00:00, 45.76it/s]


Precision: 0.8177514792899409
Recall: 0.6506591337099812
F1: 0.7246984792868381
Skipped: 0
Epoch 9


100%|██████████| 1216/1216 [00:26<00:00, 45.70it/s]


Precision: 0.875
Recall: 0.5932203389830508
F1: 0.707070707070707
Skipped: 0
Epoch 10


100%|██████████| 1216/1216 [00:26<00:00, 45.78it/s]


Precision: 0.9634146341463414
Recall: 0.5302013422818792
F1: 0.683982683982684
Skipped: 0
Epoch 11


100%|██████████| 1216/1216 [00:26<00:00, 45.75it/s]


Precision: 0.7148803329864725
Recall: 0.7292993630573248
F1: 0.7220178665265371
Skipped: 0
Epoch 12


100%|██████████| 1216/1216 [00:26<00:00, 45.85it/s]


Precision: 0.6907114624505929
Recall: 0.7740863787375415
F1: 0.7300261096605745
Skipped: 0
Epoch 13


100%|██████████| 1216/1216 [00:26<00:00, 45.95it/s]


Precision: 0.7384780278670954
Recall: 0.7088477366255144
F1: 0.7233595800524933
Skipped: 0
Epoch 14


100%|██████████| 1216/1216 [00:26<00:00, 45.70it/s]


Precision: 0.7903780068728522
Recall: 0.6679574056147144
F1: 0.7240293809024133
Skipped: 0
Epoch 15


100%|██████████| 1216/1216 [00:26<00:00, 45.92it/s]


Precision: 0.7052313883299799
Recall: 0.7594799566630552
F1: 0.7313510693792383
Skipped: 0
Epoch 16


100%|██████████| 1216/1216 [00:26<00:00, 45.94it/s]


Precision: 0.6012323943661971
Recall: 0.8951507208387942
F1: 0.7193259610321222
Skipped: 0
Epoch 17


100%|██████████| 1216/1216 [00:26<00:00, 45.66it/s]


Precision: 0.7513397642015005
Recall: 0.7123983739837398
F1: 0.7313510693792383
Skipped: 0
Epoch 18


100%|██████████| 1216/1216 [00:26<00:00, 45.71it/s]


Precision: 0.8284313725490197
Recall: 0.6282527881040892
F1: 0.7145877378435518
Skipped: 0
Epoch 19


100%|██████████| 1216/1216 [00:26<00:00, 45.85it/s]


Precision: 0.8582375478927203
Recall: 0.6081447963800904
F1: 0.711864406779661
Skipped: 0
Epoch 20


100%|██████████| 1216/1216 [00:26<00:00, 45.75it/s]


Precision: 0.7415254237288136
Recall: 0.720164609053498
F1: 0.7306889352818372
Skipped: 0
Epoch 21


100%|██████████| 1216/1216 [00:26<00:00, 45.84it/s]


Precision: 0.848180677540778
Recall: 0.617351598173516
F1: 0.7145877378435518
Skipped: 0
Epoch 22


100%|██████████| 1216/1216 [00:26<00:00, 45.85it/s]


Precision: 0.7298136645962733
Recall: 0.7382198952879581
F1: 0.7339927121290993
Skipped: 0
Epoch 23


100%|██████████| 1216/1216 [00:26<00:00, 45.68it/s]


Precision: 0.7173252279635258
Recall: 0.7556029882604055
F1: 0.735966735966736
Skipped: 0
Epoch 24


100%|██████████| 1216/1216 [00:26<00:00, 45.71it/s]


Precision: 0.7112676056338029
Recall: 0.7610333692142088
F1: 0.735309412376495
Skipped: 0
Epoch 25


100%|██████████| 1216/1216 [00:26<00:00, 45.81it/s]


Precision: 0.7526997840172787
Recall: 0.7061803444782169
F1: 0.7286983795086253
Skipped: 0
Epoch 26


100%|██████████| 1216/1216 [00:26<00:00, 45.91it/s]


Precision: 0.7643865363735071
Recall: 0.7047047047047047
F1: 0.7333333333333334
Skipped: 0
Epoch 27


100%|██████████| 1216/1216 [00:26<00:00, 45.68it/s]


Precision: 0.8231780167264038
Recall: 0.6451310861423221
F1: 0.7233595800524933
Skipped: 0
Epoch 28


100%|██████████| 1216/1216 [00:26<00:00, 45.74it/s]


Precision: 0.7822671156004489
Recall: 0.6819960861056752
F1: 0.7286983795086253
Skipped: 0
Epoch 29


100%|██████████| 1216/1216 [00:26<00:00, 45.97it/s]

Precision: 0.7274590163934426
Recall: 0.7473684210526316
F1: 0.7372793354101765
Skipped: 0





In [36]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever_adv{epoch}')
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    test(model, dev_list)

Epoch 0


100%|██████████| 1216/1216 [00:26<00:00, 46.07it/s]


Precision: 0.8253768844221105
Recall: 0.6100278551532033
F1: 0.7015483182060865
Skipped: 0
Epoch 1


100%|██████████| 1216/1216 [00:26<00:00, 45.18it/s]


Precision: 0.9275568181818182
Recall: 0.5605150214592275
F1: 0.6987693953986089
Skipped: 0
Epoch 2


100%|██████████| 1216/1216 [00:26<00:00, 45.34it/s]


Precision: 0.7562642369020501
Recall: 0.6626746506986028
F1: 0.7063829787234043
Skipped: 0
Epoch 3


100%|██████████| 1216/1216 [00:27<00:00, 44.92it/s]


Precision: 0.8538754764930114
Recall: 0.6103542234332425
F1: 0.7118644067796611
Skipped: 0
Epoch 4


100%|██████████| 1216/1216 [00:27<00:00, 44.86it/s]


Precision: 0.8784313725490196
Recall: 0.5983971504897596
F1: 0.711864406779661
Skipped: 0
Epoch 5


100%|██████████| 1216/1216 [00:27<00:00, 44.96it/s]


Precision: 0.7762237762237763
Recall: 0.650390625
F1: 0.7077577045696068
Skipped: 0
Epoch 6


100%|██████████| 1216/1216 [00:26<00:00, 45.18it/s]


Precision: 0.8467243510506799
Recall: 0.6272893772893773
F1: 0.7206733298264071
Skipped: 0
Epoch 7


100%|██████████| 1216/1216 [00:27<00:00, 44.66it/s]


Precision: 0.8681177976952625
Recall: 0.6091644204851752
F1: 0.7159450897571277
Skipped: 0
Epoch 8


100%|██████████| 1216/1216 [00:27<00:00, 44.99it/s]


Precision: 0.7633333333333333
Recall: 0.6849451645064806
F1: 0.7220178665265371
Skipped: 0
Epoch 9


100%|██████████| 1216/1216 [00:26<00:00, 45.07it/s]

Precision: 0.8235294117647058
Recall: 0.6417212347988774
F1: 0.7213459516298634
Skipped: 0





In [37]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.cuda()
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever{epoch}')
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    test(model, dev_list)

Epoch 0


100%|██████████| 1216/1216 [00:26<00:00, 45.30it/s]


Precision: 0.5278491859468724
Recall: 0.9263157894736842
F1: 0.6724890829694323
Skipped: 0
Epoch 1


100%|██████████| 1216/1216 [00:26<00:00, 45.07it/s]


Precision: 0.523645743766122
Recall: 0.9199395770392749
F1: 0.6673972602739725
Skipped: 0
Epoch 2


100%|██████████| 1216/1216 [00:27<00:00, 44.80it/s]


Precision: 0.5274442538593482
Recall: 0.924812030075188
F1: 0.6717640633533589
Skipped: 0
Epoch 3


100%|██████████| 1216/1216 [00:27<00:00, 44.08it/s]


Precision: 0.521404109589041
Recall: 0.9269406392694064
F1: 0.6673972602739726
Skipped: 0
Epoch 4


100%|██████████| 1216/1216 [00:27<00:00, 44.15it/s]


Precision: 0.5222222222222223
Recall: 0.9299847792998478
F1: 0.668856048166393
Skipped: 0
Epoch 5


100%|██████████| 1216/1216 [00:27<00:00, 44.17it/s]


Precision: 0.5134907251264755
Recall: 0.9530516431924883
F1: 0.6673972602739726
Skipped: 0
Epoch 6


100%|██████████| 1216/1216 [00:27<00:00, 44.42it/s]


Precision: 0.5165394402035624
Recall: 0.9427244582043344
F1: 0.6673972602739726
Skipped: 0
Epoch 7


100%|██████████| 1216/1216 [00:27<00:00, 44.53it/s]


Precision: 0.5186125211505922
Recall: 0.9474497681607419
F1: 0.6703116457080373
Skipped: 0
Epoch 8


100%|██████████| 1216/1216 [00:27<00:00, 43.91it/s]


ZeroDivisionError: division by zero

## Tests with dev from COQA

In [20]:
epoch = 5
checkpoint = torch.load(f'save_fever{epoch}')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [21]:
dev_dict = json.load(open('../data/coqa-dev-v1.0.json', encoding='utf8'))

In [22]:
print(dev_dict['data'][0]['story'])

Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer's horses slept. But Cotton wasn't alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton's mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer's orange paint, she used it to paint herself like them. When her mommy and sisters found her they started laughing. 

"What are you doing, Cotton?!" 

"I only wanted to be more like you". 

Cotton's mommy rubbed her face on Cotton's and said "Oh Cotton, but your fur is so pretty and special, like you. We would never want you to be any o

In [23]:
story = dev_dict['data'][0]['story'].replace('\n\n', '\n').replace('\n', '')

text = f"""
Evidence:
{story}

Claim:
What color was Cotton? Blue.

The evidence supports the claim:
"""[1:]


In [24]:
print(text)

Evidence:
Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer's horses slept. But Cotton wasn't alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton's mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer's orange paint, she used it to paint herself like them. When her mommy and sisters found her they started laughing. "What are you doing, Cotton?!" "I only wanted to be more like you". Cotton's mommy rubbed her face on Cotton's and said "Oh Cotton, but your fur is so pretty and special, like you. We would never want you to be a

In [25]:
generate_full_answer(model, text)

('Yes.\n\n', 7.669204235076904)

In [26]:
def get_text_from_data_item(item, max_num_questions=0, question_number=-1, last_question=True):
    text = 'In the text below two people are discussing a story.\n\n'
    text += 'Story:\n' + item['story'] + '\n\n'
    text += 'Discussion:\n'
    text += '\n'.join(['Q: ' + q['input_text'] 
                       + '\nA: ' + a['input_text'] 
                       for q, a in zip(item['questions'][max(0,question_number-max_num_questions):question_number+1], 
                                       item['answers'][max(0,question_number-max_num_questions):question_number+1]) 
                      ])
    if not last_question:
        text = '\n'.join(text.split('\n')[:-1]) + '\n'
    return text

In [27]:
index = 0
number = 0
small_text = get_text_from_data_item(dev_dict['data'][index], 
                                     max_num_questions=8,
                                     question_number=number,
                                     last_question=True)
last_question = small_text.split('\n')[-1].replace('A: ', '')
discussion_anchor = 'Discussion:\n'
discussion = small_text[small_text.find(discussion_anchor) + len(discussion_anchor):]

In [28]:
print(discussion)

Q: What color was Cotton?
A: white


In [29]:
last_question

'white'

In [30]:
statement_model = GPT2LMHeadModel.from_pretrained('gpt2')
statement_model.cuda()
checkpoint = torch.load('save_statement' + str(2))
statement_model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [31]:
def get_statement_prompt(item, max_num_questions=0, question_number=-1, use_answer=None):
    text = 'Discussion:\n'
    text += '\n'.join(['Q: ' + q['input_text'] 
                       + '\nA: ' + a['input_text'] 
                       for q, a in zip(item['questions'][max(0,question_number-max_num_questions):question_number+1], 
                                       item['answers'][max(0,question_number-max_num_questions):question_number+1]) 
                      ])
    if use_answer:
        text = '\n'.join(text.split('\n')[:-1]) + '\n' + 'A: ' + use_answer + '\n'
    text += '\nStatement:\n'
    return text

In [32]:
def generate_statement_from_dialogue(statement_model, prompt):
    tokens = tokenizer.encode(prompt, return_tensors='pt')
    _length = 50
    tokens_length = tokens.shape[1]
    if tokens_length + _length > 1024:
        return ''
    output = statement_model.generate(
             tokens.cuda(),
             max_length=tokens_length + _length,
             pad_token_id=50256
    )
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    offset = len(prompt)
    start = offset
    end = output.find('\n', start)
    return output[start:end].split(':')[-1].strip()

In [33]:
def get_description_from_data_item(item):
    return item['story']

In [34]:
description = get_description_from_data_item(dev_dict['data'][index])

In [35]:
statement_prompt = get_statement_prompt(dev_dict['data'][index], 
                         max_num_questions=5,
                         question_number=number)

In [36]:
print(statement_prompt)

Discussion:
Q: What color was Cotton?
A: white
Statement:



In [37]:
def create_claim_from_description_and_dialogue(description, dialogue):
    if not dialogue:
        return ''
    if dialogue[-1] == '.':
        dialogue = dialogue[:-1]    
    text = 'Evidence:\n'
    text += description.replace('\n\n', '\n') + '\n\n'
    text += 'Claim:\n'
    text += dialogue + '\n\n'
    text += 'The evidence supports the claim:\n'
    return text

In [38]:
statement = generate_statement_from_dialogue(statement_model, statement_prompt)
text = create_claim_from_description_and_dialogue(description, statement)

In [39]:
text

'Evidence:\nOnce upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer\'s horses slept. But Cotton wasn\'t alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton\'s mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer\'s orange paint, she used it to paint herself like them. When her mommy and sisters found her they started laughing. \n"What are you doing, Cotton?!" \n"I only wanted to be more like you". \nCotton\'s mommy rubbed her face on Cotton\'s and said "Oh Cotton, but your fur is so pretty and special, like you. We would never wa

In [40]:
generate_answer(model, text)

'Y'

In [41]:
import numpy as np
from sentence_transformers import SentenceTransformer

sentence_model = SentenceTransformer('msmarco-distilbert-base-v3')
sentence_model = sentence_model.cuda()

In [42]:
def get_embeddings_from_text(text):
    outputs = sentence_model.encode(text)
    return outputs

In [43]:
def get_all_answers(dev_dict, dev_index):
    answers = [[item['input_text'] for item in dev_dict['data'][dev_index]['answers']]]
    answers += [[item['input_text'] for item in dev_dict['data'][dev_index]['additional_answers'][str(index)]] for index in range(3)]
    return [list(set([answers[j][i] for j in range(len(answers))])) for i in range(len(answers[0]))]

In [44]:
def compute_accuracy_of_model(model, dlist):
    correct_predictions = []
    total_number_of_questions = 0
    correct_answers = 0
    wrong_predictions = []

    false_positives = []
    for index, text in tqdm(enumerate(dlist), total=len(dlist)):

        all_answers = get_all_answers(dev_dict, index)
        total_questions = len(all_answers)        
        
        for number in range(total_questions):
            small_text = get_text_from_data_item(dev_dict['data'][index], 
                                                 max_num_questions=8,
                                                 question_number=number,
                                                 last_question=True)
            last_question = small_text.split('\n')[-1].replace('A: ', '')
            description = get_description_from_data_item(dev_dict['data'][index])
            statement_prompt = get_statement_prompt(dev_dict['data'][index], 
                         max_num_questions=5,
                         question_number=number)
            statement = generate_statement_from_dialogue(statement_model, statement_prompt)
            text = create_claim_from_description_and_dialogue(description, statement)
            try:
                y_n_from_fever = generate_answer(model, text)
            except RuntimeError:
                continue
            
            label = last_question
            

            if y_n_from_fever == 'Y' or y_n_from_fever == 'N' and label[:2].lower() == 'no':
                correct_answers += 1

                correct_predictions.append({
                    'index': index,
                    'number': number,
                    'statement': statement,
                    'label': label,
                })                    
            else:
                wrong_predictions.append({
                    'index': index,
                    'number': number,
                    'statement': statement,
                    'label': label,
                })                 


            total_number_of_questions += 1

    return correct_answers/total_number_of_questions, wrong_predictions, false_positives, correct_predictions

In [44]:
accuracies = []
wrong_predictions = []
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever{epoch}')
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    accuracy, wrong_prediction, _, _ = compute_accuracy_of_model(model, dev_list[:50])
    accuracies.append(accuracy)
    wrong_predictions.append(wrong_prediction)
    print(accuracy)

Epoch 0


100%|██████████| 50/50 [04:54<00:00,  5.88s/it]


0.9285714285714286
Epoch 1


100%|██████████| 50/50 [04:49<00:00,  5.79s/it]


0.9235588972431078
Epoch 2


100%|██████████| 50/50 [04:39<00:00,  5.59s/it]


0.9223057644110275
Epoch 3


100%|██████████| 50/50 [04:41<00:00,  5.63s/it]


0.9273182957393483
Epoch 4


100%|██████████| 50/50 [04:41<00:00,  5.63s/it]


0.931077694235589
Epoch 5


100%|██████████| 50/50 [04:41<00:00,  5.63s/it]


0.9423558897243107
Epoch 6


100%|██████████| 50/50 [04:38<00:00,  5.56s/it]


0.9398496240601504
Epoch 7


100%|██████████| 50/50 [04:41<00:00,  5.64s/it]

0.9461152882205514





In [44]:
wrong_predictions[5]

[{'index': 0,
  'number': 10,
  'statement': 'The other cats licked her face when Cotton emerged from the bucket of water.',
  'label': 'licked her face'},
 {'index': 1,
  'number': 8,
  'statement': 'They did not know what the note was.',
  'label': 'unknown'},
 {'index': 2, 'number': 1, 'statement': 'Yes.', 'label': 'Yes'},
 {'index': 2, 'number': 3, 'statement': 'I know her.', 'label': 'Yes'},
 {'index': 4,
  'number': 5,
  'statement': 'Kendra does not want to miss story time.',
  'label': 'story time'},
 {'index': 5,
  'number': 5,
  'statement': 'Arthur Kill and the Kill Van Kull separate New York from new jersey.',
  'label': 'Arthur Kill and the Kill Van Kull'},
 {'index': 7, 'number': 17, 'statement': 'He is 50.', 'label': '50'},
 {'index': 10, 'number': 8, 'statement': 'Mayweather is 38.', 'label': '38'},
 {'index': 11,
  'number': 7,
  'statement': 'Frederick G. Kilgour is not currently enrolled at the University.',
  'label': 'He is not'},
 {'index': 14,
  'number': 16,
  '

#### TRY WITHOUT ADDITIONAL NEGATIONS IN TRAINING DATA!!!

## Testing adversarial examples

In [98]:
adversarial_list = json.load(open('../data/adversarial_claims_dev.json'))

In [99]:
print(adversarial_list[1])

In the text below two people are discussing a story.

Story:
My doorbell rings. On the step, I find the elderly Chinese lady, small and slight, holding the hand of a little boy. In her other hand, she holds a paper carrier bag. 

I know this lady. It is not her first visit. She is the boy's grandmother, and her daughter bought the house next door last October. 

Her daughter, Nicole, speaks fluent English. But she is now in Shanghai, and her parents are here with the little boy. Nicole has obviously told her mother that I am having heart surgery soon, so her mother has decided I need more nutrients. 

I know what is inside the bag--a thermos with hot soup and a stainless-steel container with rice, vegetables and either chicken, meat or shrimp, sometimes with a kind of pancake. This has become an almost-daily practice. 

Communication between us is somewhat affected by the fact that she doesn't speak English and all I can say in Chinese is hello. Once, she brought an iPad as well as the

In [100]:
_question_prompt = '\nQ: '
_correct_answer_prompt = '\nCA: '
_wrong_answer_prompt = '\nWA: '
    
def get_text_up_to_question_number(text, number):
    pos = text.find(_correct_answer_prompt)
    for _ in range(number):
        pos = text.find(_answer_prompt, pos + 1)
    return text[0:pos + 1]
    
def get_answers_number(text):
    return text.count(_correct_answer_prompt)

def get_correct_answer_number(text, number):
    pos = text.find(_correct_answer_prompt)
    for _ in range(number):
        pos = text.find(_correct_answer_prompt, pos + 1)
    end = text.find('\n', pos + len(_correct_answer_prompt))
    return text[pos + len(_correct_answer_prompt):end]

def get_wrong_answer_number(text, number):
    pos = text.find(_wrong_answer_prompt)
    for _ in range(number):
        pos = text.find(_wrong_answer_prompt, pos + 1)
    end = text.find('\n', pos + len(_wrong_answer_prompt))
    return text[pos + len(_wrong_answer_prompt):end]


def get_question_number(text, number):
    pos = text.find(_question_prompt)
    for _ in range(number):
        pos = text.find(_question_prompt, pos + 1)
    end = text.find('\n', pos + len(_question_prompt))
    return text[pos + len(_question_prompt):end]

def get_description_from_text(text):
    start_prompt = 'Story:'
    end_prompt = 'Discussion:'
    return text[text.find(start_prompt) + len(start_prompt):text.find(end_prompt)]

def get_discussion_from_text(text):
    start_prompt = 'Discussion:'
    return text[text.find(start_prompt) + len(start_prompt):].strip()

def get_statement_prompt_from_text(full_text, number, max_questions=5):
    text = 'Discussion:\n'
    questions_and_answers_list = get_discussion_from_text(full_text).split('\n')
    start = max(0, (number + 1 - max_questions) * 3)
    end = (number + 1) * 3
    questions_and_answers_list = questions_and_answers_list[start:end]
    text += '\n'.join(questions_and_answers_list)
    text += '\nStatement:\n'
    return text

In [101]:
get_discussion_from_text(adversarial_list[0])

"Q: Whose paint was it?\nWA: Cotton's mommy\nCA: the farmer."

In [102]:
print(get_statement_prompt_from_text(adversarial_list[1], 1))

Discussion:
Q: How is she related to the boy?
WA: He is his grandmother
CA: mother.
Q: What is in the bag?
WA: rice, vegetables and either chicken, meat or shrimp
CA: food.
Statement:



In [107]:
print(get_correct_answer_number(adversarial_list[0], 0))

the farmer.


In [108]:
print(get_wrong_answer_number(adversarial_list[0], 0))

Cotton's mommy


In [93]:
len(adversarial_list)

4870

In [51]:
sum([get_answers_number(item) for item in adversarial_list])

0

In [52]:
get_answers_number(adversarial_list[0])

0

In [None]:
def create_new_claim_set()

## Testing trained model

In [53]:
def generate_multiple_y_n_answers(model, prompt, num_replicas=25):
    model.train()
    outputs_count = {}
    with torch.no_grad():
        tokens = tokenizer.encode(prompt, return_tensors='pt')
        tokens = tokens.repeat(num_replicas,1)
        _length = 50
        tokens_length = tokens.shape[1]
        if tokens_length + _length > 1024:
            return ''

        
        output = model.generate(
             tokens.cuda(),
             max_length=tokens_length + _length,
             pad_token_id=50256
        )
        for index in range(num_replicas):
            text = tokenizer.decode(output[index, :], skip_special_tokens=True)
            answer = get_answer_from_text(text)
            outputs_count.setdefault(answer, 0)
            outputs_count[answer] += 1

    total = sum(v for v in outputs_count.values())
    return [(k, v / total) for k, v in outputs_count.items()]

In [54]:
discussion_anchor = 'Discussion:\n'

def compute_precision_of_model(model, dlist):
    correct_predictions = []
    total_number_of_questions = 0
    correct_answers = 0
    wrong_predictions = []

    false_positives = []
    for index, text in tqdm(enumerate(dlist), total=len(dlist)):

        total_questions = get_answers_number(text)
        
        for number in range(total_questions):
            small_text = text
            description = get_description_from_text(text)
            statement_prompt = get_statement_prompt_from_text(text, number)
            statement = generate_statement_from_dialogue(statement_model, statement_prompt)
            claim = create_claim_from_description_and_dialogue(description, statement)
            #try:
            y_n_with_score = generate_multiple_y_n_answers(model, claim)
            #except RuntimeError:
            #    continue
            

            #if y_n_with_score[0][0] == 'N':
            if len(y_n_with_score) == 2:
                correct_answers += 1

                correct_predictions.append({
                    'index': index,
                    'number': number,
                    'statement': statement,
                    'yn': y_n_with_score,
                })
            else:
                wrong_predictions.append({
                    'index': index,
                    'number': number,
                    'statement': statement,
                    'yn': y_n_with_score,
                })


            total_number_of_questions += 1
    print(total_number_of_questions)
    return correct_answers/total_number_of_questions, wrong_predictions, false_positives, correct_predictions

In [55]:
from transformers import GPT2Config

In [61]:
accuracies = []
wrong_predictions = []
for epoch in range(0, num_epochs):
    checkpoint = torch.load(f'save_fever{epoch}')
    config = GPT2Config(attn_pdrop=0.01, resid_pdrop=0.01, embd_pdrop=0.01)
    model = GPT2LMHeadModel(config).from_pretrained('gpt2').cuda()
    model.load_state_dict(checkpoint['model_state_dict'])
    _ = model.eval()
    print(f'Epoch {epoch}')
    accuracy, wrong_prediction, _, _ = compute_precision_of_model(model, adversarial_list)
    accuracies.append(accuracy)
    wrong_predictions.append(wrong_prediction)
    print(accuracy)

Epoch 0


100%|██████████| 37/37 [01:07<00:00,  1.83s/it]


57
0.03508771929824561
Epoch 1


100%|██████████| 37/37 [01:07<00:00,  1.84s/it]


57
0.08771929824561403
Epoch 2


100%|██████████| 37/37 [01:08<00:00,  1.85s/it]


57
0.12280701754385964
Epoch 3


100%|██████████| 37/37 [01:08<00:00,  1.84s/it]


57
0.14035087719298245
Epoch 4


100%|██████████| 37/37 [01:08<00:00,  1.84s/it]


57
0.10526315789473684
Epoch 5


100%|██████████| 37/37 [01:08<00:00,  1.85s/it]


57
0.03508771929824561
Epoch 6


100%|██████████| 37/37 [01:08<00:00,  1.84s/it]


57
0.10526315789473684
Epoch 7


100%|██████████| 37/37 [01:08<00:00,  1.85s/it]

57
0.07017543859649122





In [54]:
wrong_predictions[2]

[{'index': 0,
  'number': 0,
  'statement': 'The library for history, law, philosophy, science and theology. is the library for.',
  'yn': [('Y', 1.0)]},
 {'index': 0,
  'number': 1,
  'statement': '150,000 books survived the Pre Lateran period.',
  'yn': [('Y', 1.0)]},
 {'index': 2,
  'number': 0,
  'statement': 'Venters called Lassiter a dark bay.',
  'yn': [('Y', 1.0)]},
 {'index': 2,
  'number': 1,
  'statement': 'The man who had led Milly Erne to Cottonwoods was oppressing her.',
  'yn': [('Y', 1.0)]},
 {'index': 2,
  'number': 2,
  'statement': 'Venters was hoping she could keep the need of a helper from happening to her.',
  'yn': [('Y', 1.0)]},
 {'index': 3,
  'number': 0,
  'statement': 'He saves the Abominable Snow Monster during a snow storm.',
  'yn': [('Y', 1.0)]},
 {'index': 4,
  'number': 0,
  'statement': 'Francesco noticed it.',
  'yn': [('Y', 1.0)]},
 {'index': 4,
  'number': 1,
  'statement': "The garrison was in Francesco's group.",
  'yn': [('Y', 1.0)]},
 {'index':

### Uploading model 

In [5]:
model = GPT2LMHeadModel.from_pretrained('gpt2')

epoch = 5
checkpoint = torch.load(f'save_fever{epoch}')
model.load_state_dict(checkpoint['model_state_dict'])
model.push_to_hub("fractalego/fact-checking")

'https://huggingface.co/fractalego/fact-checker/commit/a3185c8c177d8866908ea46c6b40abe9c7afddcb'

In [3]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.push_to_hub("fractalego/fact-checking")

'https://huggingface.co/fractalego/fact-checker/commit/ef06b4530a000f7671efed80a4440e752f2da351'