#### Set Up SelfCheckGPT

In [None]:
import torch
import modeling_selfcheck
import warnings
warnings.filterwarnings("ignore")

# Pick the CUDA device when PyTorch reports one as available, else the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build one SelfCheckNgram instance per value of n from 1 through 5, in
# ascending order, and bind each to its own module-level name.
(
    selfcheck_1gram,
    selfcheck_2gram,
    selfcheck_3gram,
    selfcheck_4gram,
    selfcheck_5gram,
) = (modeling_selfcheck.SelfCheckNgram(n=order) for order in range(1, 6))

#### Load Wikibio Dataset

In [None]:
import json

# Parse the dataset straight from the file handle. The encoding is pinned to
# UTF-8 so decoding does not depend on the platform's default locale.
with open("data/dataset_v3.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Report how many records were loaded.
print("The length of the dataset: {}".format(len(dataset)))

In [None]:
import numpy as np

# Numeric value assigned to each human annotation label.
label_mapping = {
    'accurate': 0.0,
    'minor_inaccurate': 0.5,
    'major_inaccurate': 1.0,
}

# Per-passage 0/1 label lists, keyed by the record's "wiki_bio_test_idx":
#   *_False   -> 1 where the mapped label is above 0.499 (minor or major)
#   *_True    -> 1 where the mapped label is below 0.499 (accurate)
#   *_False_h -> 1 where the mapped label is above 0.99 (major only); filled
#                only for passages whose mean mapped label is below 0.99
human_label_detect_False   = {}
human_label_detect_True    = {}
human_label_detect_False_h = {}

for record in dataset:
    idx = record["wiki_bio_test_idx"]
    raw_label = np.array([label_mapping[tag] for tag in record['annotation']])

    inaccurate_mask = raw_label > 0.499
    accurate_mask = raw_label < 0.499
    human_label_detect_False[idx] = inaccurate_mask.astype(np.int32).tolist()
    human_label_detect_True[idx] = accurate_mask.astype(np.int32).tolist()

    # Skip passages whose mean mapped label reaches 0.99.
    if np.mean(raw_label) < 0.99:
        major_mask = raw_label > 0.99
        human_label_detect_False_h[idx] = major_mask.astype(np.int32).tolist()

# Summarise how many passages ended up in each table.
for caption, table in (
    ("Length of False:", human_label_detect_False),
    ("Length of True:", human_label_detect_True),
    ("Length of False_h:", human_label_detect_False_h),
):
    print(caption, len(table))

#### SelfCheck Unigram

In [None]:
from tqdm import tqdm

import json
import os

# Create the output directory up front so a missing folder cannot raise at
# write time, after the whole scoring loop has already run.
os.makedirs("data/scores_ngram", exist_ok=True)

# Maps each record's "wiki_bio_test_idx" to the value returned by
# selfcheck_1gram.predict for that record.
scores_1gram = {}

# Iterate over the records directly instead of indexing by position.
for x in tqdm(dataset):
    idx = x['wiki_bio_test_idx']
    scores_1gram[idx] = selfcheck_1gram.predict(
        passage=x['gpt3_text'],
        sentences=x['gpt3_sentences'],
        sampled_passages=x['gpt3_text_samples'],
    )

# NOTE(review): json.dump writes dict keys as strings, so non-string idx
# values will be strings when this file is loaded back -- confirm readers expect that.
with open("data/scores_ngram/scores_1gram.json", "w") as outfile:
    json.dump(scores_1gram, outfile)

#### SelfCheck 2-Gram

In [None]:
from tqdm import tqdm

import json
import os

# Create the output directory up front so a missing folder cannot raise at
# write time, after the whole scoring loop has already run.
os.makedirs("data/scores_ngram", exist_ok=True)

# Maps each record's "wiki_bio_test_idx" to the value returned by
# selfcheck_2gram.predict for that record.
scores_2gram = {}

# Iterate over the records directly instead of indexing by position.
for x in tqdm(dataset):
    idx = x['wiki_bio_test_idx']
    scores_2gram[idx] = selfcheck_2gram.predict(
        passage=x['gpt3_text'],
        sentences=x['gpt3_sentences'],
        sampled_passages=x['gpt3_text_samples'],
    )

# NOTE(review): json.dump writes dict keys as strings, so non-string idx
# values will be strings when this file is loaded back -- confirm readers expect that.
with open("data/scores_ngram/scores_2gram.json", "w") as outfile:
    json.dump(scores_2gram, outfile)

#### SelfCheck 3-Gram

In [None]:
from tqdm import tqdm

import json
import os

# Create the output directory up front so a missing folder cannot raise at
# write time, after the whole scoring loop has already run.
os.makedirs("data/scores_ngram", exist_ok=True)

# Maps each record's "wiki_bio_test_idx" to the value returned by
# selfcheck_3gram.predict for that record.
scores_3gram = {}

# Iterate over the records directly instead of indexing by position.
for x in tqdm(dataset):
    idx = x['wiki_bio_test_idx']
    scores_3gram[idx] = selfcheck_3gram.predict(
        passage=x['gpt3_text'],
        sentences=x['gpt3_sentences'],
        sampled_passages=x['gpt3_text_samples'],
    )

# NOTE(review): json.dump writes dict keys as strings, so non-string idx
# values will be strings when this file is loaded back -- confirm readers expect that.
with open("data/scores_ngram/scores_3gram.json", "w") as outfile:
    json.dump(scores_3gram, outfile)

#### SelfCheck 4-Gram

In [None]:
from tqdm import tqdm

import json
import os

# Create the output directory up front so a missing folder cannot raise at
# write time, after the whole scoring loop has already run.
os.makedirs("data/scores_ngram", exist_ok=True)

# Maps each record's "wiki_bio_test_idx" to the value returned by
# selfcheck_4gram.predict for that record.
scores_4gram = {}

# Iterate over the records directly instead of indexing by position.
for x in tqdm(dataset):
    idx = x['wiki_bio_test_idx']
    scores_4gram[idx] = selfcheck_4gram.predict(
        passage=x['gpt3_text'],
        sentences=x['gpt3_sentences'],
        sampled_passages=x['gpt3_text_samples'],
    )

# NOTE(review): json.dump writes dict keys as strings, so non-string idx
# values will be strings when this file is loaded back -- confirm readers expect that.
with open("data/scores_ngram/scores_4gram.json", "w") as outfile:
    json.dump(scores_4gram, outfile)

#### SelfCheck 5-Gram

In [None]:
from tqdm import tqdm

import json
import os

# Create the output directory up front so a missing folder cannot raise at
# write time, after the whole scoring loop has already run.
os.makedirs("data/scores_ngram", exist_ok=True)

# Maps each record's "wiki_bio_test_idx" to the value returned by
# selfcheck_5gram.predict for that record.
scores_5gram = {}

# Iterate over the records directly instead of indexing by position.
for x in tqdm(dataset):
    idx = x['wiki_bio_test_idx']
    scores_5gram[idx] = selfcheck_5gram.predict(
        passage=x['gpt3_text'],
        sentences=x['gpt3_sentences'],
        sampled_passages=x['gpt3_text_samples'],
    )

# NOTE(review): json.dump writes dict keys as strings, so non-string idx
# values will be strings when this file is loaded back -- confirm readers expect that.
with open("data/scores_ngram/scores_5gram.json", "w") as outfile:
    json.dump(scores_5gram, outfile)