## Setup

#### Libraries

In [1]:
import sys
sys.path.append("self_check_gpt")

import os
from pathlib import Path
import json
import re
import time
from tqdm import tqdm
from dotenv import load_dotenv

import pandas as pd

import warnings
warnings.filterwarnings("ignore")

from helper.utils import read_data
from self_check_gpt import modeling_selfcheck

#### Datasets

In [2]:
# Locations of the two benchmark datasets, relative to the working directory.
FOLDER_PATH_HALUEVAL = "data/halu_eval_2"
FILE_PATH_SELFCHECKGPT = "data/self_check_gpt/dataset_v3.json"

# read_data loads both benchmarks in one call and returns them as a pair:
# the HaluEval data (indexed by category name below) and the SelfCheckGPT samples.
dataset_halueval, dataset_selfcheckgpt = read_data(
    FOLDER_PATH_HALUEVAL,
    FILE_PATH_SELFCHECKGPT,
)

Load HaluEval 2.0
Length of Bio-Medical: 200.
Length of Education: 200.
Length of Finance: 200.
Length of Open-Domain: 200.
Length of Science: 200.

Loading SelfCheckGPT
The length of the dataset: 238.


In [3]:
# Display the first SelfCheckGPT record to see which fields each sample carries.
dataset_selfcheckgpt[0]

{'gpt3_text': "John Russell Reynolds (1820–1876) was an English lawyer, judge, and author. He was born in London, the son of a barrister, and was educated at Eton College and Trinity College, Cambridge. He was called to the bar in 1845, and became a Queen's Counsel in 1859. He was appointed a judge of the Court of Common Pleas in 1867, and was knighted in 1871.\n\nReynolds was a prolific author, writing on a wide range of topics. He wrote several books on legal topics, including The Law of Libel and Slander (1863), The Law of Copyright (1865), and The Law of Patents for Inventions (1868). He also wrote on a variety of other topics, including history, biography, and literature. He was a frequent contributor to the Saturday Review, and wrote several books on Shakespeare, including The Mystery of William Shakespeare (1848) and The Authorship of Shakespeare (1875). He also wrote a biography of the poet John Keats (1848).",
 'wiki_bio_text': 'Sir John Russell Reynolds, 1st Baronet (22 May 1

#### Metrics

In [4]:
# Pattern handed to re.split to cut a response into sentences. It matches the
# (possibly empty) whitespace that follows a '.', '?', '!' or newline, except
# when the preceding text looks like a dotted abbreviation ("e.g.") or a
# capitalised two-letter title followed by a period ("Dr.").
SENTENCE_ENDINGS = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!|\n)\s*'

# Load variables from a local .env file (if any) before reading the key.
load_dotenv()
api_key = os.getenv('UPSTAGE_API_KEY')
if not api_key:
    # os.getenv returns None for a missing variable; fail here with a clear
    # message instead of handing an empty key to the API client.
    raise RuntimeError(
        "UPSTAGE_API_KEY is not set. Define it in the environment or in a .env "
        "file before running this notebook."
    )

# Prompt-based scorer that talks to Solar Pro through the OpenAI-style client.
selfcheckgpt_prompt = modeling_selfcheck.SelfCheckAPIPrompt(
    client_type="openai",
    base_url="https://api.upstage.ai/v1/solar",
    model="solar-pro",
    api_key=api_key,
)

# N-gram scorer configured with n=1 (unigram).
selfcheckgpt_unigram = modeling_selfcheck.SelfCheckNgram(n=1)

Initiate OpenAI client... model = solar-pro
SelfCheck-1gram initialized


## Testing
### Benchmark: HaluEval

In [5]:
# Join the components rather than hard-coding backslashes: a backslash is only
# a path separator on Windows, and "\s" / "\h" are invalid string escapes.
output_folder_halueval = os.path.join("data", "scores", "halu_eval_2")

# Create one output sub-folder per HaluEval category. exist_ok=True makes
# re-running the cell harmless; if a regular file already occupies the path,
# mkdir raises FileExistsError here instead of the failure surfacing later
# when the score files are written.
for category in dataset_halueval.keys():
    Path(output_folder_halueval, category).mkdir(parents=True, exist_ok=True)

#### SelfCheckGPT Unigram

In [6]:
def perform_halueval_unigram():
    """Score all HaluEval responses with the unigram SelfCheckGPT scorer.

    For every category in ``dataset_halueval``, each sample's
    ``chatgpt_response`` is split into sentences with ``SENTENCE_ENDINGS``
    (blank pieces are dropped) and passed, together with the sample's
    ``sample_passages``, to ``selfcheckgpt_unigram.predict``. The results are
    collected per sample id and written as JSON to
    ``scores_selfcheckgpt_1gram.json`` in that category's output folder.
    """
    for category, samples in dataset_halueval.items():
        output_path = os.path.join(output_folder_halueval, category, "scores_selfcheckgpt_1gram.json")
        scores_halueval_unigram = {}

        for sample in tqdm(samples):
            response = sample['chatgpt_response']

            # Trim each piece produced by the split and keep the non-empty ones.
            stripped_pieces = (piece.strip() for piece in re.split(SENTENCE_ENDINGS, response))
            sentences = [piece for piece in stripped_pieces if piece]

            result = selfcheckgpt_unigram.predict(
                passage=response,
                sentences=sentences,
                sampled_passages=sample['sample_passages'],
            )
            scores_halueval_unigram[sample['id']] = result

        # One JSON file per category, written after all of its samples are scored.
        with open(output_path, 'w') as score_file:
            json.dump(scores_halueval_unigram, score_file)

# perform_halueval_unigram()

#### SelfCheckGPT Prompt API - Solar Pro

In [7]:
# TODO: Find a more robust way to split responses into sentences.

# data = dataset_halueval['Bio-Medical'][28]
# data

# response = data['chatgpt_response']
# setences = re.split(SENTENCE_ENDINGS, response)
# sentences = [s.strip() for s in setences if s.strip()]
# sentences

In [8]:
def perform_halueval_prompt(sample_indices=None):
    """Score HaluEval responses with the prompt-based SelfCheckGPT scorer.

    For every category in ``dataset_halueval``, each selected sample's
    ``chatgpt_response`` is split into sentences with ``SENTENCE_ENDINGS``
    (blank pieces are dropped) and passed, together with the sample's
    ``sample_passages``, to ``selfcheckgpt_prompt.predict``. The returned
    scores are stored as a list per sample id and written as JSON to
    ``scores_selfcheckgpt_prompt_solar_pro.json`` in the category's folder.

    Args:
        sample_indices: Optional iterable of 0-based positions within each
            category. When given, only those samples are scored (useful for
            spot-checking a few responses). ``None`` (the default) scores
            every sample, so the saved file covers the whole category.
    """
    # A set gives O(1) membership tests and tolerates duplicate indices.
    selected = None if sample_indices is None else set(sample_indices)

    for category in dataset_halueval.keys():
        print(f"Processing {category}")
        samples = dataset_halueval[category]
        output_path = os.path.join(output_folder_halueval, category, "scores_selfcheckgpt_prompt_solar_pro.json")
        scores_halueval_prompt = {}

        for i, sample in enumerate(tqdm(samples)):
            if selected is not None:
                if i not in selected:
                    continue
                # Subset runs are for debugging: report which sample is being
                # scored, via tqdm.write so the progress bar is not garbled.
                tqdm.write(str(sample['id']))

            response = sample['chatgpt_response']
            pieces = re.split(SENTENCE_ENDINGS, response)
            sentences = [s.strip() for s in pieces if s.strip()]

            scores = selfcheckgpt_prompt.predict(
                sentences=sentences,
                sample_passages=sample['sample_passages'],
            )
            # list(...) is applied before json.dump; presumably predict returns
            # an array-like of per-sentence scores -- TODO confirm.
            scores_halueval_prompt[sample['id']] = list(scores)

        with open(output_path, 'w') as outfile:
            json.dump(scores_halueval_prompt, outfile)

# perform_halueval_prompt()

### Benchmark: SelfCheckGPT

In [9]:
# Join the components rather than hard-coding backslashes: a backslash is only
# a path separator on Windows, and "\s" is an invalid string escape.
output_folder_selfcheckgpt = os.path.join("data", "scores", "self_check_gpt")
output_path = Path(output_folder_selfcheckgpt)

# exist_ok=True makes re-running the cell harmless; if a regular file already
# occupies the path, mkdir raises FileExistsError here instead of the failure
# surfacing later when the score files are written.
output_path.mkdir(parents=True, exist_ok=True)

#### SelfCheckGPT Unigram

In [10]:
def perform_selfcheckgpt_unigram():
    """Score the SelfCheckGPT benchmark with the unigram scorer.

    Each sample's ``gpt3_text``, its pre-split ``gpt3_sentences`` and its
    ``gpt3_text_samples`` are passed to ``selfcheckgpt_unigram.predict``.
    Results are keyed by the sample's ``wiki_bio_test_idx`` and written as
    JSON to ``scores_selfcheckgpt_1gram.json`` in the SelfCheckGPT output folder.
    """
    output_path = os.path.join(output_folder_selfcheckgpt, "scores_selfcheckgpt_1gram.json")
    scores_selfcheckgpt_unigram = {}

    for sample in tqdm(dataset_selfcheckgpt):
        result = selfcheckgpt_unigram.predict(
            passage=sample['gpt3_text'],
            sentences=sample['gpt3_sentences'],
            sampled_passages=sample['gpt3_text_samples'],
        )
        scores_selfcheckgpt_unigram[sample['wiki_bio_test_idx']] = result

    # Persist everything in one go once the whole dataset has been scored.
    with open(output_path, 'w') as score_file:
        json.dump(scores_selfcheckgpt_unigram, score_file)

# perform_selfcheckgpt_unigram()

#### SelfCheckGPT Prompt API - Solar Pro

In [None]:
def perform_selfcheck_prompt():
    """Score the SelfCheckGPT benchmark with the prompt-based scorer.

    Each sample's pre-split ``gpt3_sentences`` and its ``gpt3_text_samples``
    are passed to ``selfcheckgpt_prompt.predict``. The returned scores are
    converted to a list, keyed by the sample's ``wiki_bio_test_idx``, and
    written as JSON to ``scores_selfcheckgpt_prompt_solar_pro.json`` in the
    SelfCheckGPT output folder.
    """
    output_path = os.path.join(output_folder_selfcheckgpt, "scores_selfcheckgpt_prompt_solar_pro.json")
    scores_selfcheckgpt_prompt = {}

    for sample in tqdm(dataset_selfcheckgpt):
        sentence_scores = selfcheckgpt_prompt.predict(
            sentences=sample['gpt3_sentences'],
            sample_passages=sample['gpt3_text_samples'],
        )
        scores_selfcheckgpt_prompt[sample['wiki_bio_test_idx']] = list(sentence_scores)

    # Persist everything in one go once the whole dataset has been scored.
    with open(output_path, 'w') as score_file:
        json.dump(scores_selfcheckgpt_prompt, score_file)

# perform_selfcheck_prompt()

100%|██████████| 238/238 [6:20:37<00:00, 95.95s/it]   
