## Set Up

#### Libraries

In [None]:
import sys
sys.path.append("self_check_gpt")

import os
from pathlib import Path
import json
import re
import time
from tqdm import tqdm
from dotenv import load_dotenv

import pandas as pd

import warnings
warnings.filterwarnings("ignore")

from helper.utils import read_data
from self_check_gpt import modeling_selfcheck

#### Datasets

In [None]:
# Locations of the two benchmark datasets, given as relative paths.
FOLDER_PATH_HALUEVAL = "data/halu_eval_2"
FILE_PATH_SELFCHECKGPT = "data/self_check_gpt/dataset_v3.json"

# read_data is a project helper that returns both datasets as a pair.
# Later cells iterate dataset_halueval by category key and iterate
# dataset_selfcheckgpt sample by sample.
dataset_halueval, dataset_selfcheckgpt = read_data(FOLDER_PATH_HALUEVAL, FILE_PATH_SELFCHECKGPT)

#### Metrics

In [None]:
# Pattern handed to re.split to cut a response into sentences. It matches the
# (possibly empty) whitespace that follows '.', '?', '!' or a newline, except
# when the preceding characters look like "x.y." (e.g. an initialism) or a
# capitalised two-letter abbreviation such as "Dr.".
SENTENCE_ENDINGS = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!|\n)\s*'

# Pause, in seconds, passed to time.sleep after every prompt-scorer call.
API_CALL_SLEEP = 5

# Load variables from a local .env file (if any) before reading the API key.
load_dotenv()
api_key = os.environ.get('UPSTAGE_API_KEY')

# Prompt-based scorer, configured to talk to the Solar Pro model through an
# OpenAI-style client pointed at the Upstage endpoint.
selfcheckgpt_prompt = modeling_selfcheck.SelfCheckAPIPrompt(
    client_type="openai",
    base_url="https://api.upstage.ai/v1/solar",
    model="solar-pro",
    api_key=api_key,
)

# N-gram scorer with n=1 (unigram).
selfcheckgpt_unigram = modeling_selfcheck.SelfCheckNgram(n=1)

## Testing
### Benchmark: Halu Eval

In [None]:
# Root folder for HaluEval score files. Built with os.path.join instead of a
# hard-coded "data\scores\..." literal: the backslashes were invalid string
# escapes (\s, \h) and only act as path separators on Windows, so on other
# platforms the old literal produced a single, oddly named directory.
output_folder_halueval = os.path.join("data", "scores", "halu_eval_2")

# Make sure every category has its own output sub-folder before scoring starts.
for category in dataset_halueval:
    output_path_category = Path(output_folder_halueval, category)

    # Skip mkdir when a regular file already occupies the path; mkdir would
    # raise FileExistsError in that case even with exist_ok=True.
    if not output_path_category.is_file():
        output_path_category.mkdir(parents=True, exist_ok=True)

#### SelfCheckGPT Unigram

In [None]:
def perform_halueval_unigram():
    """Score every HaluEval sample with the unigram scorer, one JSON file per category.

    For each category in ``dataset_halueval`` the ChatGPT response of every
    sample is split into sentences with ``SENTENCE_ENDINGS`` and passed,
    together with the sample passages, to ``selfcheckgpt_unigram.predict``.
    The results are collected under the sample id and written to
    ``<output_folder_halueval>/<category>/scores_selfcheckgpt_1gram.json``.
    """
    for category, samples in dataset_halueval.items():
        output_path = os.path.join(output_folder_halueval, category, "scores_selfcheckgpt_1gram.json")
        scores_halueval_unigram = {}

        for sample in tqdm(samples):
            response = sample['chatgpt_response']
            # The split can yield empty or whitespace-only fragments; drop them.
            fragments = (part.strip() for part in re.split(SENTENCE_ENDINGS, response))
            sentences = [part for part in fragments if part]

            scores_halueval_unigram[sample['id']] = selfcheckgpt_unigram.predict(
                passage=response,
                sentences=sentences,
                sampled_passages=sample['sample_passages'],
            )

        # Explicit encoding so the output does not depend on the platform locale.
        with open(output_path, 'w', encoding='utf-8') as outfile:
            json.dump(scores_halueval_unigram, outfile)
    
# perform_halueval_unigram()

#### SelfCheckGPT Prompt API - Solar Pro

In [None]:

def perform_halueval_prompt():
    """Score every HaluEval sample with the prompt scorer, one JSON file per category.

    For each category in ``dataset_halueval`` the ChatGPT response of every
    sample is split into sentences with ``SENTENCE_ENDINGS`` and passed,
    together with the sample passages, to ``selfcheckgpt_prompt.predict``.
    The function sleeps ``API_CALL_SLEEP`` seconds after each call. Scores are
    stored as a list under the sample id and written to
    ``<output_folder_halueval>/<category>/scores_selfcheckgpt_prompt_solar_pro.json``
    once the whole category has been processed.
    """
    for category, samples in dataset_halueval.items():
        print(f"Processing {category}")
        output_path = os.path.join(output_folder_halueval, category, "scores_selfcheckgpt_prompt_solar_pro.json")
        scores_halueval_prompt = {}

        for sample in tqdm(samples):
            response = sample['chatgpt_response']
            # The split can yield empty or whitespace-only fragments; drop them.
            fragments = (part.strip() for part in re.split(SENTENCE_ENDINGS, response))
            sentences = [part for part in fragments if part]

            scores = selfcheckgpt_prompt.predict(
                sentences=sentences,
                sample_passages=sample['sample_passages'],
            )
            # Convert to a plain list so json.dump can serialise the result.
            scores_halueval_prompt[sample['id']] = list(scores)
            # Pause between consecutive API calls.
            time.sleep(API_CALL_SLEEP)

        # Explicit encoding so the output does not depend on the platform locale.
        with open(output_path, 'w', encoding='utf-8') as outfile:
            json.dump(scores_halueval_prompt, outfile)

perform_halueval_prompt()

### Benchmark: SelfCheckGPT

In [None]:
# Folder for SelfCheckGPT-benchmark score files. Built with os.path.join
# instead of a hard-coded "data\scores\..." literal: the backslashes were
# invalid string escapes (\s) and only act as path separators on Windows, so on
# other platforms the old literal produced a single, oddly named directory.
output_folder_selfcheckgpt = os.path.join("data", "scores", "self_check_gpt")
output_path = Path(output_folder_selfcheckgpt)

# Skip mkdir when a regular file already occupies the path; mkdir would raise
# FileExistsError in that case even with exist_ok=True.
if not output_path.is_file():
    output_path.mkdir(parents=True, exist_ok=True)

#### SelfCheckGPT Unigram

In [None]:
def perform_selfcheckgpt_unigram():
    """Score the SelfCheckGPT dataset with the unigram scorer and save the result.

    Each sample's ``gpt3_text``, ``gpt3_sentences`` and ``gpt3_text_samples``
    are passed to ``selfcheckgpt_unigram.predict``. The results are keyed by
    the sample's ``wiki_bio_test_idx`` and written to
    ``<output_folder_selfcheckgpt>/scores_selfcheckgpt_1gram.json``.
    """
    output_path = os.path.join(output_folder_selfcheckgpt, "scores_selfcheckgpt_1gram.json")

    scores_selfcheckgpt_unigram = {
        sample['wiki_bio_test_idx']: selfcheckgpt_unigram.predict(
            passage=sample['gpt3_text'],
            sentences=sample['gpt3_sentences'],
            sampled_passages=sample['gpt3_text_samples'],
        )
        for sample in tqdm(dataset_selfcheckgpt)
    }

    # Explicit encoding so the output does not depend on the platform locale.
    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(scores_selfcheckgpt_unigram, outfile)
        
# perform_selfcheckgpt_unigram()

#### SelfCheckGPT Prompt API - Solar Pro

In [None]:
def perform_selfcheck_prompt():
    """Score the SelfCheckGPT dataset with the prompt scorer and save the result.

    Each sample's ``gpt3_sentences`` and ``gpt3_text_samples`` are passed to
    ``selfcheckgpt_prompt.predict``. The function sleeps ``API_CALL_SLEEP``
    seconds after each call. Scores are stored as a list under the sample's
    ``wiki_bio_test_idx`` and written to
    ``<output_folder_selfcheckgpt>/scores_selfcheckgpt_prompt_solar_pro.json``
    after all samples have been processed.
    """
    output_path = os.path.join(output_folder_selfcheckgpt, "scores_selfcheckgpt_prompt_solar_pro.json")
    scores_selfcheckgpt_prompt = {}

    for sample in tqdm(dataset_selfcheckgpt):
        scores = selfcheckgpt_prompt.predict(
            sentences=sample['gpt3_sentences'],
            sample_passages=sample['gpt3_text_samples'],
        )
        # Convert to a plain list so json.dump can serialise the result.
        scores_selfcheckgpt_prompt[sample['wiki_bio_test_idx']] = list(scores)
        # Pause between consecutive API calls.
        time.sleep(API_CALL_SLEEP)

    # Explicit encoding so the output does not depend on the platform locale.
    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(scores_selfcheckgpt_prompt, outfile)

perform_selfcheck_prompt()