In [1]:
import re
import os
import time
import pandas as pd
from openai import OpenAI
from tqdm import tqdm
import json
import sys
import os
from dotenv import load_dotenv
DOTENV_FILE = ".env"
load_dotenv(DOTENV_FILE, override=True)
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../eval')
from utils import *


In [2]:
# Shared API client used by call_gpt. It is built with no arguments, so credentials
# presumably come from the environment populated by load_dotenv above — TODO confirm.
client = OpenAI()

# First word of a model reply -> binary label (1 = consistent/accurate, 0 = not).
label_mapping = dict(yes=1, no=0, accurate=1, inaccurate=0)

# Models evaluated by the prediction loops below.
gpt_models = [
    'gpt-3.5-turbo',
    'gpt-4-turbo',
    'gpt-4o',
    'gpt-4',
    'o1-mini',
    'o3-mini',
]

# Aggrefact prompt
# NOTE(review): "correponding" is misspelled, but the text is sent to the model verbatim,
# so it is left untouched here to keep the prompt identical.
system = (
    'Decide if the following summary is consistent with the correponding article. '
    'Note that consistency means all information in the summary is supported by the article.'
)
user = (
    'Article: {article}\n'
    'Summary: {summary}\n'
    'Answer (yes or no):'
)

def _parse_label(text):
    """Turn a raw model reply into 1, 0, or None when no verdict can be recognised."""
    # Drop punctuation so replies such as "Yes." or "**Accurate**" reduce to bare words.
    # `text or ''` guards against a reply whose content is None.
    tokens = re.sub(r'[^\w\s]', '', text or '').strip().lower().split()
    if tokens and tokens[0] in label_mapping:
        return label_mapping[tokens[0]]
    # The verdict is not the first word (e.g. a step-by-step answer): search the whole reply.
    # NOTE(review): a reply containing both "accurate" and "inaccurate" is labelled 1 because
    # "accurate" is checked first; this precedence is kept as-is — confirm it is intended.
    if "accurate" in tokens or "accurately" in tokens:
        return 1
    if "inaccurate" in tokens or "inaccurately" in tokens:
        return 0
    # Show the unparseable reply so it can be inspected, then signal "no label".
    print(tokens)
    return None


def call_gpt(system_prompt, user_prompt, model='gpt-4', temperature=0):
    """Query a chat model and convert its reply into a binary label.

    Args:
        system_prompt: Task instruction.
        user_prompt: The already formatted user message.
        model: Model name passed to the chat completions endpoint.
        temperature: Sampling temperature. It is not sent when the model name
            contains "o1" or "o3".

    Returns:
        1 for a "yes"/"accurate" verdict, 0 for "no"/"inaccurate", or None when
        the reply contains no recognisable verdict (the reply tokens are printed).
    """
    if "o1" in model or "o3" in model:
        # These models get one user message holding both prompts and no temperature —
        # presumably because they reject system messages / temperature; TODO confirm.
        request = {
            "messages": [
                {"role": "user", "content": system_prompt + '\n\n' + user_prompt}
            ],
        }
    else:
        request = {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "temperature": temperature,
        }
    completion = client.chat.completions.create(model=model, **request)
    # Both model families share one parser, so an unexpected reply yields None
    # for chat models too instead of raising KeyError/IndexError.
    return _parse_label(completion.choices[0].message.content)


## Sample-level Prediction

In [None]:
# CSV holding the examples; one prediction column per model is appended to it.
file_name = '../assign/examples_to_annotate.csv'

for idx, gpt_model in enumerate(gpt_models):
    print(f"Run model {idx} - {gpt_model}")
    # Reload on every pass so columns written for earlier models are included.
    df = pd.read_csv(file_name).fillna('')
    if gpt_model in df:
        # This model already has a column in the file: nothing to do.
        continue

    # One Aggrefact-prompt query per row, in row order.
    preds = [
        call_gpt(
            system,
            user.format(article=row['source'], summary=row['summary']),
            model=gpt_model,
        )
        for _, row in tqdm(df.iterrows(), total=df.shape[0])
    ]

    # Append the predictions as the last column and overwrite the CSV.
    df[gpt_model] = preds
    df.to_csv(file_name, mode='w', index=False, header=True)

In [None]:
# FACTS-style prompt: the instruction asks for an 'Accurate' / 'Inaccurate' verdict,
# and the user template asks the model to reason step by step.
facts_system = (
    "Your task is to check if the Response is accurate to the Evidence.\n"
    "Generate 'Accurate' if the Response is accurate when verified according to the Evidence,\n"
    "or 'Inaccurate' if the Response is inaccurate (contradicts the evidence) or cannot be\n"
    "verified."
)
# Placeholders: {article} is the evidence text, {summary} is the response being checked.
facts_user = (
    "**Evidence**\n\n{article}\n\n**End of Evidence**\n\n"
    "**Response**:\n\n{summary}\n\n**End of Response**\n\n"
    "Let's think step-by-step."
)

def load_existing_predictions(filename):
    """Return the JSON content stored in `filename`, or an empty dict if the file is absent.

    Only a missing file is tolerated; any other error (for example invalid JSON)
    propagates to the caller.
    """
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        # No checkpoint yet: start from scratch.
        return {}
    with handle:
        return json.load(handle)

def save_prediction_to_file(filename, data):
    """Write `data` to `filename` as JSON without ever leaving a half-written file.

    The JSON is first written to a sibling temporary file and then moved over
    `filename` with os.replace, so an interruption during the write leaves the
    previous checkpoint intact instead of a truncated file.

    Args:
        filename: Destination path of the JSON checkpoint.
        data: JSON-serialisable object to store.
    """
    tmp_filename = f"{filename}.tmp"
    with open(tmp_filename, 'w') as file:
        json.dump(data, file)
    # Swap the finished file into place; the temp file lives in the same directory
    # so the move stays on one filesystem.
    os.replace(tmp_filename, filename)

for idx, gpt_model in enumerate(gpt_models):
    print(f"Run model {idx} - {gpt_model}")
    df = pd.read_csv(file_name).fillna('')
    column_name = f"{gpt_model}-FACTSprompt"
    if column_name in df:
        # Column already present in the CSV: this model is done.
        continue

    # Per-model checkpoint: maps str(row index) -> prediction, saved after every row.
    predictions_filename = f"{gpt_model}_predictions.json"
    predictions = load_existing_predictions(predictions_filename)
    print(predictions)
    # Predictions recovered from the checkpoint, in the order they were stored.
    preds = list(predictions.values())
    print(len(preds))

    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        # Rows whose index is below the number of collected predictions are skipped,
        # which resumes an interrupted run where it stopped.
        if index >= len(preds):
            prompt = facts_user.format(article=row['source'], summary=row['summary'])
            result = call_gpt(facts_system, prompt, model=gpt_model)
            preds.append(result)
            predictions[str(index)] = result
            save_prediction_to_file(predictions_filename, predictions)

    # Append the predictions as the last column and overwrite the CSV.
    df[column_name] = preds
    df.to_csv(file_name, mode='w', index=False, header=True)


## Sentence-level Prediction

In [None]:
# Merge the sentence-level labels of every annotation result file into one dict.
sent_level_labels = {}
result_files, skip_sample_ids, selected_annotators, num_annotators = process_result_files()
for file_path in result_files:
    _, _, _, batch_sent_level_labels = read_annotation(file_path, skip_sample_ids=skip_sample_ids)
    sent_level_labels.update(batch_sent_level_labels)

# Sentence-level predictions are run for o3-mini only; this rebinds the model list.
gpt_models = ["o3-mini"]

# JSON file holding, per sample id, a dict of sentence -> {'labels': ..., <model>: prediction}.
fname = '../eval/sent_level_results/detectors_sent_level_preds.json'
df = pd.read_csv('../assign/examples_to_annotate.csv')
# Source articles in row order; they are looked up below by int(meta_id).
sources = [row['source'] for _, row in df.iterrows()]

for idx, gpt_model in enumerate(gpt_models):
    print(f"Run model {idx} - {gpt_model}")

    # Load what has been saved so far and collect the sample ids whose first
    # stored sentence already carries a prediction from this model.
    data = {}
    existing_meta_ids = []
    if os.path.exists(fname):
        with open(fname) as r:
            data = json.load(r)
        existing_meta_ids = [
            saved_id
            for saved_id, saved_item in data.items()
            if gpt_model in list(saved_item.values())[0]
        ]

    for meta_id in tqdm(sent_level_labels):
        meta_id = str(meta_id)  # ids are stored as strings in the JSON file
        item = data.get(meta_id, {})
        needs_prediction = meta_id not in existing_meta_ids

        for sent, sent_labels in sent_level_labels[int(meta_id)].items():
            if sent not in item:
                item[sent] = {'labels': sent_labels}
            if needs_prediction:
                # Same Aggrefact prompt as at sample level, with the single sentence as summary.
                item[sent][gpt_model] = call_gpt(
                    system,
                    user.format(article=sources[int(meta_id)], summary=sent),
                    model=gpt_model,
                )

        # Re-read the file right before writing so this sample is merged into
        # whatever the file currently contains, then save after every sample.
        if os.path.exists(fname):
            with open(fname, 'r') as f:
                json_data = json.load(f)
            json_data[meta_id] = item
        else:
            json_data = {meta_id: item}
        with open(fname, 'w') as f:
            json.dump(json_data, f, indent=2)
    