In [1]:
import os
import jsonlines
import numpy as np
from utils import *
import pandas as pd
from openai import OpenAI

In [2]:
# Shared API client used by call_gpt below. No key is passed here, so
# credentials must come from the SDK's default lookup
# (presumably the OPENAI_API_KEY environment variable — confirm before running).
client = OpenAI()
# Model name handed to call_gpt by the claim-assignment cell.
MODEL = 'gpt-4o'

# Earlier "Aggrefact" prompt pair, kept verbatim for reference (not used):
# system = """Determine whether the provided claim can be inferred from the given sentence, using the sentence's context as a reference. A claim is considered to belong to a sentence if it can be logically derived from the sentence while accounting for its contextual meaning. Note that the claim must correspond to a single sentence within the provided context."""
# user = "Contenxt: {summary}\nSentence: {summary_sent}\nClaim: {claim}\nAnswer (yes or no):"

# Active prompt pair: the model is asked for the index of the sentence that a
# claim was derived from.
# NOTE(review): the system prompt mentions "a context", but the user template
# below only has {claim} and {summary_sents} placeholders — confirm that no
# separate context is meant to be sent.
system = (
    "Given a context and a list of sentences (each with an associated index), "
    "determine which sentence supports the derivation of the provided claim "
    "from the given context. "
    "Your response should be the index of the relevant sentence."
)
user = (
    "Claim: {claim}\n"
    "Sentence List: {summary_sents}\n"
    "Answer (sentence index, an integer):"
)

def call_gpt(system_prompt, user_prompt, model='gpt-4', temperature=0):
    """Send one system + user message pair through the module-level ``client``.

    Args:
        system_prompt: Text placed in the "system" message.
        user_prompt: Text placed in the "user" message.
        model: Model name forwarded to the API call (default ``'gpt-4'``).
        temperature: Sampling temperature forwarded to the API call (default 0).

    Returns:
        The ``content`` attribute of the first choice's message.
    """
    chat_messages = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=user_prompt),
    ]
    response = client.chat.completions.create(
        model=model,
        messages=chat_messages,
        temperature=temperature,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [3]:
# Load the RAGAS claim-level predictions. Each record is keyed by its
# zero-based position in the file, which serves as the sample's meta_id.
with open('ragas_claim_level_preds.jsonl') as reader:
    ragas_results = dict(enumerate(jsonlines.Reader(reader)))

In [4]:
# Samples to leave out, keyed by annotation batch ID; each value is the range
# of sample IDs to skip within that batch.
skip_samples = {
    5: range(40, 50),
    10: range(10, 20),
    11: range(0, 10),
    12: range(20, 30),
    15: range(40, 50),
}

# Annotators to keep, keyed by annotation batch ID. Trailing comments record
# the alternative selections that were left commented out.
annotator_list = {
    7: ['yujia', 'manveer'],
    8: ['miaoran', 'chenyu'],
    10: ['erana', 'vivek', 'manveer'],
    11: ['rogger', 'matt'],    # other names noted for this batch: 'matt', 'new', 'yujia'
    13: ['erana', 'miaoran'],  # alternative: ['erana', 'weisi', 'miaoran']
    16: ['miaoran', 'matt'],   # alternative: ['miaoran', 'yujia', 'matt', 'weisi', 'new']
}
# Batch 16 figures recorded per annotator pair
# (presumably agreement scores — the metric names are not given here):
# ['yujia', 'matt']: 0.299	0.267	0.209	0.294	0.244	0.340	0.330
# ['yujia', 'weisi']: -0.417	-0.376	-0.333	-0.384	-0.320	-0.391	-0.387
# ['matt', 'weisi']: -0.138	-0.117	-0.074	-0.104	-0.065	-0.147	-0.092

# Batch IDs to drop entirely; empty means every batch is loaded.
# Alternative setting that was tried: exclude_batch = [11,13,16]
exclude_batch = list()

In [5]:
# Merge the sentence-level labels of every annotation batch into one dict.
sent_level_labels = {}
result_path = 'batch_5_src_no_sports/results'
for batch_id in range(1, 17):  # annotation batches 1 through 16
    if batch_id not in exclude_batch:
        file_path = os.path.join(result_path, f"batch_{batch_id}_annotation.json")

        # Sample IDs are handed to read_annotation as strings.
        skip_sample_ids = [str(s_id) for s_id in skip_samples.get(batch_id, [])]
        if batch_id in skip_samples:
            print (f"Skipping samples {skip_sample_ids}")

        # there is an unexpected "new" annotator in batch 7
        # NOTE(review): selected_annotators is looked up but never passed to
        # read_annotation below, so annotator_list has no effect in this cell —
        # confirm whether read_annotation was meant to receive it.
        selected_annotators = annotator_list.get(batch_id)

        # Only the fourth element of read_annotation's return value is used.
        _, _, _, batch_sent_level_labels = read_annotation(
            file_path,
            summary_sent_file='summary_sent_list.jsonl',
            skip_sample_ids=skip_sample_ids,
        )
        sent_level_labels.update(batch_sent_level_labels)
print(sent_level_labels)
    
    

Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']
Skipping samples ['10', '11', '12', '13', '14', '15', '16', '17', '18', '19']
Skipping samples ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Skipping samples ['20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']


In [None]:
# Assign every RAGAS claim of a sample to the summary sentence the model says
# it was derived from, then append one JSON record per sample to OUTPUT_PATH.
# The cell is resumable: samples already present in OUTPUT_PATH are skipped,
# and a sample whose model reply cannot be used is left unwritten so that a
# later run retries it instead of storing a partial or mis-assigned record.
OUTPUT_PATH = 'processed_ragas_claim_level_preds.jsonl'


def _parse_sentence_index(response, num_sentences):
    """Turn the model's reply into a valid sentence index.

    Args:
        response: Raw reply text from the model (may be None).
        num_sentences: Number of candidate sentences offered in the prompt.

    Returns:
        An integer ``i`` with ``0 <= i < num_sentences``.

    Raises:
        ValueError: If the reply is not a plain integer, or if the integer is
            outside the valid range. Negative values are rejected explicitly
            because Python would otherwise index from the end of the list.
    """
    try:
        index = int(response)
    except (TypeError, ValueError):
        raise ValueError(
            f"expected an integer sentence index, got {response!r}"
        ) from None
    if not 0 <= index < num_sentences:
        raise ValueError(
            f"sentence index {index} is outside 0..{num_sentences - 1}"
        )
    return index


# meta_ids already written by an earlier (possibly interrupted) run.
existing_meta_ids = set()
if os.path.exists(OUTPUT_PATH):
    with open(OUTPUT_PATH) as r:
        for p in jsonlines.Reader(r):
            existing_meta_ids.add(p['meta_id'])

# Samples skipped in this run because a model reply was unusable.
failed_meta_ids = []

for meta_id in sent_level_labels:
    if meta_id in existing_meta_ids:
        continue
    print('='*30)
    print(meta_id)

    # Everything below depends only on the sample, not on the claim, so it is
    # built once per sample. Every sentence gets an entry up front, which keeps
    # the record schema uniform even when a sample has no claims.
    sentences = list(sent_level_labels[meta_id].keys())
    item = {
        'meta_id': meta_id,
        'results': {
            sent: {'labels': sent_labels, 'claims': [], 'claim_preds': []}
            for sent, sent_labels in sent_level_labels[meta_id].items()
        },
    }
    # NOTE(review): sent_lst is interpolated into the prompt as a Python list,
    # so the model sees its repr (brackets, quotes, escaped newlines). Left
    # unchanged so new records stay comparable with ones already written.
    sent_lst = [f"sentence idx: {idx}\n{sent}" for idx, sent in enumerate(sentences)]

    ragas_result = ragas_results[meta_id]
    for subresult in ragas_result:
        print('-'*20)
        print(subresult['statement'])
        result = call_gpt(system, user.format(claim=subresult['statement'],summary_sents=sent_lst), model=MODEL)
        print(result)
        try:
            sent = sentences[_parse_sentence_index(result, len(sentences))]
        except ValueError as err:
            print(f"Skipping sample {meta_id}: {err}")
            failed_meta_ids.append(meta_id)
            break
        print(sent)
        item['results'][sent]['claims'].append(subresult['statement'])
        item['results'][sent]['claim_preds'].append(subresult['verdict'])
    else:
        # Reached only when no claim triggered the `break` above, i.e. every
        # claim of this sample was assigned to a sentence.
        print(item)
        # Append mode creates the file when it does not exist yet.
        with jsonlines.open(OUTPUT_PATH, mode='a') as writer:
            writer.write(item)

if failed_meta_ids:
    print(f"Samples left unprocessed because of unusable model replies: {failed_meta_ids}")

262
--------------------
The passage describes a court case involving a sexual assault.


0
This passage describes a court case involving a sexual assault that occurred in 2004. 
--------------------
The sexual assault occurred in 2004.
0
This passage describes a court case involving a sexual assault that occurred in 2004. 
--------------------
A 15-year-old girl was raped by David Ayrton.
2
1. A 15-year-old girl was raped by David Ayrton in a Portsmouth garage.


--------------------
The rape occurred in a Portsmouth garage.
2
1. A 15-year-old girl was raped by David Ayrton in a Portsmouth garage.


--------------------
The defendant is now 34 years old.
3
2. The defendant, now 34 and known as Davina Ayrton, denied the charge but was found guilty.


--------------------
The defendant is known as Davina Ayrton.
3
2. The defendant, now 34 and known as Davina Ayrton, denied the charge but was found guilty.


--------------------
Davina Ayrton denied the charge.
3
2. The defendant, now 34 and known as Davina Ayrton, denied the charge but was found guilty.


-------------------