In [1]:
import os
import jsonlines
import numpy as np
from utils import *
import pandas as pd
import re

In [2]:
def parse_statements(input_string):
    """
    Parses a structured input string containing multiple statements into a dictionary.

    Each statement is expected to look like::

        STATEMENT <id>:
        Criteria: <text>
        Supporting Evidence: <text>
        Score: <number>

    Lines may be separated by either ``\\n`` or ``\\r\\n``.

    Args:
        input_string (str): The input string with statements, criteria, supporting evidence, and scores.

    Returns:
        dict: Maps ``'Statement<id>'`` to a dict with the keys ``"Criteria"`` (str),
        ``"Supporting Evidence"`` (str) and ``"Score"`` (float). Statements whose
        stripped criteria is shorter than 3 characters are skipped. If the same
        statement id occurs more than once, the last occurrence wins. Returns an
        empty dict when nothing matches.
    """
    # The score group accepts exactly one number ("1", "0.5" or ".5"). A greedy
    # "[\d.]+" would also swallow a sentence-ending period ("0.5.") and make
    # float() raise ValueError.
    pattern = (
        r"STATEMENT (\d+):\r?\n"
        r"Criteria: (.*?)\r?\n"
        r"Supporting Evidence: (.*?)\r?\n"
        r"Score: (\d+(?:\.\d+)?|\.\d+)"
    )

    statements_dict = {}
    # DOTALL lets the lazy criteria/evidence groups span several lines.
    for match in re.finditer(pattern, input_string, re.DOTALL):
        raw_id, raw_criteria, raw_evidence, raw_score = match.groups()
        criteria = raw_criteria.strip()
        if len(criteria) < 3:
            # Effectively empty criteria: treat the statement as noise.
            continue
        statements_dict[f'Statement{int(raw_id)}'] = {
            "Criteria": criteria,
            "Supporting Evidence": raw_evidence.strip(),
            "Score": float(raw_score),
        }

    return statements_dict

In [3]:
# Load the examples to annotate; missing cells are replaced by empty strings so
# that every value handed to parse_statements is a string.
df = pd.read_csv('examples_to_annotate.csv').fillna('')
# NOTE(review): after fillna('') there should be no NaN left for this replace to
# act on, so it looks redundant -- confirm before removing.
df = df.replace({np.nan: None})
trulens_results = df['Trulens_gpt-4o_reasons'].values.tolist()

# parsed_trulens_results[i] holds the parsed statements of row i of df.
parsed_trulens_results = []
for idx, input_string in enumerate(trulens_results):
    try:
        parsed_trulens_results.append(parse_statements(input_string))
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate. Report the offending
        # row together with the actual error, then stop: the list is left
        # truncated at this row.
        print(idx)
        print(input_string)
        print(df.iloc[idx])
        print(repr(exc))
        break


In [4]:
# Display the parsed statements of the first example as a sanity check of the parser.
parsed_trulens_results[0]

{'Statement0': {'Criteria': "In the 'mini-derby', Manchester United's Under 18s defeated City's Under 18s 1-0.",
  'Supporting Evidence': 'The source states, "First blood to United after their Under 18s saw off City 1-0 in the \'mini-derby\'."',
  'Score': 1.0},
 'Statement1': {'Criteria': "The match was played at Altrincham's home, a 6,000-capacity stadium, and was free for spectators.",
  'Supporting Evidence': 'The source states, "Altrincham\'s compact 6,000-capacity home may not be Old Trafford, but it does have a proud history of its own." It also mentions, "Entry was free and close to 1,000 gathered on the seats and terraces of Moss Lane."',
  'Score': 1.0},
 'Statement2': {'Criteria': 'The majority of the players were English, with 10 being Mancunian.',
  'Supporting Evidence': 'The source states that "no less than 13 out of the 22 players on show at kick-off were English. Of those, 10 were Mancunian."',
  'Score': 1.0},
 'Statement3': {'Criteria': 'Callum Gribbin, an England Un

In [5]:
# Samples to leave out, keyed by batch ID; each value is the range of sample IDs
# skipped in that batch.
skip_samples = {
    5: range(40, 50),
    10: range(10, 20),
    11: range(0, 10),
    12: range(20, 30),
    15: range(40, 50),
}

# Annotators selected per batch ID. The trailing comments preserve the
# commented-out names / variants that were present next to each entry.
annotator_list = {
    7: ['yujia', 'manveer'],
    8: ['miaoran', 'chenyu'],
    10: ['erana', 'vivek', 'manveer'],
    11: ['rogger', 'matt'],    # commented out: 'matt', 'new', 'yujia'
    13: ['erana', 'miaoran'],  # commented out: ['erana', 'weisi', 'miaoran']
    16: ['miaoran', 'matt'],   # commented out: ['miaoran', 'yujia', 'matt', 'weisi', 'new']
}

# Numbers recorded for annotator pairs of batch 16 (the meaning of the seven
# columns is not stated in this notebook):
#   ['yujia', 'matt']:   0.299   0.267   0.209   0.294   0.244   0.340   0.330
#   ['yujia', 'weisi']: -0.417  -0.376  -0.333  -0.384  -0.320  -0.391  -0.387
#   ['matt', 'weisi']:  -0.138  -0.117  -0.074  -0.104  -0.065  -0.147  -0.092

# Batch IDs to leave out entirely; empty means every batch is used.
# Previously tried: exclude_batch = [11,13,16]
exclude_batch = []

In [6]:
# Collect the sentence-level labels of every annotation batch into one mapping.
sent_level_labels = {}
result_path = 'batch_5_src_no_sports/results'

for batch_id in range(1, 16 + 1):
    if batch_id not in exclude_batch:
        file_path = os.path.join(result_path, f"batch_{batch_id}_annotation.json")

        # Sample IDs are passed on as strings; batches without an entry skip nothing.
        skip_sample_ids = [str(s_id) for s_id in skip_samples.get(batch_id, ())]
        if batch_id in skip_samples:
            print (f"Skipping samples {skip_sample_ids}")

        # there is an unexpected "new" annotator in batch 7
        # NOTE(review): selected_annotators is looked up here but never handed to
        # read_annotation below, so the per-batch annotator selection currently
        # has no effect in this cell -- confirm whether it should be passed on.
        selected_annotators = annotator_list.get(batch_id)

        # Only the fourth value returned by read_annotation is used here.
        _, _, _, batch_sent_level_labels = read_annotation(file_path, skip_sample_ids=skip_sample_ids)
        # Later batches overwrite earlier ones on identical keys.
        sent_level_labels.update(batch_sent_level_labels)
    

Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']
Skipping samples ['10', '11', '12', '13', '14', '15', '16', '17', '18', '19']
Skipping samples ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Skipping samples ['20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']


In [7]:
# Report every sample that has fewer annotated sentences than parsed statements,
# printing both counts and both lists for side-by-side comparison.
for idx, labels in sent_level_labels.items():
    parsed = parsed_trulens_results[idx]
    if len(labels) >= len(parsed):
        continue
    print(idx)
    print(len(labels))
    print(list(labels.keys()))
    print(len(parsed))
    print([details['Criteria'] for details in parsed.values()])

236
5
['I apologize, but there appears to be some confusion in the passage provided.', 'The passage contains two unrelated statements about different songs/albums called "Hourglass":  . " Hourglass" is a song by the British electronic duo Disclosure.', '. " Hourglass" is also the name of singer-songwriter James Taylor\'s fourteenth studio album.', 'These are two separate pieces of information about different musical works that happen to share the same title.', 'There is no additional context or connection provided between these two statements in the given passage.']
6
['I apologize, but there appears to be some confusion in the passage provided.', 'The passage contains two unrelated statements about different songs/albums called "Hourglass":', '"Hourglass" is a song by the British electronic duo Disclosure.', '"Hourglass" is also the name of singer-songwriter James Taylor\'s fourteenth studio album.', 'These are two separate pieces of information about different musical works that happ

In [8]:
# Spot-check split_text_to_sentences_with_indices on three hand-picked inputs.
# text3 is the parenthesised note that also ends text1.
text1 = 'As of the date mentioned, over 190 countries and 200 territories have reported more than cases of COVID-19, leading to over deaths. ( Note: The actual numbers for cases and deaths are missing in the provided passage and should be replaced with the specific figures.)'
text2 = 'The passage then focuses on their rematch, where Leonard used his speed and movement to frustrate Duran, leading to Duran quitting the fight by saying "no mas." The passage concludes by mentioning that Duran\'s reputation took three years to rebuild and that Leonard took pride in making Duran quit rather than knocking him out.'
text3 = '( Note: The actual numbers for cases and deaths are missing in the provided passage and should be replaced with the specific figures.)'

# NOTE(review): in the recorded output the closing ')' of text1 / text3 comes
# back as a sentence of its own, and the first sentence of text2 is reported
# with a start index of -1.
for sample_text in (text1, text2, text3):
    print(split_text_to_sentences_with_indices(sample_text))

[('As of the date mentioned, over 190 countries and 200 territories have reported more than cases of COVID-19, leading to over deaths.', 0, 130), ('( Note: The actual numbers for cases and deaths are missing in the provided passage and should be replaced with the specific figures.', 132, 264), (')', 265, 265)]
[('The passage then focuses on their rematch, where Leonard used his speed and movement to frustrate Duran, leading to Duran quitting the fight by saying "no mas".', -1, 158), ("The passage concludes by mentioning that Duran's reputation took three years to rebuild and that Leonard took pride in making Duran quit rather than knocking him out.", 161, 326)]
[('( Note: The actual numbers for cases and deaths are missing in the provided passage and should be replaced with the specific figures.', 0, 132), (')', 133, 133)]


In [9]:
# Stand-alone experiment: split a text on sentence-ending punctuation and print
# each sentence with its start index and inclusive end index.
# NOTE(review): the recorded output shows the abbreviation "Mr." being emitted
# as a sentence of its own.
text = 'Mr. Mole, ridden by AP McCoy, is one of three co-favorites in the Grade One Celebration Chase at Sandown on Saturday.'

# A boundary is '.', '!' or '?' followed by whitespace or by the end of the text.
sentence_endings = re.finditer(r'[.!?](?:\s+|$)', text)

start_idx = 0
for match in sentence_endings:
    boundary_end = match.end()
    # Inclusive index of the last character consumed by the boundary match.
    end_idx = boundary_end - 1
    sentence = text[start_idx:boundary_end].strip()
    if sentence:  # Skip slices that are empty after stripping
        print(sentence, start_idx, end_idx)
    # The next sentence starts right after the boundary (punctuation + whitespace).
    start_idx = boundary_end

Mr. 0 3
Mole, ridden by AP McCoy, is one of three co-favorites in the Grade One Celebration Chase at Sandown on Saturday. 4 116


In [10]:
# pd display only 3 digits
# NOTE(review): `agreement_results` is not assigned in any cell visible in this
# notebook, and the recorded output of this cell is
# "NameError: name 'agreement_results' is not defined" -- the cell that computes
# it appears to be missing, so this cell fails on a fresh kernel.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# One row per key of agreement_results (orient='index').
# NOTE(review): this rebinds `df`, which an earlier cell uses for the frame read
# from examples_to_annotate.csv.
df = pd.DataFrame.from_dict(agreement_results, orient='index')
df

NameError: name 'agreement_results' is not defined

In [None]:
# Column-wise mean of whatever frame `df` currently refers to.
# NOTE(review): the recorded output is an empty Series; presumably `df` had no
# numeric columns / rows when this was run -- confirm after the preceding cell is fixed.
df.mean()

Series([], dtype: float64)

In [None]:
# Show floats with three decimals, then tabulate span_agreement_results with one
# row per key (orient='index').
# NOTE(review): `span_agreement_results` is not assigned in any cell visible in
# this notebook -- confirm where it is computed. This also rebinds `df` again.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
df = pd.DataFrame.from_dict(span_agreement_results, orient='index')
df