In [1]:
import re
import pandas as pd
from trulens.core import Feedback
from trulens.providers.openai import OpenAI as fOpenAI


In [2]:
def parse_evidence_scores(evidence_scores: str=None):
    """Split an "evidence / score" transcript into structured records.

    The text is expected to be a sequence of entries, optionally separated by
    blank lines, each shaped like::

        The source mentions a Product Forum as a resource.
        Score: 10

    Args:
        evidence_scores: Raw text to parse. ``None``, an empty string, or any
            non-string value (e.g. a NaN read back from a CSV) yields ``[]``.

    Returns:
        A list of ``{'evidence': str, 'score': number}`` dicts in the order the
        entries appear. A score written without a decimal point
        (``Score: 10``) is returned as ``int``; a score written with one
        (``Score: 1.0``) is returned as ``float``.
    """
    if not isinstance(evidence_scores, str) or not evidence_scores:
        return []

    # DOTALL lets a single evidence entry span several lines. The score may be
    # an integer or a decimal, and the final entry does not need a trailing
    # newline (``\Z`` accepts end-of-text instead).
    pattern = re.compile(
        r"(.+?)\nScore: (\d+(?:\.\d+)?)[ \t]*(?:\n|\Z)", re.DOTALL
    )
    return [
        {
            'evidence': evidence.strip(),
            'score': float(score) if '.' in score else int(score),
        }
        for evidence, score in pattern.findall(evidence_scores)
    ]
def parse_reasons(reasons):
    """Parse a multi-statement reasons string into per-statement triplets.

    ``reasons`` is split on the literal marker ``"STATEMENT "``; the text
    before the first marker is discarded. Each remaining chunk is expected to
    look like::

        <id>:
        Criteria: <one hypothesis sentence per line>
        Supporting Evidence: <evidence / score text>

    The "Supporting Evidence" part is handed to ``parse_evidence_scores`` and
    its entries are paired positionally with the criteria lines (``zip``
    truncates to the shorter of the two).

    Args:
        reasons: Raw reasons text. ``None``, an empty string, or any non-string
            value (e.g. a NaN read back from a CSV) yields ``[]``.

    Returns:
        A list with one element per statement chunk. Each element is a list of
        dicts with the keys ``'hypothesis sentence'``, ``'support in source'``
        and ``'score'``. A chunk that does not match the expected layout
        contributes an empty list, so every element has the same type.
    """
    if not isinstance(reasons, str) or not reasons:
        return []

    # Compiled once: the pattern does not depend on the chunk being parsed.
    pattern = re.compile(r"(\d+):\nCriteria: (.*)\nSupporting Evidence: (.*)", re.DOTALL)

    reasons_list = []
    for reason in reasons.split('STATEMENT ')[1:]:
        matches = pattern.match(reason)
        if not matches:
            # Unparseable chunk: keep its slot so positions still line up
            # with the statement order.
            reasons_list.append([])
            continue

        criteria = matches.group(2).split("\n")
        evidence_score_dict_list = parse_evidence_scores(matches.group(3))
        reasons_list.append([
            {
                'hypothesis sentence': criterion,
                'support in source': evidence_score_dict['evidence'],
                'score': evidence_score_dict['score']
            }
            for criterion, evidence_score_dict in zip(criteria, evidence_score_dict_list)
        ])

    return reasons_list


In [3]:
# Score every (source, summary) row of the annotation CSV with the provider's
# groundedness feedback and write the scores and reasons back as two new columns.
MODEL = 'gpt-4o'
CSV_PATH = '../assign/examples_to_annotate.csv'

provider = fOpenAI(model_engine=MODEL)
df = pd.read_csv(CSV_PATH, encoding='utf-8')

scores = []
reasons = []
for index, row in df.iterrows():
    feedback = None
    try:
        feedback = provider.groundedness_measure_with_cot_reasons(source=row['source'], statement=row['summary'])
        # Extract both values before appending either, so a failure here can
        # never leave `scores` and `reasons` with different lengths.
        groundedness_score = feedback[0]
        generated_reasons = feedback[1]['reasons']
    except Exception as exc:
        # Record the failure and move on. The provider is deliberately not
        # called again here: a retry would cost another request and could
        # itself raise, aborting the whole loop.
        print(feedback)
        print('cannot parse score')
        print(f'row {index}: {exc!r}')
        scores.append(None)
        reasons.append(None)
        continue

    print(feedback)
    print(groundedness_score)
    scores.append(groundedness_score)
    reasons.append(generated_reasons)

# Plain column assignment appends a new column at the end, and overwrites it if
# the CSV already contains it from an earlier run (DataFrame.insert would raise).
df[f'Trulens_{MODEL}_scores'] = scores
df[f'Trulens_{MODEL}_reasons'] = reasons
# NOTE(review): this overwrites the input file in place, as the original cell did.
df.to_csv(CSV_PATH, mode='w', index=False, header=True)
            



(1.0, {'reasons': 'STATEMENT 0:\nCriteria: In the \'mini-derby\', Manchester United\'s Under 18s defeated City\'s Under 18s 1-0.\nSupporting Evidence: The source states, "First blood to United after their Under 18s saw off City 1-0 in the \'mini-derby\'."\nScore: 1.0\nSTATEMENT 1:\nCriteria: The match was played at Altrincham\'s home, a 6,000-capacity stadium, and was free for spectators.\nSupporting Evidence: The source states, "Altrincham\'s compact 6,000-capacity home may not be Old Trafford, but it does have a proud history of its own." It also mentions, "Entry was free and close to 1,000 gathered on the seats and terraces of Moss Lane."\nScore: 1.0\nSTATEMENT 2:\nCriteria: The majority of the players were English, with 10 being Mancunian.\nSupporting Evidence: The source states that "no less than 13 out of the 22 players on show at kick-off were English. Of those, 10 were Mancunian."\nScore: 1.0\nSTATEMENT 3:\nCriteria: Callum Gribbin, an England Under 17 star, scored the winning 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


(nan, {'reasons': ''})
nan
(nan, {'reasons': ''})
nan
(1.0, {'reasons': 'STATEMENT 0:\nCriteria: In a match between the Under 18s of Manchester United and Manchester City, United won 1-0 thanks to a free-kick from Callum Gribbin.\nSupporting Evidence: The source states, "First blood to United after their Under 18s saw off City 1-0 in the \'mini-derby\'... Callum Gribbin was the matchwinner for Manchester United with a delightful free-kick."\nScore: 1.0\nSTATEMENT 1:\nCriteria: The game, played at Altrincham\'s Moss Lane, featured a high proportion of English and local players.\nSupporting Evidence: The source states that the match was held at Altrincham\'s Moss Lane and mentions that 13 out of the 22 players at kick-off were English, with 10 being Mancunian, indicating a high proportion of English and local players.\nScore: 1.0\nSTATEMENT 2:\nCriteria: United\'s victory puts them two points behind league leaders Middlesbrough with a game in hand.\nCity have now lost both of their end-o