In [104]:
import pandas as pd
import numpy as np
import math

In [102]:
# Load the questions and their rationales from a CSV located in the
# current working directory.
df = pd.read_csv('answered_questions_rationales.csv')

In [106]:
# Inspect the column names of the loaded frame.
df.columns

Index(['question', 'choices', 'final_answer', 'if_else', 'contrastive',
       'neutral', 'consensus', 'causal', 'comparative', 'historical'],
      dtype='object')

In [39]:
class UCS:
    """
    Unigram-frequency scores for ranking a text against a pool of generations.

    Tokens are obtained with ``str.split()`` (whitespace only), so case and
    punctuation are significant: ``"blue."``, ``"blue"`` and ``"Blue"`` are
    three different tokens.
    """

    def __init__(self) -> None:
        pass

    def token_probabilities(self, generations):
        """
        Compute the token probabilities for each word in the generations.

        Parameters
        ----------
        generations : iterable of str
            The pool of texts. It is traversed exactly once, so one-shot
            iterables (generators) are supported.

        Returns
        -------
        dict
            Maps each token to ``count / total_token_count`` over the whole
            pool. Empty when the pool contains no tokens.
        """
        token_freq = {}
        total_tokens = 0
        # Count tokens and the grand total in a single pass so every string is
        # split only once.
        for gen in generations:
            for token in gen.split():
                token_freq[token] = token_freq.get(token, 0) + 1
                total_tokens += 1
        # When there are no tokens the comprehension body never runs, so no
        # division by zero can happen here.
        return {token: freq / total_tokens for token, freq in token_freq.items()}

    def compute_UCS(self, generation, token_probs):
        """
        Compute the unigram consistency score (UCS) for a generation.

        This is the sum of ``token_probs`` over every token occurrence in
        ``generation``; tokens missing from ``token_probs`` contribute 0.
        Returns 0 for a generation without tokens.
        """
        return sum(token_probs.get(token, 0) for token in generation.split())

    def compute_WUCS(self, generation, token_probs):
        """
        Compute the weighted unigram consistency score (WUCS) for a generation.

        This is the sum of squared token probabilities over every token
        occurrence in ``generation``; tokens missing from ``token_probs``
        contribute 0. Returns 0 for a generation without tokens.
        """
        return sum(token_probs.get(token, 0) ** 2 for token in generation.split())

    def compute_Consensus_WUCS(self, generation, token_probs):
        """
        Compute the Consensus-WUCS for a generation.

        The result is ``WUCS * exp(mean(log p(token)))``, i.e. the WUCS scaled
        by the geometric mean of the token probabilities.

        Notes
        -----
        * A generation without tokens scores ``0.0`` (previously this raised
          ``ZeroDivisionError``).
        * Tokens missing from ``token_probs`` are given probability 1 in the
          log term (``log(1) == 0``), which avoids ``log(0)``; they still
          count towards the mean's denominator. This differs from the
          UCS/WUCS methods, where unknown tokens contribute 0.
        """
        tokens = generation.split()
        if not tokens:
            # Nothing to average over; consistent with UCS/WUCS being 0 here.
            return 0.0
        WUCS = self.compute_WUCS(generation, token_probs)
        mean_log_prob = sum(
            math.log(token_probs.get(token, 1)) for token in tokens
        ) / len(tokens)
        return WUCS * math.exp(mean_log_prob)

In [41]:
# Shared scorer instance; later cells (including get_highest_rationales)
# look this name up at module level.
ucs = UCS()

In [42]:

# Demo: UCS of one sentence measured against a small pool of three sentences.
generations = [
    "The sky is blue.",
    "The sky looks blue.",
    "Blue is the color of the sky.",
]
# The sentence under evaluation is the first member of the pool.
generation_to_evaluate = generations[0]

token_probs = ucs.token_probabilities(generations)
ucs_score = ucs.compute_UCS(generation_to_evaluate, token_probs)
print(f"UCS for '{generation_to_evaluate}' is: {ucs_score:.4f}")


UCS for 'The sky is blue.' is: 0.5333


In [43]:
# Demo: WUCS of one sentence measured against a small pool of three sentences.
generations = [
    "The sky is blue.",
    "The sky looks blue.",
    "Blue is the color of the sky.",
]
# The sentence under evaluation is the first member of the pool.
generation_to_evaluate = generations[0]

token_probs = ucs.token_probabilities(generations)
wucs_score = ucs.compute_WUCS(generation_to_evaluate, token_probs)
print(f"WUCS for '{generation_to_evaluate}' is: {wucs_score:.4f}")

WUCS for 'The sky is blue.' is: 0.0711


In [44]:

# Demo: Consensus-WUCS of one sentence measured against a pool of three sentences.
generations = [
    "The sky is blue.",
    "The sky looks blue.",
    "Blue is the color of the sky.",
]
# The sentence under evaluation is the first member of the pool.
generation_to_evaluate = generations[0]

token_probs = ucs.token_probabilities(generations)
consensus_wucs_score = ucs.compute_Consensus_WUCS(generation_to_evaluate, token_probs)
print(f"Consensus-WUCS for '{generation_to_evaluate}' is: {consensus_wucs_score:.4f}")

Consensus-WUCS for 'The sky is blue.' is: 0.0095


In [117]:
# Build a 2-D array of rationale texts: one row per question, one column per
# rationale style.
# NOTE(review): 'if_else' is listed in df.columns but is not selected here —
# confirm the omission is intentional.
lines = df.loc[:, [
    'contrastive',
    'neutral',
    'consensus',
    'causal',
    'comparative',
    'historical',
]].to_numpy()
lines.shape

(10962, 6)

In [118]:
def get_highest_rationales(lines, type='UCS', scorer=None):
    """
    Select, for every row of candidate rationales, the highest-scoring one.

    Token probabilities are estimated separately for each row, using only the
    rationales in that row as the pool.

    Parameters
    ----------
    lines : iterable of sequences of str
        One entry per question; each entry holds that question's candidate
        rationales.
    type : {'UCS', 'WUCS', 'Consensus-WUCS'}, default 'UCS'
        Which score to rank by. (The name shadows the ``type`` builtin inside
        this function; it is kept because callers pass it by keyword.)
    scorer : object, optional
        Provides ``token_probabilities(generations)`` and the ``compute_*``
        method matching ``type``. Defaults to the module-level ``ucs``
        instance.

    Returns
    -------
    list of str
        The winning rationale *text* (not its numeric score) for each row, in
        input order. On ties the earliest candidate in the row wins.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported names. Previously an unknown
        value surfaced as ``UnboundLocalError`` (or silently reused a stale
        score).
    """
    method_names = {
        'UCS': 'compute_UCS',
        'WUCS': 'compute_WUCS',
        'Consensus-WUCS': 'compute_Consensus_WUCS',
    }
    if type not in method_names:
        raise ValueError(
            f"Unknown score type {type!r}; expected one of {sorted(method_names)}"
        )
    if scorer is None:
        # Fall back to the notebook-level scorer instance.
        scorer = ucs
    # Resolve the scoring method once instead of re-dispatching per rationale.
    score_fn = getattr(scorer, method_names[type])

    res = []
    for line in lines:
        token_probs = scorer.token_probabilities(line)
        # Keyed by rationale text: identical texts share one entry, and dict
        # insertion order makes max() prefer the earliest candidate on ties.
        line_res = {
            rationale: score_fn(rationale, token_probs) for rationale in line
        }
        res.append(max(line_res, key=line_res.get))

    return res

In [119]:
# Run the selection once per scoring variant, in the order UCS, WUCS,
# Consensus-WUCS. Each result is a list holding the chosen rationale per row.
ucs_score, wucs_score, consensus_wucs_score = (
    get_highest_rationales(lines, type=metric)
    for metric in ('UCS', 'WUCS', 'Consensus-WUCS')
)

In [120]:
# Attach the per-variant selections to the frame as new columns.
# Despite the "_score" suffix, these columns hold the selected rationale
# text for each question rather than a numeric score.
for column_name, selected_rationales in (
    ('ucs_score', ucs_score),
    ('wucs_score', wucs_score),
    ('consensus_wucs_score', consensus_wucs_score),
):
    df[column_name] = selected_rationales

In [124]:
# Persist the augmented frame to a new CSV in the working directory;
# index=False keeps the row index out of the file.
df.to_csv('answered_questions_rationales_UCS_WUCS_CWUCS.csv', index=False)

In [127]:
# Display the augmented frame, including the three selection columns added above the save step.
df

Unnamed: 0,question,choices,final_answer,if_else,contrastive,neutral,consensus,causal,comparative,historical,ucs_score,wucs_score,consensus_wucs_score
0,"""There are 10 apples on an apple tree. Three ...","['park', 'coloring book', 'garden center', 'ma...",park,Let's examine each option meticulously to make...,The choice of 'park' provides a meaningful con...,The option of 'park' manages to encapsulate th...,"In a general sense, 'park' would likely gain t...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'park' provides a meaningful con...,The choice of 'park' provides a meaningful con...
1,"A John is a bum. Much like the stereotype, he...","['bus depot', 'beach', 'train station', 'bridg...",bridge,Let's examine each option meticulously to make...,The choice of 'bridge' provides a meaningful c...,The option of 'bridge' manages to encapsulate ...,"In a general sense, 'bridge' would likely gain...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'bridge' provides a meaningful c...,The choice of 'bridge' provides a meaningful c...
2,A bad person places little value on being hone...,"['excellent', 'upright', 'premium', 'competent...",excellent,Let's examine each option meticulously to make...,The choice of 'excellent' provides a meaningfu...,The option of 'excellent' manages to encapsula...,"In a general sense, 'excellent' would likely g...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'excellent' provides a meaningfu...,The choice of 'excellent' provides a meaningfu...
3,"A bald eagle flies over St. Paul, where is it?","['texas', 'thermal', 'minnesota', 'canada', 'p...",texas,Let's examine each option meticulously to make...,The choice of 'texas' provides a meaningful co...,The option of 'texas' manages to encapsulate t...,"In a general sense, 'texas' would likely gain ...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'texas' provides a meaningful co...,The choice of 'texas' provides a meaningful co...
4,A battleship is a powerful vessel. If you nee...,"['yatch', 'corvette', 'aircraft carrier', 'des...",yatch,Let's examine each option meticulously to make...,The choice of 'yatch' provides a meaningful co...,The option of 'yatch' manages to encapsulate t...,"In a general sense, 'yatch' would likely gain ...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'yatch' provides a meaningful co...,The choice of 'yatch' provides a meaningful co...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10957,what does someone have that causes them commit...,"['problems', 'distress', 'fear', 'go to jail',...",problems,Let's examine each option meticulously to make...,The choice of 'problems' provides a meaningful...,The option of 'problems' manages to encapsulat...,"In a general sense, 'problems' would likely ga...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'problems' provides a meaningful...,The choice of 'problems' provides a meaningful...
10958,what is printed with ink and distributed daily?,"['fountain pen', 'squid', 'newspaper', 'book',...",fountain pen,Let's examine each option meticulously to make...,The choice of 'fountain pen' provides a meanin...,The option of 'fountain pen' manages to encaps...,"In a general sense, 'fountain pen' would likel...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...",The choice of 'fountain pen' provides a meanin...,The choice of 'fountain pen' provides a meanin...,The choice of 'fountain pen' provides a meanin...
10959,when communicating with my boss what should i do,"['misunderstandings', 'transfer of information...",misunderstandings,Let's examine each option meticulously to make...,The choice of 'misunderstandings' provides a m...,The option of 'misunderstandings' manages to e...,"In a general sense, 'misunderstandings' would ...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'misunderstandings' provides a m...,The choice of 'misunderstandings' provides a m...
10960,where is a good place to obtain new soap?,"['supermarket', 'washing', 'cabinet', 'own hom...",supermarket,Let's examine each option meticulously to make...,The choice of 'supermarket' provides a meaning...,The option of 'supermarket' manages to encapsu...,"In a general sense, 'supermarket' would likely...",Examining the question and the possible answer...,When lined up against other available choices ...,"Drawing from past instances, previous quizzes,...","Drawing from past instances, previous quizzes,...",The choice of 'supermarket' provides a meaning...,The choice of 'supermarket' provides a meaning...
