In [1]:
from sac3 import paraphraser
from sac3.evaluator_fast import Evaluate
from sac3.consistency_checker_fast import SemanticConsistnecyCheck
import time

In [2]:
# Inputs: the question under test and the reference answer that candidate
# responses are compared against in the consistency-check cells.
question = "is 3691 a prime number?"
target_answer = "Yes, it is a prime number."

# Evaluator shared by the self-evaluation cells; bound to gpt-3.5-turbo.
llm_evaluate = Evaluate(model="gpt-3.5-turbo")

In [3]:
# Self-evaluation (baseline path): request 10 sampled answers to `question`
# at temperature 1.0 and report how long the call took.
# NOTE(review): presumably one model request per sample -- confirm in
# sac3.evaluator_fast.
#
# The interval is measured with time.perf_counter() rather than time.time():
# perf_counter is monotonic and high-resolution, so the reported duration
# cannot be skewed by system clock adjustments during the call.
t0 = time.perf_counter()
self_responses = llm_evaluate.self_evaluate(
    self_question=question,
    temperature=1.0,
    self_num=10,
)
print('self evaluation time', time.perf_counter() - t0)
print('self_responses', self_responses)

self evaluation time 56.77655100822449
self_responses ['To determine if 3691 is a prime number, we need to check if it is divisible by any number other than 1 and itself.\n\nBy trying to divide 3691 by numbers starting from 2 and going up to the square root of 3691 (which is approximately 60.74), we find that it is not divisible evenly by any whole number between 2 and the square root. Therefore, 3691 is a prime number.', 'Yes, 3691 is a prime number.', 'To determine if 3691 is a prime number, we need to check if it is divisible by any numbers other than 1 and itself.\n\nChecking divisibility, we can start by dividing 3691 by 2, but we find that it is not divisible evenly. \n\nNext, we try dividing it by odd numbers up to the square root of 3691, which is approximately 60.75. \n\nDividing 3691 by numbers like 3, 5, 7, 11, 13, 17, etc., we find that it is not divisible by any of these numbers. \n\nTherefore, we can conclude that 3691 is indeed a prime number.', 'To determine if 3691 is 

In [4]:
# Fast self-evaluation: same arguments as the baseline self-evaluation
# (10 samples, temperature 1.0) but routed through `self_evaluate_api`.
# NOTE(review): presumably this variant batches or parallelizes the requests
# -- confirm in sac3.evaluator_fast.
#
# The interval is measured with time.perf_counter() rather than time.time():
# perf_counter is monotonic and high-resolution, so the reported duration
# cannot be skewed by system clock adjustments during the call.
t1 = time.perf_counter()
fast_self_responses = llm_evaluate.self_evaluate_api(
    self_question=question,
    temperature=1.0,
    self_num=10,
)
print('fast self evaluation time', time.perf_counter() - t1)
print('fast self_responses', fast_self_responses)

fast self evaluation time 11.840163946151733
fast self_responses ['To determine if 3691 is a prime number, we can check if it has any factors other than 1 and itself. We can do this by dividing 3691 by all numbers from 2 to the square root of 3691. \n\nUpon dividing, we find that 3691 is not divisible evenly by any number between 2 and the square root of 3691. Therefore, 3691 is a prime number.\n\nTherefore, 3691 is a prime number.', 'To determine if 3691 is a prime number, we can check if it has any divisors other than 1 and itself. \n\nTo do this, we can check if any prime numbers less than the square root of 3691 divide evenly into it. \n\nThe square root of 3691 is approximately 60.828. \n\nChecking prime numbers up to 60, we find that 7 and 53 are divisors of 3691. \n\nTherefore, 3691 is not a prime number.', 'To determine if 3691 is a prime number, we can check if it is divisible by any whole number between 2 and the square root of 3691. \n\nThe square root of 3691 is approximate

In [5]:
# Consistency checker bound to gpt-3.5-turbo; reused by the fast-checker cell.
scc = SemanticConsistnecyCheck(model='gpt-3.5-turbo')

# Baseline consistency check: score the sampled `self_responses` against
# `target_answer` for `question`. Temperature 0.0 is passed to the checker
# (presumably to keep its judgments as deterministic as possible).
# The result prints as a (score, per-candidate 0/1 list) tuple.
# NOTE(review): confirm in sac3 whether 1 marks a consistent or an
# inconsistent candidate.
#
# Only the scoring call is timed (checker construction is excluded), using
# time.perf_counter() instead of time.time() because it is monotonic and not
# affected by system clock adjustments.
t2 = time.perf_counter()
consistency_res = scc.score_scc(
    question,
    target_answer,
    candidate_answers=self_responses,
    temperature=0.0,
)
print('consistency check time', time.perf_counter() - t2)
print('consistency check result', consistency_res)

consistency check time 10.31699824333191
consistency check result (0.1, [0, 0, 0, 0, 0, 0, 1, 0, 0, 0])


In [6]:
# Fast consistency check: score `fast_self_responses` against `target_answer`
# via `score_scc_api`, with temperature 0.0 as in the baseline check.
# Note that the candidates here come from the fast self-evaluation cell, so
# the result is not expected to match the baseline element-for-element.
# NOTE(review): presumably this variant batches or parallelizes the requests
# -- confirm in sac3.consistency_checker_fast.
#
# The interval is measured with time.perf_counter() rather than time.time():
# perf_counter is monotonic and high-resolution, so the reported duration
# cannot be skewed by system clock adjustments during the call.
t3 = time.perf_counter()
fast_consistency_res = scc.score_scc_api(
    question,
    target_answer,
    candidate_answers=fast_self_responses,
    temperature=0.0,
)
print('fast consistency check time', time.perf_counter() - t3)
print('fast consistency check result', fast_consistency_res)

fast consistency check time 1.2028679847717285
fast consistency check result (0.1, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
