In [None]:
import pandas as pd
import sys
#PATH = '...' # Add the path to the current directory.
#sys.path.append(PATH)
from agents.ansari import Ansari
import random
import json


In [None]:
from jinja2 import Environment, FileSystemLoader

In [None]:
# Jinja2 environment whose templates are loaded from the 'templates' directory (relative path).
tenv = Environment(loader=FileSystemLoader('templates'))

In [None]:
# Prompt template; it is rendered below with a `question` string and an `options` list.
q_temp = tenv.get_template('ask_question.txt')

In [None]:
# Evaluation set; later cells read its 'question', 'options' and 'correct' columns.
df = pd.read_csv('batik-v1.csv')
# In-memory memo of rendered prompt -> reply text, filled by answer_question.
# It lives only as long as the kernel; re-running this cell empties it.
cache  = {}

In [None]:
def answer_question(question):
    """Ask Ansari one multiple-choice question and return the raw reply text.

    Args:
        question: Row-like mapping with a 'question' string and an 'options'
            string holding comma-separated choices.

    Returns:
        The reply as a single string. The rendered prompt is the cache key, so
        a prompt that was already answered is served from ``cache`` without
        calling the agent again.
    """
    print(f'Answering question: {question["question"]}')
    choices = [part.strip() for part in question['options'].split(',')]
    prompt = q_temp.render(question=question['question'], options=choices)

    # Serve repeated prompts from the memo instead of calling the agent again.
    if prompt in cache:
        print(f'Found {question["question"]} in cache')
        return cache[prompt]

    # A new agent is built for every uncached question
    # (presumably so no conversation state carries over -- TODO confirm).
    agent = Ansari()
    # process_input yields pieces of the reply; None pieces are dropped.
    pieces = (piece for piece in agent.process_input(prompt) if piece is not None)
    result = ''.join(pieces)
    print(f'Answer: {result}')

    cache[prompt] = result
    return result

In [None]:
# Number of prompts answered (and memoised) so far.
len(cache)

In [None]:
# Ask Ansari every question; axis=1 hands each row to answer_question, and the raw reply is stored.
df['json_prediction'] = df.apply(answer_question, axis=1)




In [None]:
# Eyeball the raw replies before parsing them.
df['json_prediction']

In [None]:
def extract_prediction(row):
    """Pull the value of the "answer" key out of a raw model reply.

    The reply in ``row['json_prediction']`` may wrap its JSON in markdown
    fences or surrounding prose. Every '{' in the text is tried as the start
    of a JSON document, and the first one that decodes to an object containing
    an "answer" key wins. Decoding with a real JSON parser (rather than cutting
    the text at the first '}') keeps nested objects, braces inside strings and
    answers that contain the word "json" intact.

    Args:
        row: Row-like mapping with a 'json_prediction' string.

    Returns:
        The answer converted with ``str``.

    Raises:
        ValueError: If no JSON object with an "answer" key is found.
    """
    text = row['json_prediction']
    decoder = json.JSONDecoder()

    start = text.find('{')
    while start != -1:
        try:
            # raw_decode parses one complete JSON value beginning at `start`
            # and ignores whatever follows it (closing fence, trailing prose).
            parsed, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # this '{' does not open valid JSON; try the next one
        else:
            if isinstance(parsed, dict) and 'answer' in parsed:
                print('raw is', text[start:end])
                return str(parsed['answer'])
        start = text.find('{', start + 1)

    raise ValueError(f'No JSON object with an "answer" key found in: {text!r}')

In [None]:
def is_correct(row):
    """Return True when the extracted Ansari answer matches the reference answer.

    Both sides are stripped and lower-cased before comparing; the literal
    'Ayah' is removed from the reference value first.
    """
    def _tidy(text):
        # Ignore surrounding whitespace and letter case.
        return text.strip().lower()

    return _tidy(row['predicted']) == _tidy(row['correct'].replace('Ayah', ''))

In [None]:
# Parse the answer out of each raw Ansari reply.
df['predicted'] = df.apply(extract_prediction, axis=1)

In [None]:
# Flag rows whose parsed answer matches the 'correct' column.
df['correct_prediction'] = df.apply(is_correct, axis=1)

In [None]:
# Tally of matching vs. non-matching Ansari answers.
df['correct_prediction'].value_counts()

In [None]:
# Rows where the Ansari answer did not match the reference.
df[df['correct_prediction'] == False]

In [None]:
import litellm
def answer_question_gpt4(question):
    """Ask GPT-4 (through litellm) one multiple-choice question.

    The prompt is rendered from the same template as the Ansari run. Nothing
    is cached here, so every call issues a request.

    Args:
        question: Row-like mapping with a 'question' string and an 'options'
            string holding comma-separated choices.

    Returns:
        The message object of the first choice (not a plain string); the reply
        text is read from its ``content`` attribute downstream.
    """
    print(f'Answering question: {question["question"]}')
    choices = [part.strip() for part in question['options'].split(',')]
    user_prompt = q_temp.render(question=question['question'], options=choices)

    chat = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': user_prompt},
    ]
    completion = litellm.completion(
        model='gpt-4-1106-preview',
        messages=chat,
        timeout=30.0,
        temperature=0.0,
        metadata={'generation-name': 'ansari'},
        #response_format= { "type" : "json_object" },
        num_retries=5,
    )

    result = completion.choices[0].message
    print(f'Answer: {result}')
    return result

In [None]:
# Ask GPT-4 every question; each cell of the new column holds the returned message object.
df['gpt4_prediction'] = df.apply(answer_question_gpt4, axis=1)

In [None]:
def extract_prediction_gpt4(row):
    """Pull the value of the "answer" key out of a GPT-4 reply message.

    The reply text is read from ``row['gpt4_prediction'].content`` and may
    wrap its JSON in markdown fences or surrounding prose. Every '{' in the
    text is tried as the start of a JSON document, and the first one that
    decodes to an object containing an "answer" key wins. Decoding with a real
    JSON parser (rather than cutting the text at the first '}') keeps nested
    objects, braces inside strings and answers that contain the word "json"
    intact.

    Args:
        row: Row-like mapping whose 'gpt4_prediction' value exposes the reply
            text as a ``content`` attribute.

    Returns:
        The answer converted with ``str``.

    Raises:
        ValueError: If the reply is empty or holds no JSON object with an
            "answer" key.
    """
    # A missing (None) content is treated as an empty reply so the failure is
    # reported as the ValueError below rather than an AttributeError.
    text = row['gpt4_prediction'].content or ''
    decoder = json.JSONDecoder()

    start = text.find('{')
    while start != -1:
        try:
            # raw_decode parses one complete JSON value beginning at `start`
            # and ignores whatever follows it (closing fence, trailing prose).
            parsed, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # this '{' does not open valid JSON; try the next one
        else:
            if isinstance(parsed, dict) and 'answer' in parsed:
                print('raw is', text[start:end])
                return str(parsed['answer'])
        start = text.find('{', start + 1)

    raise ValueError(f'No JSON object with an "answer" key found in: {text!r}')

In [None]:
# Parse the answer out of each GPT-4 reply.
df['predicted_gpt4'] = df.apply(extract_prediction_gpt4, axis=1)

In [None]:
def is_correct_gpt4(row):
    """Return True when the extracted GPT-4 answer matches the reference answer.

    Both sides are stripped and lower-cased before comparing; the literal
    'Ayah' is removed from the reference value first.
    """
    def _tidy(text):
        # Ignore surrounding whitespace and letter case.
        return text.strip().lower()

    return _tidy(row['predicted_gpt4']) == _tidy(row['correct'].replace('Ayah', ''))

In [None]:
# Flag rows whose parsed GPT-4 answer matches the 'correct' column.
df['gpt4_correct_prediction'] = df.apply(is_correct_gpt4, axis=1)

In [None]:
# Tally of matching vs. non-matching GPT-4 answers.
df['gpt4_correct_prediction'].value_counts()

In [None]:
# Rows where the Ansari and GPT-4 correctness flags disagree (exactly one of the two matched).
df[df['correct_prediction'] != df['gpt4_correct_prediction']]