# 1. Imports

In [1]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_core.messages import HumanMessage, SystemMessage
from openai import AsyncOpenAI
from anthropic import AsyncAnthropic
from langchain.llms import HuggingFaceHub
import os

import pandas as pd
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import csv
#import seaborn as sns

import asyncio
from aiolimiter import AsyncLimiter
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

from models import *

os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
os.environ["OPENAI_API_KEY"] = ""

# 2. Data Imports

## 2.1 US FFA Exam

In [50]:
ffa_soil_practicum = pd.read_csv(r'../regional_best_practices/soil_exams/ffa_soil_practicum.csv')
ffa_soil_practicum

Unnamed: 0,question,answer,region
0,Soil has two key characteristics that help to ...,B,USA
1,"World‐wide, soils have been classified into so...",C,USA
2,What soil taxonomy order is found in the dries...,A,USA
3,One soil order is comprised of deeply weathere...,A,USA
4,Which soil would be the most likely to resist ...,B,USA
5,"Using a soil texture triangle, what type of so...",C,USA
6,"Using the soil texture triangle, select the co...",C,USA
7,The soil consists of four major components i.e...,D,USA
8,Soil texture is made up of soil particles and ...,D,USA
9,Field capacity is the point where the soil wat...,B,USA


## 2.2 Brazil Expert Q&A

In [51]:
json_pattern = os.path.join(
    "..", "regional_best_practices", "brazil_questions", "bulk", "*.json"
)

file_list = [f for f in glob.glob(json_pattern, recursive=True)]


brazil_questions = pd.read_json(r'D:\Users\elias\Desktop\Oxford lab rotation\llm_spartial_generalization\regional_best_practices\brazil_questions\bulk\bulk-algodao.json', lines=True)
brazil_questions = brazil_questions[["question", "answer"]].dropna().sample(n=20)

for file in file_list[1:]:
    #open file as dataframe
    df = pd.read_json(file, lines=True).drop(columns= ["index"]).dropna()
    # randomly select 25 questions	
    df = df.sample(n=20)
    #append to brazil_questions
    brazil_questions = pd.concat([brazil_questions, df[["question", "answer"]].dropna()], ignore_index=True)

brazil_questions["region"] = "Brazil"

# save questions to csv
brazil_questions.to_csv(r'../regional_best_practices/brazil_questions/brazil_questions.csv', index = False)


In [52]:
brazil_questions = pd.read_csv(r'../regional_best_practices/brazil_questions/brazil_questions.csv')
brazil_questions

Unnamed: 0,question,answer,region
0,Quais os delineamentos experimentais mais usad...,<p>Os delineamentos experimentais utilizados n...,Brazil
1,O que é um fio texturizado?,"<p>É o fio constituído por filamentos, aos qua...",Brazil
2,Existe controle químico para doenças causadas ...,"<p>Sim. Porém, seu uso não é comum em virtude ...",Brazil
3,Como é feita a alimentação da usina para o ben...,<p>Os fardões de algodão em caroço são desmanc...,Brazil
4,"Quais as cultivares resistentes a doenças, dis...","<p>As cultivares resistentes a doenças, dispon...",Brazil
...,...,...,...
215,Qual é o efeito do ambiente sobre a aplicação ...,"<p>O vento, a temperatura e a umidade relativa...",Brazil
216,A adubação de manutenção do vinhedo deve ser r...,<p>Se o vinhedo for conduzido conforme os crit...,Brazil
217,O que é a análise sensorial do vinho?,"<p>É a avaliação qualitativa do vinho, efetuad...",Brazil
218,A que distância do solo deve ficar a região da...,"<p>No caso de muda enxertada, é conveniente qu...",Brazil


## 2.3 India Exam

In [53]:
# load temp_data\india_quiz_questions.json

india_questions = pd.read_json(r'../temp_data/india_quiz_questions.json')

india_questions["region"] = "India"
india_questions["question"] = india_questions["question"] + " Answer options: " + india_questions["answer_options"]
india_questions.drop(columns=["answer_options"], inplace=True)
india_questions = india_questions.rename(columns={"correct_answer": "answer"})

india_questions

Unnamed: 0,question,answer,region
0,Glycolysis is known as? Answer options: A. EMP...,A,India
1,“Cell Organiser” is? Answer options: A. Riboso...,B,India
2,"Rice seed is known as? Answer options: A. Pod,...",D,India
3,Paddy inflorescence is? Answer options: A. Ear...,D,India
4,Fruit ripening hormone? Answer options: A. Cyt...,C,India
...,...,...,...
282,Muga silk is reared on? Answer options: A) Som...,A,India
283,When the plant is propagated by small tissue o...,B,India
284,Which among the following is the highest prior...,A,India
285,Which of the following is a bacterial infectio...,D,India


In [54]:
questions_df = pd.DataFrame(columns=["question", "answer", "region"])
questions_df = pd.concat([questions_df, india_questions, brazil_questions, ffa_soil_practicum])
questions_df

Unnamed: 0,question,answer,region
0,Glycolysis is known as? Answer options: A. EMP...,A,India
1,“Cell Organiser” is? Answer options: A. Riboso...,B,India
2,"Rice seed is known as? Answer options: A. Pod,...",D,India
3,Paddy inflorescence is? Answer options: A. Ear...,D,India
4,Fruit ripening hormone? Answer options: A. Cyt...,C,India
...,...,...,...
22,One soil order is comprised mainly of organic ...,A,USA
23,Which soil would be the most likely to resist ...,B,USA
24,"Using a soil texture triangle, what type of so...",D,USA
25,"Using the Soil Texture Triangle, select the co...",A,USA


# 3. Answer Generation

## 3.1 HuggingFace Models

In [None]:
models = {
    "gemma_7b_it": gemma_7b_it,
    "gemma_2b_it": gemma_2b_it,
    "mistral_7b": mistral_7b,
    "mistral_8x7b": mistral_8x7b,
    "llama2_7b": llama2_7b,
    "llama2_13b": llama2_13b,
}

In [55]:
prompts = ["""Answer the given question. Return the LETTER of the correct answer.""",
    """You are an expert in agriculture. Answer the given question about agriculture. Return the LETTER of the correct answer.""",
    """You are an expert in agriculture. Answer the given question about agriculture truthfully, concisely, and precisely for the described location. Return the LETTER of the correct answer.""",
    """You are an expert in agriculture. Answer the given question about agriculture truthfully, concisely, and precisely for the described location. If you answer correctly, you will receive $1,000. If you answer incorrectly, you will be fined $1,000. Return the LETTER of the correct answer.""",
    """You are an expert in agriculture. Answer the given question about agriculture truthfully, concisely, and precisely for the described location. 
    Think step by step. FIRST, start with a "Thought:" step, where you think about possible answer options. Be critical of your answer and think of the possible options to answer the given question. THEN, based on these considerations, give in the "Final Answer:" step your final answer. Return the LETTER of the correct answer."""
]

In [None]:
quiz_results_hf_df = pd.DataFrame(columns=["question", "template_answer", "llm_answer", "region", "model", "prompt"])

i = 0

for index, dict in questions_df.iterrows():
    i += 1
    print("################## " + str(i))
    question = dict['question']
    region = dict['region']
    template_answer = dict['answer']
    question = "Region: " + region + ", Question: " + question
    for prompt in prompts:
        for model in models:
            print(model)
            completion = models[model](prompt, question)
            # evaluation = chat(evaluation_chat_prompt.format_prompt(question=question, template_answer=template_answer, student_answer=completion).to_messages()).content
            new_row = pd.DataFrame([{"question": question, "template_answer": template_answer, "llm_answer": completion, "model": model, "region": region, "prompt": prompt}])
            quiz_results_df = pd.concat([quiz_results_df, new_row], ignore_index=True)
        print(completion)
    quiz_results_hf_df.to_csv(r'../temp_data/quiz_results_hf_df.csv', sep = ';', index = False)

In [None]:
quiz_results_hf_df

## 3.2. OpenAI Models

In [None]:
client = AsyncOpenAI(api_key="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")

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(20))
async def async_gpt_3_5_turbo(prompt, user_input):
    response = await client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_input}
    ]
    )
    return response.choices[0].message.content



@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(20))
async def async_gpt_4_turbo(prompt, user_input):
    response = await client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_input}
    ]
    )
    return response.choices[0].message.content

In [None]:
oa_models = {
    "gpt_4_turbo": async_gpt_4_turbo,
    "gpt_3_5_turbo": async_gpt_3_5_turbo
}

In [None]:
# Initialize AsyncLimiter: 100 operations per minute means approximately 1.67 operations per second.
limiter = AsyncLimiter(5, 1)

# Assuming json_evaluation_multiple_seasons is an async function that accepts a text and returns a JSON.

quiz_results_oa_df = pd.DataFrame()

async def async_eval(prompt, model, row):
    question = row['question']
    region = row['region']
    template_answer = row['answer']
    question = "Region: " + region + ", Question: " + question
    completion = await oa_models[model](prompt, question)
    new_row = {"question": question, "template_answer": template_answer, "llm_answer": completion, "model": model, "region": region, "prompt": prompt}
    return new_row

async def process_documents(documents_df):
    tasks = []
    for index, row in documents_df.iterrows():
        for prompt in prompts:
            for model in oa_models:
                async with limiter:
                    task = asyncio.create_task(async_eval(prompt, model, row))
                    tasks.append(task)
    results = await asyncio.gather(*tasks, return_exceptions=True)
    print(results)
    for result in results:
        if isinstance(result, Exception):
            print(f"An error occurred: {result}")
    return pd.DataFrame([result for result in results if not isinstance(result, Exception)])


# In case the loop is already running, avoid using loop.run_until_complete()
if not loop.is_running():
    quiz_results_oa_df = loop.run_until_complete(process_documents(questions_df))
else:
    quiz_results_oa_df = await process_documents(questions_df) 


quiz_results_oa_df.to_csv(r'../temp_data/quiz_results_oa_df.csv', sep = ';', index = False)

quiz_results_oa_df

In [None]:
quiz_results_oa_df

## 3.3. Anthopic Models

In [56]:
client = AsyncAnthropic()


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(20))
async def async_claude_instant(prompt, user_input):
    response = await client.messages.create(
        max_tokens=2048,
        messages=[
            {
                "role": "user",
                "content": prompt + """ 
                
                """ + user_input,
            }
        ],
        model="claude-instant-1.2",
    )
    return response.choices[0].message.content


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(20))
async def async_claude_3_sonnet(prompt, user_input):
    response = await client.messages.create(
        max_tokens=2048,
        system= prompt,
        messages=[
            {
                "role": "user",
                "content": user_input
            }
        ],
        model="claude-3-sonnet-20240229",
    )
    return response


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(20))
async def async_claude_3_opus(prompt, user_input):
    response = await client.messages.create(
        max_tokens=2048,
        system= prompt,
        messages=[
            {
                "role": "user",
                "content": user_input
            }
        ],
        model="claude-3-opus-20240229",
    )
    return response.choices[0].message.content

In [57]:
anthropic_models = {
    "claude_instant": async_claude_instant,
    "claude_3_sonnet": async_claude_3_sonnet,
    "claude_3_opus": async_claude_3_opus
}

In [59]:
# Initialize AsyncLimiter: 100 operations per minute means approximately 1.67 operations per second.
limiter = AsyncLimiter(1, 1)

# Assuming json_evaluation_multiple_seasons is an async function that accepts a text and returns a JSON.

quiz_results_anthropic_df = pd.DataFrame()

async def async_eval(prompt, model, row):
    question = row['question']
    region = row['region']
    template_answer = row['answer']
    question = "Region: " + region + ", Question: " + question
    completion = await anthropic_models[model](prompt, question)
    new_row = {"question": question, "template_answer": template_answer, "llm_answer": completion, "model": model, "region": region, "prompt": prompt}
    return new_row

async def process_documents(documents_df):
    tasks = []
    for index, row in documents_df.iterrows():
        for prompt in prompts:
            for model in anthropic_models:
                async with limiter:
                    task = asyncio.create_task(async_eval(prompt, model, row))
                    tasks.append(task)
                    if len(tasks) % 5 == 0:
                        print(f"Submitted {len(tasks)} tasks")
    results = await asyncio.gather(*tasks, return_exceptions=True)
    print(results)
    for result in results:
        if isinstance(result, Exception):
            print(f"An error occurred: {result}")
    return pd.DataFrame([result for result in results if not isinstance(result, Exception)])


# In case the loop is already running, avoid using loop.run_until_complete()
if not loop.is_running():
    quiz_results_anthropic_df = loop.run_until_complete(process_documents(questions_df[150:250]))
else:
    quiz_results_anthropic_df = await process_documents(questions_df[150:250]) 


quiz_results_anthropic_df.to_csv(r'../temp_data/quiz_results_anthropic_df2.csv', sep = ';', index = False)

quiz_results_anthropic_df

Submitted 5 tasks
Submitted 10 tasks
Submitted 15 tasks
Submitted 20 tasks
Submitted 25 tasks
Submitted 30 tasks
Submitted 35 tasks
Submitted 40 tasks
Submitted 45 tasks
Submitted 50 tasks
Submitted 55 tasks
Submitted 60 tasks
Submitted 65 tasks
Submitted 70 tasks
Submitted 75 tasks
Submitted 80 tasks
Submitted 85 tasks
Submitted 90 tasks
Submitted 95 tasks
Submitted 100 tasks
Submitted 105 tasks
Submitted 110 tasks
Submitted 115 tasks
Submitted 120 tasks
Submitted 125 tasks
Submitted 130 tasks
Submitted 135 tasks
Submitted 140 tasks
Submitted 145 tasks
Submitted 150 tasks
Submitted 155 tasks
Submitted 160 tasks
Submitted 165 tasks
Submitted 170 tasks
Submitted 175 tasks
Submitted 180 tasks
Submitted 185 tasks
Submitted 190 tasks
Submitted 195 tasks
Submitted 200 tasks
Submitted 205 tasks
Submitted 210 tasks
Submitted 215 tasks
Submitted 220 tasks
Submitted 225 tasks
Submitted 230 tasks
Submitted 235 tasks
Submitted 240 tasks
Submitted 245 tasks
Submitted 250 tasks
Submitted 255 tasks


Unnamed: 0,question,template_answer,llm_answer,model,region,prompt
0,"Region: India, Question: Mating in sheep is ca...",B,"Message(id='msg_01TnrVVX9E5bMTLUQDojYZsC', con...",claude_3_sonnet,India,Answer the given question. Return the LETTER o...
1,"Region: India, Question: Mating in sheep is ca...",B,"Message(id='msg_01WHGvRwo9M82VnwcEZwThPm', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
2,"Region: India, Question: Mating in sheep is ca...",B,"Message(id='msg_01WUHmf8es2owycBYxzjGEZF', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
3,"Region: India, Question: Mating in sheep is ca...",B,"Message(id='msg_01RdsWRrcLaupMeA4Y5jzM9y', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
4,"Region: India, Question: Mating in sheep is ca...",B,"Message(id='msg_01RSXCKqcHNkwNczo9Ys3Kui', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
...,...,...,...,...,...,...
483,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01F5a3wh6PUk4ofcRahAjz7J', con...",claude_3_sonnet,India,Answer the given question. Return the LETTER o...
484,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01733ji9Rk4hebx655hY5pN8', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
485,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01HcgnK1rKxFVq58ePovewKy', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
486,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01RpqLJRM4ZZUWoJrSos4cwf', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...


In [60]:
anthropic1 = pd.read_csv(r'../temp_data/quiz_results_anthropic_df1.csv', sep = ';')
anthropic2 = pd.read_csv(r'../temp_data/quiz_results_anthropic_df2.csv', sep = ';')

anthropic_df = pd.concat([anthropic1, anthropic2])
anthropic_df

Unnamed: 0,question,template_answer,llm_answer,model,region,prompt
0,"Region: India, Question: Glycolysis is known a...",A,"Message(id='msg_011SamNmjRi4KWsWSUEdMHge', con...",claude_3_sonnet,India,Answer the given question. Return the LETTER o...
1,"Region: India, Question: Glycolysis is known a...",A,"Message(id='msg_0188g8qKDrCFNsDXmwmC27qJ', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
2,"Region: India, Question: Glycolysis is known a...",A,"Message(id='msg_01KiMz1r6zqUQtGNMcwM8t87', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
3,"Region: India, Question: Glycolysis is known a...",A,"Message(id='msg_012kLMzkThkmvuesJjFmETCw', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
4,"Region: India, Question: Glycolysis is known a...",A,"Message(id='msg_01XNhGAXmBkGEFn8CStLhw6Y', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
...,...,...,...,...,...,...
483,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01F5a3wh6PUk4ofcRahAjz7J', con...",claude_3_sonnet,India,Answer the given question. Return the LETTER o...
484,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01733ji9Rk4hebx655hY5pN8', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
485,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01HcgnK1rKxFVq58ePovewKy', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...
486,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01RpqLJRM4ZZUWoJrSos4cwf', con...",claude_3_sonnet,India,You are an expert in agriculture. Answer the g...


In [7]:
quiz_results_hf_df = pd.read_csv(r'../temp_data/quiz_eval_df.csv', sep = ';')
quiz_results_anthropic_df = pd.read_csv(r'../temp_data/quiz_eval_anthropicdf.csv', sep = ';')

quiz_results_total_df = pd.concat([quiz_results_hf_df, quiz_results_anthropic_df], ignore_index=True)
quiz_results_total_df

Unnamed: 0,question,template_answer,llm_answer,region,model,prompt,evaluation
0,"Region: India, Question: Glycolysis is known a...",A,"\n Sure, here is the answer to the question...",India,gemma_7b_it,Answer the given question. Return the LETTER o...,1
1,"Region: India, Question: Glycolysis is known a...",A,\n The correct answer is A. EMP pathway.\n\...,India,gemma_2b_it,Answer the given question. Return the LETTER o...,1
2,"Region: India, Question: Glycolysis is known a...",A,"\n A. A. Glycolysis is not an acronym, but...",India,mistral_7b,Answer the given question. Return the LETTER o...,1
3,"Region: India, Question: Glycolysis is known a...",A,\n \n A. EMP pathway\n\n Explanation:...,India,mistral_8x7b,Answer the given question. Return the LETTER o...,1
4,"Region: India, Question: Glycolysis is known a...",A,The correct answer is (A) EMP pathway.,India,llama2_7b,Answer the given question. Return the LETTER o...,1
...,...,...,...,...,...,...,...
22094,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01F5a3wh6PUk4ofcRahAjz7J', con...",India,claude_3_sonnet,Answer the given question. Return the LETTER o...,0
22095,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01733ji9Rk4hebx655hY5pN8', con...",India,claude_3_sonnet,You are an expert in agriculture. Answer the g...,1
22096,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01HcgnK1rKxFVq58ePovewKy', con...",India,claude_3_sonnet,You are an expert in agriculture. Answer the g...,1
22097,"Region: India, Question: Akiochi disease in ri...",D,"Message(id='msg_01RpqLJRM4ZZUWoJrSos4cwf', con...",India,claude_3_sonnet,You are an expert in agriculture. Answer the g...,1


# 4. Evaluation

In [21]:
evaluation_template = (
    """You are an expert in agriculture. Evaluate this answer given by a student for the provided question in comparison to the template answer. Provide as result the number 0 if the answer is incorrect, 1 if it is correct.
    Base your decision only on the template answer and the student's answer. Do not use any other information or knowledge.
    
    ALWAYS ONLY RETURN THE NUMBER!"""
)

In [19]:
oa_results = pd.read_csv(r'../temp_data/quiz_results_oa_df.csv', sep = ';')
hf_results = pd.read_csv(r'../temp_data/quiz_results_df.csv', sep = ';')

total_results = pd.concat([oa_results, hf_results], ignore_index=True)

random_questions = pd.DataFrame(brazil_results["question"].unique()).sample(n=50, replace=False)
random_questions

brazil_results = total_results[total_results['region'] == 'Brazil']
brazil_results = brazil_results[brazil_results['question'].isin(random_questions[0])]

brazil_results.to_csv(r'../temp_data/brazil_results.csv', sep = ';', index = False)
brazil_results

Unnamed: 0,question,template_answer,llm_answer,model,region,prompt
2940,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,"A) Sim, mas somente no sistema de consórcio.\n...",gpt_4_turbo,Brazil,Answer the given question. Return the LETTER o...
2941,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,A) Sim,gpt_3_5_turbo,Brazil,Answer the given question. Return the LETTER o...
2942,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,A) Sim,gpt_4_turbo,Brazil,You are an expert in agriculture. Answer the g...
2943,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,"Letter A: Sim, o algodoeiro herbáceo pode ser ...",gpt_3_5_turbo,Brazil,You are an expert in agriculture. Answer the g...
2944,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,Sim.,gpt_4_turbo,Brazil,You are an expert in agriculture. Answer the g...
...,...,...,...,...,...,...
20485,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",\n **Thought:**\n\n* The analysis of sensor...,gemma_2b_it,Brazil,You are an expert in agriculture. Answer the g...
20486,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...","\n Thought: The term ""análise sensorial do...",mistral_7b,Brazil,You are an expert in agriculture. Answer the g...
20487,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",\n \n Thought: A sensory analysis of win...,mistral_8x7b,Brazil,You are an expert in agriculture. Answer the g...
20488,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",Thought: When it comes to the analysis of wi...,llama2_7b,Brazil,You are an expert in agriculture. Answer the g...


In [24]:
oa_client = AsyncOpenAI()
limiter = AsyncLimiter(30, 1)

class TaskStatus:
    def __init__(self, task_id):
        self.task_id = task_id
        self.status = "pending"
        self.attempt = 0

    def update_status(self, status):
        self.status = status

    def increment_attempt(self):
        self.attempt += 1

def update_task_status(retry_state: RetryCallState):
    task_status = retry_state.args[0]  # Assuming the first argument is the TaskStatus object
    task_status.update_status("retrying")
    task_status.increment_attempt()

@retry(wait=wait_random_exponential(min=1, max=20),
       stop=stop_after_attempt(100),
       retry_error_callback=update_task_status)
async def async_gpt_3_5_turbo(task_status, prompt, user_input):
    response = await oa_client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input}
        ]
    )
    task_status.update_status("completed")
    return response.choices[0].message.content

@retry(wait=wait_random_exponential(min=1, max=20),
       stop=stop_after_attempt(100),
       retry_error_callback=update_task_status)
async def async_gpt_4_turbo(task_status, prompt, user_input):
    response = await oa_client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input}
        ]
    )
    task_status.update_status("completed")
    return response.choices[0].message.content


async def async_eval(task_status, prompt, row):
    input_str = "student answer: " + row["llm_answer"] + "; correct answer: " + row["template_answer"]
    result = await async_gpt_4_turbo(task_status, evaluation_template, input_str)
    new_row = row.copy()
    new_row['evaluation'] = result
    return new_row

async def process_documents(documents_df):
    tasks = []
    task_statuses = []
    for index, row in documents_df.iterrows():
        async with limiter:
            task_status = TaskStatus(task_id=index)
            task_statuses.append(task_status)
            task = asyncio.create_task(async_eval(task_status, evaluation_template, row))
            tasks.append(task)
            if len(tasks) % 10 == 0:
                print(f"Submitted {len(tasks)} tasks")

    while True:
        completed_count = sum(1 for task_status in task_statuses if task_status.status == "completed")
        total_count = len(tasks)
        progress_percentage = (completed_count / total_count) * 100
        print(f"Progress: {progress_percentage:.5f}%")

        if all(task_status.status == "completed" for task_status in task_statuses):
            break

        await asyncio.sleep(1)

    results = await asyncio.gather(*tasks, return_exceptions=True)
    return pd.DataFrame([result for result in results if not isinstance(result, Exception)])






subset_df = pd.read_csv(r'../temp_data/brazil_results.csv', sep = ';')
print(len(subset_df))

synth_qa_eval_df = pd.DataFrame()

for i in range(0, len(subset_df), 25):
    print("#################################  ", i)
    loop = asyncio.get_event_loop()
    # In case the loop is already running, avoid using loop.run_until_complete()
    if not loop.is_running():
        synth_qa_eval_df_new = loop.run_until_complete(process_documents(subset_df[i:i+25]))
    else:
        synth_qa_eval_df_new = await process_documents(subset_df[i:i+25])

    synth_qa_eval_df = pd.concat([synth_qa_eval_df, synth_qa_eval_df_new], ignore_index=True)

    synth_qa_eval_df.to_csv(r'../temp_data/quiz_eval_brazil_gpt_4.csv', sep=';', index=False)

synth_qa_eval_df

2000
#################################   0
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 60.00000%
Progress: 100.00000%
#################################   25
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 40.00000%
Progress: 92.00000%
Progress: 96.00000%
Progress: 96.00000%
Progress: 100.00000%
#################################   50
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 48.00000%
Progress: 100.00000%
#################################   75
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 60.00000%
Progress: 100.00000%
#################################   100
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 56.00000%
Progress: 100.00000%
#################################   125
Submitted 10 tasks
Submitted 20 tasks
Progress: 0.00000%
Progress: 28.00000%
Progress: 76.00000%
Progress: 88.00000%
Progress: 96.00000%
Progress: 100.00000%
#################################   150
Submitted 10

Unnamed: 0,question,template_answer,llm_answer,model,region,prompt,evaluation
0,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,"A) Sim, mas somente no sistema de consórcio.\n...",gpt_4_turbo,Brazil,Answer the given question. Return the LETTER o...,1
1,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,A) Sim,gpt_3_5_turbo,Brazil,Answer the given question. Return the LETTER o...,1
2,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,A) Sim,gpt_4_turbo,Brazil,You are an expert in agriculture. Answer the g...,1
3,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,"Letter A: Sim, o algodoeiro herbáceo pode ser ...",gpt_3_5_turbo,Brazil,You are an expert in agriculture. Answer the g...,1
4,"Region: Brazil, Question: O algodoeiro herbáce...",<p>O algodoeiro herbáceo é explorado no Brasil...,Sim.,gpt_4_turbo,Brazil,You are an expert in agriculture. Answer the g...,1
...,...,...,...,...,...,...,...
1995,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",\n **Thought:**\n\n* The analysis of sensor...,gemma_2b_it,Brazil,You are an expert in agriculture. Answer the g...,1
1996,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...","\n Thought: The term ""análise sensorial do...",mistral_7b,Brazil,You are an expert in agriculture. Answer the g...,1
1997,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",\n \n Thought: A sensory analysis of win...,mistral_8x7b,Brazil,You are an expert in agriculture. Answer the g...,1
1998,"Region: Brazil, Question: O que é a análise se...","<p>É a avaliação qualitativa do vinho, efetuad...",Thought: When it comes to the analysis of wi...,llama2_7b,Brazil,You are an expert in agriculture. Answer the g...,1
