# 1. Imports

In [None]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from openai import AsyncOpenAI
from langchain.llms import HuggingFaceHub
import os

import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
import asyncio
from aiolimiter import AsyncLimiter
import csv

import seaborn as sns

from models import *



# 2. Data Imports

In [None]:
# Load the generated questions and keep only the rows whose question text does
# not match the pattern 'document'.
questions_df = pd.read_json('../temp_data/questions.json').loc[
    lambda frame: ~frame['question'].str.contains('document')
]
questions_df

# 3. Answer Generation

## 3.1. Hugging Face Models

In [None]:
# Registry of the models queried in this section, keyed by the label that ends
# up in the "model" column of the results. Iteration order of this dict is the
# order in which the models are called for each question.
# Each value is invoked as value(prompt, question); the callables presumably
# come from `from models import *` -- confirm against models.py.
models = {
    "gemma_7b_it": gemma_7b_it,
    "gemma_2b_it": gemma_2b_it,
    "mistral_7b": mistral_7b,
    "mistral_8x7b": mistral_8x7b,
    "llama2_7b": llama2_7b,
    "llama2_13b": llama2_13b,
}

In [None]:
# Instruction prompts to evaluate; every model is asked every question once per
# entry in this list.
prompts = [
    "You are an expert in agriculture. Answer the given question about agriculture "
    "truthfully, concisely, and precisely for the described location.",
]

In [None]:
# Ask every model in `models` every question (once per prompt) and checkpoint
# the collected answers to CSV after each question, so an interrupted run keeps
# everything gathered up to the last completed question.
hf_result_columns = ["question", "template_answer", "llm_answer", "region", "model", "prompt"]
hf_result_rows = []
quiz_results_hf_df = pd.DataFrame(columns=hf_result_columns)

for i, (_, row) in enumerate(questions_df.iterrows(), start=1):
    print("################## " + str(i))
    region = row['location']
    template_answer = row['answer']
    # The region is folded into the question text; this combined string is what
    # the model receives and what is stored in the "question" column.
    question = "Region: " + region + ", Question: " + row['question']
    for prompt in prompts:
        for model in models:
            print(model)
            completion = models[model](prompt, question)
            # Log each answer right after the model name it belongs to.
            print(completion)
            hf_result_rows.append({
                "question": question,
                "template_answer": template_answer,
                "llm_answer": completion,
                "model": model,
                "region": region,
                "prompt": prompt,
            })
    # Rebuild the frame from the plain row list rather than concatenating one
    # single-row frame per answer; the column order matches the original layout.
    quiz_results_hf_df = pd.DataFrame(hf_result_rows, columns=hf_result_columns)
    quiz_results_hf_df.to_csv(r'../temp_data/synth_qa_results_hf_df.csv', sep=';', index=False)

In [None]:
# Reload the checkpointed answers from disk and drop every row whose question
# text matches the pattern "document".
quiz_results_hf_df = pd.read_csv(
    '../temp_data/synth_qa_results_hf_df.csv',
    sep=';',
).loc[lambda frame: ~frame["question"].str.contains("document")]
quiz_results_hf_df

## 3.2. OpenAI Models

In [None]:
# Registry of the models queried in this section, keyed by the label written to
# the "model" column of the results. Each value is awaited as
# value(prompt, question); the coroutine functions presumably come from
# `from models import *` -- confirm against models.py.
oa_models = {
    "gpt_4_turbo": async_gpt_4_turbo,
    "gpt_3_5_turbo": async_gpt_3_5_turbo
}

In [None]:
# Rate limiter: AsyncLimiter(5, 1) admits at most 5 acquisitions per 1-second window (about 300 per minute).
limiter = AsyncLimiter(5, 1)

# NOTE(review): an older note targeted 100 operations per minute (~1.67/s), which these arguments do not enforce -- confirm the intended rate.

# Empty placeholder; it is replaced by the frame returned from process_documents further down in this cell.
synth_qa_oa_results_df = pd.DataFrame()

async def async_eval(prompt, model, row):
    """Ask one model from ``oa_models`` one question and return a result record.

    Args:
        prompt: Prompt text forwarded as the first argument to the model callable.
        model: Key into ``oa_models`` selecting the callable to await.
        row: Question record indexable by 'question', 'location' and 'answer'.

    Returns:
        A dict with the keys question (region-prefixed question text),
        template_answer, llm_answer, model, region and prompt.
    """
    raw_question, region, template_answer = row['question'], row['location'], row['answer']
    # The region is prepended so the model sees the location with the question.
    located_question = "Region: " + region + ", Question: " + raw_question
    llm_answer = await oa_models[model](prompt, located_question)
    return {
        "question": located_question,
        "template_answer": template_answer,
        "llm_answer": llm_answer,
        "model": model,
        "region": region,
        "prompt": prompt,
    }

async def process_documents(documents_df):
    """Query every model in ``oa_models`` for every row and prompt.

    One task is scheduled per (row, prompt, model) combination. Failed tasks
    are reported on stdout and left out of the returned frame.

    Args:
        documents_df: DataFrame whose rows are passed to ``async_eval``.

    Returns:
        A DataFrame with one row per successful ``async_eval`` result; empty
        when nothing succeeded.
    """
    tasks = []
    for _, row in documents_df.iterrows():
        for prompt in prompts:
            for model in oa_models:
                # The limiter is held only while the task is scheduled, so it
                # paces how fast requests are started, not how many are in flight.
                async with limiter:
                    tasks.append(asyncio.create_task(async_eval(prompt, model, row)))
    results = await asyncio.gather(*tasks, return_exceptions=True)
    print(results)
    rows = []
    for result in results:
        # gather(return_exceptions=True) can also hand back BaseException
        # subclasses such as CancelledError; those must not become data rows.
        if isinstance(result, BaseException):
            print(f"An error occurred: {result}")
        else:
            rows.append(result)
    return pd.DataFrame(rows)

loop = asyncio.get_event_loop()

# In case the loop is already running, avoid using loop.run_until_complete()
if not loop.is_running():
    synth_qa_oa_results_df = loop.run_until_complete(process_documents(questions_df))
else:
    synth_qa_oa_results_df = await process_documents(questions_df) 


synth_qa_oa_results_df.to_csv(r'../temp_data/synth_qa_oa_results_df.csv', sep = ';', index = False)

synth_qa_oa_results_df

In [None]:
# Reload the stored answers from the semicolon-separated file.
synth_qa_oa_results_df = pd.read_csv(
    '../temp_data/synth_qa_oa_results_df.csv',
    delimiter=';',
)
synth_qa_oa_results_df

## 3.3. Anthropic Models

In [None]:
# Registry of the models queried in this section, keyed by the label written to
# the "model" column of the results. Each value is awaited as
# value(prompt, question); the coroutine functions presumably come from
# `from models import *` -- confirm against models.py.
anthropic_models = {
    "claude_instant": async_claude_instant,
    "claude_3_sonnet": async_claude_3_sonnet,
    "claude_3_opus": async_claude_3_opus
}

In [None]:
# Rate limiter: AsyncLimiter(2, 1) admits at most 2 acquisitions per 1-second window (about 120 per minute). This rebinds the global `limiter` name.
limiter = AsyncLimiter(2, 1)

# NOTE(review): an older note targeted 100 operations per minute (~1.67/s), which these arguments do not enforce -- confirm the intended rate.

# Empty placeholder; it is replaced by the frame returned from process_documents further down in this cell.
synth_qa_results_anthropic_df = pd.DataFrame()

async def async_eval(prompt, model, row):
    """Ask one model from ``anthropic_models`` one question and return a result record.

    Defining this function replaces any earlier ``async_eval`` in the session.

    Args:
        prompt: Prompt text forwarded as the first argument to the model callable.
        model: Key into ``anthropic_models`` selecting the callable to await.
        row: Question record indexable by 'question', 'location' and 'answer'.

    Returns:
        A dict with the keys question (region-prefixed question text),
        template_answer, llm_answer, model, region and prompt.
    """
    raw_question, region, template_answer = row['question'], row['location'], row['answer']
    # The region is prepended so the model sees the location with the question.
    located_question = "Region: " + region + ", Question: " + raw_question
    llm_answer = await anthropic_models[model](prompt, located_question)
    return {
        "question": located_question,
        "template_answer": template_answer,
        "llm_answer": llm_answer,
        "model": model,
        "region": region,
        "prompt": prompt,
    }

async def process_documents(documents_df):
    """Query every model in ``anthropic_models`` for every row and prompt.

    One task is scheduled per (row, prompt, model) combination. Failed tasks
    are reported on stdout and left out of the returned frame. Defining this
    function replaces any earlier ``process_documents`` in the session.

    Args:
        documents_df: DataFrame whose rows are passed to ``async_eval``.

    Returns:
        A DataFrame with one row per successful ``async_eval`` result; empty
        when nothing succeeded.
    """
    tasks = []
    for _, row in documents_df.iterrows():
        for prompt in prompts:
            for model in anthropic_models:
                # The limiter is held only while the task is scheduled, so it
                # paces how fast requests are started, not how many are in flight.
                async with limiter:
                    tasks.append(asyncio.create_task(async_eval(prompt, model, row)))
    results = await asyncio.gather(*tasks, return_exceptions=True)
    print(results)
    rows = []
    for result in results:
        # gather(return_exceptions=True) can also hand back BaseException
        # subclasses such as CancelledError; those must not become data rows.
        if isinstance(result, BaseException):
            print(f"An error occurred: {result}")
        else:
            rows.append(result)
    return pd.DataFrame(rows)


# In case the loop is already running, avoid using loop.run_until_complete()
if not loop.is_running():
    synth_qa_results_anthropic_df = loop.run_until_complete(process_documents(questions_df))
else:
    synth_qa_results_anthropic_df = await process_documents(questions_df) 


synth_qa_results_anthropic_df.to_csv(r'../temp_data/synth_qa_results_anthropic_df.csv', sep = ';', index = False)

synth_qa_results_anthropic_df

### Total Results

In [None]:
# Stack the OpenAI and Hugging Face answer tables into one frame with a fresh
# 0..n-1 index, then store it as a semicolon-separated file.
# NOTE(review): synth_qa_results_anthropic_df is not part of this "total" --
# confirm whether the Anthropic answers are meant to be left out.
synth_qa_results_df = pd.concat(
    (synth_qa_oa_results_df, quiz_results_hf_df),
    ignore_index=True,
)
synth_qa_results_df.to_csv('../temp_data/synth_qa_total_results_df.csv', sep=';', index=False)
synth_qa_results_df

In [None]:
# Reload the combined answer table from the semicolon-separated file.
synth_qa_total_results_df = pd.read_csv(
    '../temp_data/synth_qa_total_results_df.csv',
    delimiter=';',
)
synth_qa_total_results_df