# Evaluate Classification

## Setup

#### Load the API key and libraries.

In [4]:
import json
import pandas as pd

import os
import openai
from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and load the variables it defines into the environment.
_ = load_dotenv(dotenv_path=find_dotenv())

# Indexing os.environ directly raises KeyError right here if the key is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

#### Load the Constants

In [5]:
# CSV file with the questions to evaluate.
PATH = "data/Portuguese.csv"

# Model name and sampling temperature passed to the completion helper.
MODEL, TEMPERATURE = "gpt-4", 0.0

# When True, the prompt asks for a short justification alongside the answer letter.
REASONING = True

# Languages to evaluate; each name is also used as a column name of the CSV.
LANGUAGES = [
    "english",
    "portuguese",
]

### Model:

In [6]:
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo",
                                 temperature=0,
                                 max_tokens=500):
    """Send a chat conversation to ``openai.ChatCompletion.create`` and return the reply text.

    Args:
        messages: Chat messages, forwarded unchanged as the ``messages`` argument.
        model: Model name forwarded to the request.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: Token limit forwarded to the request.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request)

    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message["content"]

#### Define a template for the questions

In [7]:
def generate_question(question, LANGUAGES, REASONING, Responses=('A', 'B', 'C', 'D')):
    """Build the chat messages for one multiple-choice medical question.

    Args:
        question: Text of the question, including its answer options.
        LANGUAGES: Iterable of language names listed in the system prompt.
        REASONING: If truthy, the system prompt asks for a JSON object with
            the keys ``response`` and ``reasoning``; otherwise only ``response``.
        Responses: Sequence of allowed answer letters. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A two-element list of chat messages: the system prompt followed by
        the user message, which is the question wrapped in ``####`` delimiters.
    """
    delimiter = "####"

    languages_text = ", ".join(LANGUAGES)

    if REASONING:
        system_message = f"""
        You will be provided with medical queries in this languages: {languages_text}. \
        The medical query will be delimited with \
        {delimiter} characters.
        Each question will have {len(Responses)} possible answer options.\
        provide the letter with the answer and a short sentence answering why the answer was selected \

        Provide your output in json format with the \
        keys: response and reasoning.

        Responses: {", ".join(Responses)}.

        """
    else:
        system_message = f"""
        You will be provided with medical queries in this languages: {languages_text}. \
        The medical query will be delimited with \
        {delimiter} characters.
        Each question will have {len(Responses)} possible answer options.\
        provide the letter with the answer.

        Provide your output in json format with the \
        key: response.

        Responses: {", ".join(Responses)}.

        """

    # The question goes directly between the delimiters. The earlier template
    # began with a stray "/" (a mistyped line continuation), which put "/" and a
    # newline in front of every question sent to the model.
    user_content = f"{delimiter}{question}{delimiter}"

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_content},
    ]

    return messages


### Questions from a csv file:

In [9]:
# Read the question table from the CSV file configured in PATH.
df = pd.read_csv(filepath_or_buffer=PATH)

### Evaluate the model in question answering per language:

In [11]:
# One empty answer list per language.
responses = {language: [] for language in LANGUAGES}

# Reasoning lists are created only when REASONING is enabled; otherwise the dict stays empty.
reasoning = {language: [] for language in LANGUAGES} if REASONING else {}

In [12]:
# Ask the model every question once per language and collect its answers.
# Each counter value is one row of the question table.
for row in range(df.shape[0]):

    for language in LANGUAGES:
        print('*'*50)
        print(f'Question {row+1}: ')
        print(f'Language: {language}')

        # Positional access, matching the positional range() counter.
        question = df[language].iloc[row]
        print('Question: ')
        print(question)

        # generate_question has three required parameters; calling it with the
        # question alone raises TypeError.
        messages = generate_question(question, LANGUAGES, REASONING)
        raw_response = get_completion_from_messages(messages, MODEL, TEMPERATURE)
        # Convert the string into a JSON object.
        # json.loads raises json.JSONDecodeError if the reply is not valid JSON.
        response = json.loads(raw_response)
        print(response)

        # Append to the lists:
        responses[language].append(response['response'])
        # The reasoning lists and the 'reasoning' key are only requested when
        # REASONING is enabled, so they are only read in that case.
        if REASONING:
            reasoning[language].append(response['reasoning'])

    print('*'*50)

**************************************************
Question 1: 
English:
In which ocular region are caliciform cells physiologically found?
a) Cornea.
b) Corneoscleral limbus.
c) Gray line.
d) Semilunar fold.
{'response': 'A', 'reasoning': 'Caliciform cells, also known as goblet cells, are found in the cornea. They are responsible for the production of the mucin layer of the tear film, which helps to keep the eye moist.'}

Portuguese:
Em qual região ocular células caliciformes são fisiologicamente encontradas?
a)Córnea.
b)Limbo corneoescleral.
c)Linha cinzenta.
d)Prega semilunar.
{'response': 'A', 'reasoning': 'As células caliciformes são encontradas na córnea. Elas são responsáveis pela produção de muco que ajuda a manter a superfície do olho úmida.'}
**************************************************
**************************************************
Question 2: 
English:
Mark the alternative that best correlates the histological characteristics with the respective ocular tissues:

I

In [13]:
# Add the collected answers (and, when enabled, the reasoning) as new columns.
for language in LANGUAGES:
    df[f'responses_{language}'] = responses[language]
    # reasoning has an entry per language only when REASONING is enabled;
    # reading it unconditionally raises KeyError otherwise.
    if REASONING:
        df[f'reasoning_{language}'] = reasoning[language]

In [None]:
# exist_ok=True creates the output directory if needed and is a no-op when it
# already exists, with no separate existence check beforehand.
os.makedirs('responses', exist_ok=True)

# The temperature's decimal point is replaced with "_" in the file name.
temperature_tag = str(TEMPERATURE).replace('.', '_')
df.to_csv(f"responses/{MODEL}_Temperature{temperature_tag}.csv", index=False)