# Evaluate Classification

## Setup
#### Load the API key and relevant Python libraries.
In this course, we've provided some code that loads the OpenAI API key for you.

In [1]:
import json
import pandas as pd

import os
import openai
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it; the return value is discarded.
_ = load_dotenv(find_dotenv()) # read local .env file

# Hand the key to the openai client; raises KeyError if OPENAI_API_KEY is not set.
openai.api_key  = os.environ['OPENAI_API_KEY']

### Model:

In [16]:
# CSV file with the questions; read into `df` with pd.read_csv further down.
PATH = 'Portuguese.csv'
# Model name passed to the chat completion call for every question.
MODEL = "gpt-3.5-turbo"
# Sampling temperature passed to the chat completion call.
TEMPERATURE = 0.0
# How many times each question is asked per language.
N_REPETITIONS = 5

In [11]:
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo",
                                 temperature=0,
                                 max_tokens=500):
    """Send a chat conversation to the OpenAI API and return the reply text.

    Args:
        messages: List of message dicts ('role' / 'content') forwarded
            unchanged to the API.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The "content" field of the first choice in the API response.
    """
    request_options = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request_options)

    # Only the first choice is used.
    first_choice = completion.choices[0]
    return first_choice.message["content"]

#### Define a template for the questions

In [12]:
def generate_question(question, delimiter="####"):
    """Build the chat messages that ask the model one multiple-choice question.

    Args:
        question: Full question text, including its answer options.
        delimiter: Marker placed immediately before and after the question in
            the user message and announced to the model in the system message.

    Returns:
        A two-element list of message dicts: the system instructions followed
        by the user message holding the delimited question.
    """
    # The prompt text is kept exactly as-is; the trailing backslashes join
    # lines inside the string literal.
    system_message = f"""
    You will be provided with medical queries in both English and Portuguese. \
    The medical query will be delimited with \
    {delimiter} characters.
    Each question will have 4 possible answer options.\
    provide the letter with the answer.

    Provide your output in json format with the \
    key: response.

    Responses: A, B, C, D.

    """

    # The question is wrapped directly in the delimiters. The previous template
    # began with a stray "/" and a newline (a mistyped "\" continuation), which
    # were sent to the model as part of every query.
    user_content = f"{delimiter}{question}{delimiter}"

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_content},
    ]

    return messages


### Questions:

In [14]:
# Load the question set; the query loop reads its 'english' and 'portuguese' columns.
df = pd.read_csv(PATH)

### Ask GPT

In [18]:
# One empty answer list per repetition, kept separately for each language.
responses_english = [list() for _ in range(N_REPETITIONS)]
#reasoning_english = [[] for n in range(N_REPETITIONS)]

responses_portuguese = [list() for _ in range(N_REPETITIONS)]
#reasoning_portuguese = [[] for n in range(N_REPETITIONS)]

In [None]:
def _ask_model(question):
    """Send one question to the model and return its reply parsed from JSON."""
    messages = generate_question(question)
    raw_reply = get_completion_from_messages(messages, MODEL, TEMPERATURE)
    # The system prompt asks for JSON output; json.loads raises if the reply is not valid JSON.
    return json.loads(raw_reply)


# Start from empty buckets so re-running this cell does not append duplicate
# answers to the lists created in the previous cell.
for bucket in responses_english + responses_portuguese:
    bucket.clear()

# Every row of the question table is asked N_REPETITIONS times in each
# language. (A leftover debug `break` used to stop after the third question,
# which left the answer lists shorter than `df` and made the later column
# assignment fail.)
for row in range(df.shape[0]):
    print('*'*50)
    print(f'Question {row+1}: ')

    for n in range(N_REPETITIONS):

        print(f'Test # {n}')

        for label, column, collected in (
            ('English', 'english', responses_english),
            ('Portuguese', 'portuguese', responses_portuguese),
        ):
            print(f'\n{label}:')
            # `row` is a position, so index positionally rather than by label.
            question = df[column].iloc[row]

            # Show the question text only once per question and language.
            if n == 0:
                print(question)

            answer = _ask_model(question)
            print(answer)
            # Keep only the chosen letter, stored under the 'response' key.
            collected[n].append(answer['response'])

        print('*'*50)

**************************************************
Question 1: 
English:
Test # 0
In which ocular region are caliciform cells physiologically found?
a) Cornea.
b) Corneoscleral limbus.
c) Gray line.
d) Semilunar fold.


In [13]:
# Add one column per repetition and language holding the collected answers.
for n in range(N_REPETITIONS):
    english_answers = responses_english[n]
    df[f'responses_english_{n}'] = english_answers
    #df[f'reasoning_english_{n}'] = reasoning_english[n]

    portuguese_answers = responses_portuguese[n]
    df[f'responses_portuguese_{n}'] = portuguese_answers
    #df[f'reasoning_portuguese_{n}'] = reasoning_portuguese[n]

In [14]:
# Create the output folder if it is missing; exist_ok=True replaces the
# separate existence check and is safe if the folder already exists.
os.makedirs('responses', exist_ok=True)

# Dots in the temperature are replaced so the file name has a single extension.
temperature_tag = str(TEMPERATURE).replace('.', '_')
output_path = f"responses/{MODEL}_Temperature{temperature_tag}_{N_REPETITIONS}Repetitions.csv"
df.to_csv(output_path, index=False)