In [120]:
#%%capture
%pip install groq

Note: you may need to restart the kernel to use updated packages.


In [121]:
import groq
from groq import Groq

import pandas as pd
import ast
from tqdm import tqdm
import time
import os

In [143]:
def load_groq_key():
    try:
        # Open and read the entire content of the file
        with open("groq-key.txt", 'r') as file:
            contents = file.read()
        
        return contents
    
    except FileNotFoundError:
        print(f"The file does not exist.")
        return None
    except Exception as e:
        # Handle other potential exceptions (e.g., permission errors)
        print(f"An error occurred while reading the file: {str(e)}")
        return None
    
groq_key = load_groq_key()
os.environ["GROQ_API_KEY"] = groq_key

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

def groq_chat(system, user):
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system
                },
                {
                    "role": "user",
                    "content": user,
                }
            ],
            model="llama3-70b-8192",
            max_tokens=2048,
        )

    except groq.APIConnectionError as e:
        print("The server could not be reached")
        print(e.__cause__)  # an underlying Exception, likely raised within httpx.
    except groq.RateLimitError as e:
        print("A 429 status code was received; we should back off a bit.")
    except groq.APIStatusError as e:
        print("Another non-200-range status code was received")
        print(e.status_code)
        print(e.response)
    
    return chat_completion.choices[0].message.content

In [176]:
df_train = pd.read_csv('data/train.csv')

In [177]:
df_train.head()

Unnamed: 0.1,Unnamed: 0,context,question,answerA,answerB,answerC,correct
0,0,"Even though she had homework to do that night,...",What will Jesse want to do next?,read homework to Skylar,help Skylar finish,skip her studying,B
1,1,"After school, Casey met the friend at a bar so...",Why did Casey do this?,have a good idea of the material,goof around with a friend,have a few drinks and leave,A
2,2,Jesse went quickly to their mother and their m...,How would Jesse feel afterwards?,wasting their time,that they are a good child,that their mother always calls them,B
3,3,Robin knew that Kai really wanted her to the l...,Why did Robin do this?,paid her to say that she liked it,she never really liked Kai or her fashion,she knew Kai wanted Robin to like the outfit,C
4,4,Addison slept well last night after playing ba...,Why did Addison do this?,regain her energy,hit a home run,run the bases,A


In [193]:
row_index = 0  # select the row index you're interested in
columns_of_interest = ['context', 'question', 'answerA', 'answerB', 'answerC']  # select the columns you want

row_values = df_train.loc[row_index, columns_of_interest].tolist()

In [194]:
row_values

['Even though she had homework to do that night, Jesse helped Skylar study.',
 'What will Jesse want to do next?',
 'read homework to Skylar',
 'help Skylar finish',
 'skip her studying']

In [213]:
def translate(str_list):
    system = "Could you please translate the following list of strings to Portuguese, please. Please reply in JSON format, without any extra text."
    human = str(str_list)
    response = groq_chat(system, human)
    
    return response

In [217]:
out = translate(row_values)

In [218]:
import json
data = json.loads(out)

In [219]:
out

'{"translations": ["Embora tivesse dever de casa para fazer naquela noite, Jesse ajudou Skylar a estudar.", "O que Jesse vai querer fazer em seguida?", "ler o dever de casa para Skylar", "ajudar Skylar a terminar", "pular seu estudo"]}'

In [214]:
# New dataframe for the translation
df_train_pt = df_train.copy()
df_train_pt.loc[:, ['context', 'question', 'answerA', 'answerB', 'answerC']] = None

In [210]:
df_train_pt

Unnamed: 0.1,Unnamed: 0,context,question,answerA,answerB,answerC,correct
0,0,,,,,,B
1,1,,,,,,A
2,2,,,,,,B
3,3,,,,,,C
4,4,,,,,,A
...,...,...,...,...,...,...,...
33405,33405,,,,,,C
33406,33406,,,,,,C
33407,33407,,,,,,B
33408,33408,,,,,,C


In [211]:
def create_batch(df, step, base):
    row_values = []
    for i in range(step):
        columns_of_interest = ['context', 'question', 'answerA', 'answerB', 'answerC']  # select the columns you want
        row_values += (df.loc[base + i, columns_of_interest].tolist())
    return row_values

In [212]:
batch_size = 30
n_cols = 5

for i in tqdm(range(0, len(df_train), batch_size)):
    time.sleep(2)
    strings = create_batch(df_train, batch_size, i)

    ####  CONVERT TO USE JSON!
    translated_list = translate(strings)
    
    for j in range(0, (int(len(translated_list)/n_cols))):
        df_train_pt.iat[i+j, 1] = translated_list[(j*n_cols)]
        df_train_pt.iat[i+j, 2] = translated_list[(j*n_cols)+1]
        df_train_pt.iat[i+j, 3] = translated_list[(j*n_cols)+2]
        df_train_pt.iat[i+j, 4] = translated_list[(j*n_cols)+3]
        df_train_pt.iat[i+j, 5] = translated_list[(j*n_cols)+4]

  0%|          | 2/1114 [00:35<5:29:29, 17.78s/it]


SyntaxError: unterminated string literal (detected at line 1) (<unknown>, line 1)

In [206]:
display(df_train_pt)

Unnamed: 0.1,Unnamed: 0,context,question,answerA,answerB,answerC,correct
0,0,Embora ela tivesse dever de casa para fazer na...,Qual é a próxima coisa que Jesse vai querer fa...,ler o dever de casa para Skylar,ajudar Skylar a terminar,pular o estudo,B
1,1,"Depois da escola, Casey encontrou um amigo em ...",Por que Casey fez isso?,ter uma boa ideia do material,brincar com um amigo,tomar uns drinques e ir embora,A
2,2,Jesse foi rápido para a mãe e a mãe disse que ...,Como Jesse se sentiria depois disso?,estar wasting seu tempo,que eles são uma criança boa,que a mãe sempre os chama,B
3,3,Robin sabia que Kai realmente queria que ela g...,Por que Robin fez isso?,foi pago para dizer que gostava disso,ela nunca realmente gostava de Kai ou sua moda,ela sabia que Kai queria que Robin gostasse do...,C
4,4,Addison dormiu bem ontem à noite após jogar be...,Por que Addison fez isso?,recuperar sua energia,marcar um home run,correr as bases,A
...,...,...,...,...,...,...,...
33405,33405,,,,,,C
33406,33406,,,,,,C
33407,33407,,,,,,B
33408,33408,,,,,,C
