Before staring working with groq, register on the website and receive your own unike API key. 

In [20]:
from groq_api import KEY
from groq import Groq
import numpy as np
import pandas as pd

from tqdm import tqdm


CLIENT = Groq(api_key=KEY)
PATH = 'data/power/power-ua-train.tsv'


Read csv file as a pandas dataframe, set index to the index from the csv file, add "prediction" and "reasoning" columns.
Lastly, shaffle the data (by sampling all the rows in random order), setting random state for reproducibility.

In [12]:
data = pd.read_csv(PATH, sep='\t', index_col=0)
# set object column dtype full of nans
data['prediction'] = np.nan
data['reasoning'] = np.nan
data['labeled_prediction'] = np.nan
data = data.sample(frac=1, random_state=42)
data.head()


Unnamed: 0_level_0,speaker,sex,text,text_en,label,prediction,reasoning,labeled_prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ua05796,d18023ad9b6df26ee266bb01fc4c822a,M,"Олександр Шевченко, фракція ""<PARTY>"". Ратифік...","Alexander Shevchenko, Freedom faction. Ratific...",1,,,
ua07362,bea4118a3082afea430ffe03d4343b76,M,"Шановний пане Голово, шановні колеги, дозвольт...","Dear Mr. Head, honourable colleagues, let me i...",0,,,
ua10560,b07c8d09c03a6a3222057c300c84e9fe,M,"Олег Купрієнко, <PARTY>. А ще порядок в Україн...","Oleg Kuprienko, Oleg Laska Radical Party. And ...",1,,,
ua10327,93a1d11f680ed07b734319a5fae11cee,F,"По-перше, що стосується військового госпіталю,...","First of all, it's about a military hospital i...",0,,,
ua15066,e7d36da025052d2213a757d55deeb1d1,M,Шановна президія! Шановні народні депутати! Ре...,Honorable Predestination! Dear National Deputi...,0,,,


In [13]:
print('Size of data: ', data.shape[0])

Size of data:  11324


In [None]:
PROMPT = '''You are an expert in Ukrainian political life and parlamentary debates in the 2012-2024 years. Help me identify whether this speech was held by coalition or opposition. First reason about the text and try to find helpful cues. Then provide a final answer. Formulate your response in JSON using the following format: {"reasoning": "...", "prediction": "coalition or opposition"}. Make sure to return proper JSON, e.g. don't add superfluous single quotation marks in the end. Don't use any other words in the response, except for "coalition" or "opposition".'''

In [14]:
import json


def get_response(text):
    messages=[
            {
                "role": "system",
                "content": PROMPT,
            },
            {
                "role": "user",
                "content": text,
            },
        ]
    for i in range(5):
        try:
            chat_completion = CLIENT.chat.completions.create(
                messages=messages,
                model="llama3-70b-8192",
                response_format={"type": "json_object"},
                temperature=0.2 * i,             
            )
            response_content = chat_completion.choices[0].message.content
            response = json.loads(response_content)
            return response
        except Exception as e:
            print(e)
            break
    else:
        return None

In order to get examples for a few-shot learning, get a few correctly classified answers from LLama3 and then provide the reasoning in those answers as a prompt to the model.

In [19]:
NUM_SAMPLES = 10
error_counter = 0

for index, entry in (tqdm_bar := tqdm(data.iloc[:NUM_SAMPLES].iterrows(), total=NUM_SAMPLES)):
    text = entry['text']
    response = get_response(text)
    if not response:
        error_counter += 1
        continue
    data.loc[index, 'prediction'] = response['prediction']
    data.loc[index, 'reasoning'] = response['reasoning']
    if response['prediction'] not in ['coalition', 'opposition']:
        tqdm_bar.set_description(f"Error: {response['prediction']}")
    data.loc[index, 'labeled_prediction'] = int(data.loc[index, 'prediction'] == 'opposition')
    # Update true/false count to see in the progress bar 
    data_subset = data.loc[:index].dropna(subset=['labeled_prediction', 'prediction'])
    distribution = (data_subset['labeled_prediction'] == data_subset['label']).value_counts(normalize=True)
    tqdm_bar.set_description(
        f"Accuracy: {distribution.get(True, 0):.2f}" 
        f" Errors: {error_counter=}"
    )

  0%|          | 0/10 [00:00<?, ?it/s]

Accuracy: 0.50 Errors: error_counter=0: 100%|██████████| 10/10 [00:14<00:00,  1.41s/it]


In [6]:
filtered_data = data[data['label'] == data['labeled_prediction']][:4]
filtered_data["json_response"] = filtered_data.apply(lambda x: f'{{"reasoning": "{x.reasoning}", "prediction": "{x.prediction}"}}', axis=1)
filtered_data

Unnamed: 0_level_0,speaker,sex,text,text_en,label,prediction,reasoning,labeled_prediction,json_response
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ua07362,bea4118a3082afea430ffe03d4343b76,M,"Шановний пане Голово, шановні колеги, дозвольт...","Dear Mr. Head, honourable colleagues, let me i...",0,coalition,The speech is likely to be held by a coalition...,0.0,"{""reasoning"": ""The speech is likely to be held..."
ua10560,b07c8d09c03a6a3222057c300c84e9fe,M,"Олег Купрієнко, <PARTY>. А ще порядок в Україн...","Oleg Kuprienko, Oleg Laska Radical Party. And ...",1,opposition,The speaker is demanding that the parliament c...,1.0,"{""reasoning"": ""The speaker is demanding that t..."
ua15066,e7d36da025052d2213a757d55deeb1d1,M,Шановна президія! Шановні народні депутати! Ре...,Honorable Predestination! Dear National Deputi...,0,coalition,"The speech is formal and objective, presenting...",0.0,"{""reasoning"": ""The speech is formal and object..."
ua15462,ef4246303aea1a72eb0a68db8ef502c5,M,"Шановний Руслан Олексійович, ви збирали, ви зб...","Dear Ruslan Alexiyovich, you've gathered, you'...",1,opposition,The speaker is criticizing the Committee on Eu...,1.0,"{""reasoning"": ""The speaker is criticizing the ..."


In [7]:
def get_response_few_shot(text, few_shots_data):
    messages = [
        {
            "role": "system",
            "content": PROMPT
        },
    ]
    for _, row in few_shots_data.iterrows():
        messages.append({"role": "user", "content": row.text})
        messages.append({"role": "assistant", "content": row.json_response})

    messages.append({"role": "user", "content": text})
    
    for i in range(5):
        try:
            
            chat_completion = CLIENT.chat.completions.create(
                messages=messages,
                model="llama3-70b-8192",
                response_format={"type": "json_object"},
                temperature=0.2 * i,             
            )
            response_content = chat_completion.choices[0].message.content
            response = json.loads(response_content)
            return response
        except Exception as e:
            print(e)
            continue
    else:
        return None


In [8]:
NUM_SAMPLES = 670  # in order to compare with 0-shot learning
# NUM_SAMPLES = int(data.shape[0] * 0.2)  # 20% of the data
error_counter = 0

for index, entry in (tqdm_bar := tqdm(data.iloc[:NUM_SAMPLES].iterrows(), total=NUM_SAMPLES)):
    text = entry['text']
    response = get_response_few_shot(text, filtered_data)
    if not response:
        error_counter += 1
        continue
    if response['prediction'] not in ['coalition', 'opposition']:
        error_counter += 1
        continue

    data.loc[index, 'prediction'] = response['prediction']
    data.loc[index, 'reasoning'] = response['reasoning']
    data.loc[index, 'labeled_prediction'] = int(data.loc[index, 'prediction'] == 'opposition')

    # Update true/false count to see in the progress bar 
    data_subset = data.loc[:index].dropna(subset=['labeled_prediction', 'prediction'])
    distribution = (data_subset['labeled_prediction'] == data_subset['label']).value_counts(normalize=True)
    tqdm_bar.set_description(f"Accuracy: {distribution.get(True, 0):.2f}")

  0%|          | 0/670 [00:00<?, ?it/s]

Accuracy: 0.65:  32%|███▏      | 216/670 [30:38<35:46,  4.73s/it]  

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6187. Please try again in 1.87s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6187. Please try again in 1.87s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6187. Please try again in 1.87s. Visit https://console.groq.com/docs/rate-limits for more information.

Accuracy: 0.65:  32%|███▏      | 217/670 [31:05<1:26:45, 11.49s/it]

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6187. Please try again in 1.87s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Accuracy: 0.67:  56%|█████▌    | 373/670 [48:33<10:42,  2.16s/it]  

Error code: 400 - {'error': {'message': "Failed to generate JSON. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'json_validate_failed', 'failed_generation': "It seems like you're asking about the context of the speeches I analyzed earlier. Based on the content of the speeches, it appears that they are related to the Ukrainian parliament (Verkhovna Rada) and do not specifically mention Ivano-Frankivsk region or city.\n\nHowever, I can try to provide some general information. Ivano-Frankivsk is a city in western Ukraine, and it is indeed a city of regional importance. The city has a complex history, and during World War II, it was occupied by Nazi Germany, and some Ukrainian nationalists, including Stepan Bandera, cooperated with the German authorities.\n\nRegarding the Pantheon, there is a Memorial Complex in Ivano-Frankivsk that honors Ukrainian nationalists, including Stepan Bandera and Roman Shukhevych, who were involv

Accuracy: 0.67:  56%|█████▌    | 374/670 [48:45<24:31,  4.97s/it]

Error code: 400 - {'error': {'message': "Failed to generate JSON. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'json_validate_failed', 'failed_generation': "I think there might be some confusion here. The conversation we had earlier was about Ukrainian parliamentary debates, and now you're asking about Ivano-Frankivsk region and some specific cities and historical figures. It seems like we've jumped to a different topic.\n\nTo answer your question, Ivano-Frankivsk is a city in western Ukraine and is indeed a city of regional importance. As for the Pantheon, I'm not aware of a specific Pantheon on the territory of UN-OPA (which I assume stands for Ukrainian National Liberation Army) that honors figures like Bandera and Shuhevich, who were Ukrainian nationalists involved in the Ukrainian independence movement during World War II. However, there are monuments and memorials dedicated to these figures in various cities acros

Accuracy: 0.69:  89%|████████▉ | 599/670 [1:20:13<05:17,  4.47s/it]

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6165. Please try again in 1.65s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6165. Please try again in 1.65s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6165. Please try again in 1.65s. Visit https://console.groq.com/docs/rate-limits for more information.

Accuracy: 0.69:  90%|████████▉ | 600/670 [1:20:51<16:58, 14.56s/it]

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~6165. Please try again in 1.65s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Accuracy: 0.69: 100%|█████████▉| 669/670 [1:31:13<00:06,  6.98s/it]

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~7436. Please try again in 14.36s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~7436. Please try again in 14.36s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~7436. Please try again in 14.36s. Visit https://console.groq.com/docs/rate-limits for more informati

Accuracy: 0.69: 100%|██████████| 670/670 [1:34:05<00:00,  8.43s/it]

Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama3-70b-8192` in organization `org_01hyxq37sqedzv9mxk1rtkzebz` on tokens per minute (TPM): Limit 6000, Used 0, Requested ~7436. Please try again in 14.36s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}





In [9]:
data_subset = data.dropna(subset=['labeled_prediction', 'prediction'])
distribution = (data_subset['labeled_prediction'] == data_subset['label']).value_counts(normalize=True)
print(f"Accuracy: {distribution.get(True, 0):.2f}")
print(f'Errors:', error_counter)

Accuracy: 0.69
Errors: 4


In [12]:
import os 

FOLDER_PATH = 'results'
FILE_PATH = 'results/power-ua-results-few.tsv'

if not os.path.exists(FOLDER_PATH):
    os.makedirs(FOLDER_PATH)

data.to_csv(FILE_PATH, sep='\t')