In [13]:
import json
import time
import random
from datetime import datetime
from easy_open_ai import get_answer_with_instruction # requires .env with OPENAI_API_KEY=
from utils import normalize_string_to_filename

# OpenAI Takes a Ukrainian Language Quiz

In this notebook we make API calls to the [Chat Completion](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) endpoint using [one small helper library](https://github.com/anilev6/easy-open-ai): each call sends GPT a question together with an instruction.

The chosen ChatGPT model answers the quiz, and its answers are written to a `data_{model}.json` file. The result is a database of quiz results that closely resembles the original quiz question database but is easier to visualize.

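For this task we need *a quiz database* stored as a `.json` file with a specific structure: a list of items, each with an `id` and a `data` object holding `context`, `question`, `options` and `answer`.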

In [27]:
# Model that takes the quiz; also used to derive the output file names below.
MODEL = "gpt-3.5-turbo"
# Raw quiz database export read by clean_json().
RAW_QUIZ_DB_PATH = "project-1-at-2024-02-13-22-30-ff0f6dc4.json"
# Reduced copy of the quiz database written by clean_json().
CLEAN_DATA_PATH = "clean_data.json"
# Quiz data with the model's answers added, written by process_all_items().
PROCESSED_DATA_PATH = normalize_string_to_filename(f"data_{MODEL}") + ".json"
# Plain-text summary written by create_report().
REPORT_PATH = normalize_string_to_filename(f"report_{MODEL}") + ".txt"

We first clean the data, keeping only the necessary fields and writing the result to a new, clean `.json` file.

In [9]:
def clean_json():
    """Reduce the raw quiz export to the fields the quiz needs and save it.

    Reads the JSON list stored at RAW_QUIZ_DB_PATH, keeps each item's "id"
    together with the "context", "question", "options" and "answer" entries of
    its "data" mapping (entries that are missing become None), and writes the
    result to CLEAN_DATA_PATH as indented UTF-8 JSON.
    """
    kept_fields = ("context", "question", "options", "answer")

    with open(RAW_QUIZ_DB_PATH, "r", encoding="utf-8") as source:
        raw_items = json.load(source)

    slim_items = []
    for raw in raw_items:
        payload = raw.get("data", {})
        slim_items.append(
            {
                "id": raw.get("id"),
                "data": {field: payload.get(field) for field in kept_fields},
            }
        )

    with open(CLEAN_DATA_PATH, "w", encoding="utf-8") as target:
        json.dump(slim_items, target, ensure_ascii=False, indent=4)

# uncomment to launch
# clean_json()

Now that we have the clean data, we are ready to poll ChatGPT.

The results are saved with the same `.json` structure, but with an extra `'{model}_answer'` key inside `'data'`.

In [10]:
def get_story(data: dict) -> str:
    """Build the prompt text for one quiz item.

    The prompt consists of the item's context and question on separate lines,
    an "OPTIONS" header, one "<index> : <option>" line per answer option
    (indices start at 0), and a closing question.

    Args:
        data: The "data" mapping of a quiz item, with optional "context",
            "question" and "options" entries.

    Returns:
        The prompt as a single string.
    """
    # A key can be present with a null value (clean_json stores None for fields
    # missing from the raw export). dict.get's default is not used in that
    # case, so fall back explicitly to keep str.join / enumerate from raising.
    context = data.get("context") or ""
    question = data.get("question") or ""
    options = data.get("options") or []

    result = "\n".join([context, question])
    result += "\nOPTIONS\n"
    result += "\n".join(f"{i} : {opt}" for i, opt in enumerate(options))
    result += "\nWhich option best fills the blank in the story?"
    return result

In [11]:
# Instruction text passed to get_answer_with_instruction() alongside every quiz
# question; it asks the model to reply with the index of the chosen option.
GENERAL_INSTRUCTION = """
This is a language quiz. 
Read the story with a blank and the answer options provided. 
Your task is to select the best option that completes the sentence 
instead of "______". 
Respond with the index of the correct option (e.g. number, 0).
"""

def _parse_model_response(response, options):
    """Turn the model's raw reply into an answer string, or None if unusable."""
    if not isinstance(response, str):
        return None
    reply = response.strip()
    try:
        index = int(reply)
    except ValueError:
        index = None
    # Only accept indices that were actually offered; a negative number would
    # otherwise silently select an option from the end of the list.
    if index is not None and 0 <= index < len(options):
        return options[index]
    # Instead of the bare number the model sometimes returns the whole
    # "index : option" line. Keep everything after the first colon so option
    # texts that themselves contain ":" are not cut short.
    _, colon, tail = reply.partition(":")
    return tail.strip() if colon else None


def process_item(item: dict):
    """Ask the model one quiz question and store its answer on the item.

    The prompt is built with get_story() from item["data"] and sent together
    with GENERAL_INSTRUCTION. The reply is expected to be the index of an
    option; the matching option text (or the text after the first colon when
    the model echoes a whole option line, or None when the reply cannot be
    interpreted) is written to item["data"]["<MODEL>_answer"].

    Args:
        item: A quiz item with a "data" mapping; it is modified in place.
    """
    data = item.get("data")
    options = data.get("options") or []
    # NOTE(review): chaos_coefficient=0 presumably requests deterministic
    # output from easy_open_ai -- confirm against that library's docs.
    response = get_answer_with_instruction(
        get_story(data),
        GENERAL_INSTRUCTION,
        chaos_coefficient=0,
        max_tokens=2000,
        model=MODEL,
    )
    item["data"][f"{MODEL}_answer"] = _parse_model_response(response, options)

In [24]:
def process_all_items():
    """Poll the model for every unanswered quiz item and save the results.

    Starts from PROCESSED_DATA_PATH when that file already exists, so an
    interrupted run resumes instead of starting over; otherwise starts from
    CLEAN_DATA_PATH. Items that already carry a "<MODEL>_answer" in their
    "data" are skipped. Polling stops at the first exception (a warning is
    printed), and whatever has been collected so far is written to
    PROCESSED_DATA_PATH. Delete that file to force a full re-run.
    """
    try:
        with open(PROCESSED_DATA_PATH, "r", encoding="utf-8") as f:
            items = json.load(f)
    except FileNotFoundError:
        with open(CLEAN_DATA_PATH, "r", encoding="utf-8") as f:
            items = json.load(f)

    answer_key = f"{MODEL}_answer"
    for item in items:
        # process_item stores the answer inside item["data"], so that is where
        # an existing answer has to be looked up.
        if (item.get("data") or {}).get(answer_key):
            continue
        try:
            # Random pause between requests to stay gentle with the API.
            time.sleep(random.randint(1, 7))
            process_item(item)
        except Exception as e:
            # Stop on the first failure but still save what was collected.
            print(f"WARNING {e}")
            break

    with open(PROCESSED_DATA_PATH, "w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=4)

# process_all_items()



Polling takes some time because we pause between requests to be gentle with the API, but at the end we obtain the processed data.

## Results

We will now count the success rate.

In [22]:
def is_model_right(data:dict):
    """Score one processed quiz item.

    Returns 1 when the stored model answer equals the reference answer, 0 when
    it differs, and None when the item has no (truthy) model answer.
    """
    given = data.get(f"{MODEL}_answer")
    if not given:
        return None
    expected = data.get("answer")
    return 1 if expected == given else 0

In [25]:
def get_total_and_sucsess():
    """Count scored and correctly answered items in the processed quiz data.

    Loads PROCESSED_DATA_PATH and scores every item with is_model_right();
    items it scores as None (no model answer) are left out.

    Returns:
        A (scored_items, correct_items) tuple.
    """
    with open(PROCESSED_DATA_PATH, "r", encoding="utf-8") as f:
        records = json.load(f)

    scored = 0
    correct = 0
    for record in records:
        verdict = is_model_right(record.get("data", {}))
        if verdict is None:
            continue
        scored += 1
        correct += verdict
    return scored, correct

In [28]:
def create_report():
    """Print a summary of the quiz results and write it to REPORT_PATH.

    The summary lists the source database, the current timestamp, the model,
    the number of scored tasks, the number of correct answers, and the success
    rate as a whole-number percentage (0 when no task was scored).
    """
    total, suc = get_total_and_sucsess()

    # Round on the 0-100 scale: rounding the fraction first and truncating
    # after the multiplication can lose a point to float error
    # (e.g. int(0.29 * 100) == 28). Guard against an empty result set.
    rate = round(100 * suc / total) if total else 0

    TEXT = f"""
    RESULTS_OBTAINED_FROM : {RAW_QUIZ_DB_PATH}
    RESULTS_OBTAINED_AT : {datetime.now()}
    MODEL : {MODEL}
    TOTAL TASKS SOLVED : {total}
    SUCCESS : {suc}
    RATE : {rate} %
    """
    print(TEXT)

    # Explicit encoding, consistent with the other files this notebook writes.
    with open(REPORT_PATH, "w", encoding="utf-8") as file:
        file.write(TEXT)

# create_report()


    RESULTS_OBTAINED_FROM : project-1-at-2024-02-13-22-30-ff0f6dc4.json
    RESULTS_OBTAINED_AT : 2024-02-14 04:12:41.205327
    MODEL : gpt-3.5-turbo
    TOTAL TASKS SOLVED : 559
    SUCCESS : 304
    RATE : 54 %
    


We can see that the success rate is very low, **which strongly suggests a Ukrainian-speaking LLM needs further development.**

To dive further into the topic, one should run more experiments with different OpenAI models, as well as with models fine-tuned on more Ukrainian literature, and compare the success rates.