# Notebook to test the completeness check functionality of the IBJ project.

We test on Burundi, the third most visited country and a critical one for human rights issues.

In [None]:
# LIBRARIES ---------------------------------------------------
import os
os.chdir('/Users/dianaavalos/PycharmProjects/InternationalBridgesToJustice')
import json
from pprint import pprint
from tqdm import tqdm
from src.config import Paths
from src.openai_utils import openai_client
from src.chromadb_utils import load_collection
from src.get_completeness import KeypointEvaluation, schema_completeness
from src.query_functions import get_completeness_keypoints
from src.file_manager import get_country_names

Load the prompt and system prompt for the completeness check.

In [None]:
# Read the user-prompt template and the system prompt used for the
# completeness check. The encoding is passed explicitly so the files are
# decoded the same way on every platform instead of depending on the
# locale's default encoding (assumes the prompt files are stored as UTF-8).
with open(Paths.PATH_FILE_PROMPT_COMPLETENESS, "r", encoding="utf-8") as file:
    prompt_completeness = file.read()

with open(Paths.PATH_FILE_SYSTEM_PROMPT_COMPLETENESS, "r", encoding="utf-8") as file:
    system_prompt = file.read()

# Preview only the first 1000 characters of the prompt template.
print(prompt_completeness[:1000])

In [None]:
# Display the full system prompt read from
# Paths.PATH_FILE_SYSTEM_PROMPT_COMPLETENESS.
print(system_prompt)

In [None]:
# Checklist entries to evaluate, read from the completeness keypoints file.
completeness_keypoints = get_completeness_keypoints(
    completeness_checklist_filepath=Paths.PATH_MD_FILE_COMPLETENESS_KEYPOINTS,
)

# Collection that the similarity searches will be run against.
collection = load_collection(Paths.PATH_CHROMADB, Paths.COLLECTION_NAME)

# The country list is read from file and then immediately overridden, so
# this notebook only ever works on Burundi.
country_names = get_country_names(
    country_names_filepath="data/interim/country_names_1.txt",
)
country_names = ["Burundi"]

Print the keypoints to check for completeness.

In [None]:
# Walk the checklist once per country and print every keypoint together
# with the chapter it belongs to. Chapters are the unindented entries;
# keypoints are the indented entries that follow them.
chapter = ""
for country in country_names:
    for point in tqdm(completeness_keypoints):
        # Number of leading whitespace characters of the entry.
        indent = len(point) - len(point.lstrip())
        if indent == 0:
            # An unindented entry opens a new chapter; nothing to print yet.
            chapter = point
            continue
        # Indented entry: a keypoint of the current chapter.
        keypoint_to_check = f"{chapter}: {point}"
        # ANSI colours: chapter in bright yellow, keypoint in bright blue.
        print(f"\033[93m{chapter}:\033[0m\033[94m{point}\033[0m")

In [None]:
# Set up the evaluation of a single keypoint for a single country.
country = "Burundi"
# Another keypoint that can be tried here:
#   "6. Court Procedures:      4. Expert Witnesses"
keypoint_to_check = "2. Rights of the Accused:   4. Right to Medical Care"

# Derive the chapter from the keypoint itself (the text before the first
# colon). Otherwise `chapter` would still hold whatever value the keypoint
# listing loop left behind, i.e. the last chapter of the checklist, which
# does not match the keypoint chosen above.
chapter = keypoint_to_check.partition(":")[0]

evaluation = KeypointEvaluation(
    country=country,
    chapter=chapter,
    point=keypoint_to_check,
    system_prompt=system_prompt,
    model="gpt-4o-mini",
    collection=collection,
    lazy=True,
)

In [None]:
# Run the similarity searches against the collection, then build the prompt
# from the completeness prompt template.
# NOTE(review): define_prompt presumably uses the search results, so the
# order of these two calls likely matters - confirm in src.get_completeness.
evaluation.run_similarity_searches(collection=collection)
evaluation.define_prompt(prompt_completeness=prompt_completeness)

In [None]:
# Inspect the prompt stored on the evaluation object before it is sent.
print(evaluation.prompt)

In [None]:
# Run the completeness check with the OpenAI client at temperature 0.1.
# NOTE(review): the result is presumably stored in evaluation.answer as a
# JSON string (it is parsed with json.loads afterwards) - confirm.
evaluation.check_completeness(client=openai_client, temperature=0.1)

In [None]:
# Parse the JSON string stored in evaluation.answer into a Python object.
# The isinstance guard makes this cell safe to re-run: once the answer has
# been parsed it is no longer a str, and calling json.loads on it again
# would raise a TypeError.
if isinstance(evaluation.answer, str):
    evaluation.answer = json.loads(evaluation.answer)
# Last expression of the cell: the notebook displays the resulting type.
type(evaluation.answer)

In [None]:
# Attach the similarity-search metadata to the answer.
# NOTE(review): behaviour inferred from the method name; presumably expects
# evaluation.answer to already be parsed into a dict - confirm.
evaluation.add_similarity_metadata_to_answer()

In [None]:
# Pretty-print the answer for manual inspection.
pprint(evaluation.answer)

Now we want to do exactly this in batches per country with all keypoints.

In [None]:
# Build one batch request for the evaluation prepared above. The custom_id
# combines the country and the keypoint; the user prompt and temperature
# are the same ones used for the single-call check.
# NOTE(review): the exact request structure is defined by
# KeypointEvaluation.build_batch_request - confirm it matches the batch
# endpoint's expected format.
request = evaluation.build_batch_request(
    custom_id=f"{country}-{keypoint_to_check}",
    user_prompt=evaluation.prompt,
    temperature=0.1
)

In [None]:
# Append the request to the batch input file as a single JSON line.
# The file is opened in append mode, so re-running this cell adds the same
# request to the file again.
jsonl_file_completeness_batch = f"{Paths.PATH_FOLDER_COMPLETENESS}/batch_input_1.jsonl"
with open(jsonl_file_completeness_batch, mode="a") as outfile:
    outfile.write(f"{json.dumps(request)}\n")
