In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv
import json

# Pull key/value pairs from a local .env file into the process environment.
load_dotenv()

# The key handed to the OpenAI client is read from the API_KEY variable.
api_key = os.environ.get("API_KEY")

# Client object shared by every API call further down in the notebook.
client = OpenAI(api_key=api_key)

### JSON structure for the response

In [None]:
def _fact_array_schema(*fact_keys):
    """Return the schema fragment for an array of objects whose listed keys are required strings."""
    return {
        "type": "array",
        "items": {
            "type": "object",
            "properties": {key: {"type": "string"} for key in fact_keys},
            "required": list(fact_keys),
        },
    }


# Property definitions for the structured answer requested from the model:
# three fact lists (keyed by "A_fact" / "B_fact") and an object of integer counts.
validation_response = {
    "true_positives": _fact_array_schema("A_fact", "B_fact"),
    "false_positives": _fact_array_schema("A_fact"),
    "false_negatives": _fact_array_schema("B_fact"),
    "summary_counts": {
        "type": "object",
        "properties": {count_name: {"type": "integer"} for count_name in ("TP", "FP", "FN")},
        "required": ["TP", "FP", "FN"],
    },
}

In [None]:
def ReadFile(file_path):
    """Return the UTF-8 text stored at ``file_path`` with surrounding whitespace removed.

    Raises:
        FileNotFoundError: if nothing exists at ``file_path``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: {file_path} not found!")

    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read().strip()
    

def make_batch_input_jsonl(prompt_text: str, output_path: str, model: str = "o3-mini",
                           reasoning_effort: "str | None" = "high"):
    """
    Write one Batch API request per (doctor, patient, prompt) combination to a .jsonl file.

    Each request asks ``model`` to compare an AI-written summary (Summary A) with the
    doctor's own summary (Summary B) and to answer in the ``validation_response`` structure.

    prompt_text: instruction prompt (str) placed before the two summaries
    output_path: where to write batch .jsonl file (overwritten if it exists)
    model: OpenAI model name
    reasoning_effort: value sent as the request's "reasoning_effort" field; pass None to
        leave the field out of the request body (e.g. for models that do not accept it)

    Combinations whose summary files are missing are reported and skipped; any other
    per-combination failure is reported and the loop continues.
    """
    # The structured-output specification is the same for every request, so build it once.
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "validation_response",
            "schema": {
                "type": "object",
                "properties": validation_response
            }
        }
    }

    with open(output_path, 'w', encoding='utf-8') as batch_file:
        for doctor_index in range(1, 11):  # 10 doctors
            for patient_index in range(1, 11):  # 10 patients
                for prompt_index in range(1, 6):  # 5 prompts
                    try:
                        doc_path = f"Arst_{doctor_index:03}/Patsient_{patient_index:03}/toorfailid/arsti_kokkuvote_orig_{doctor_index:02}_{patient_index:02}.txt"

                        ai_path = f"summaries/arst_{doctor_index}_patsient_{patient_index}/kokkuvõtted/prompt_{prompt_index}/arst_{doctor_index:02}_patsient_{patient_index:02}_kokkuvõte_prompt_{prompt_index}_a.txt"

                        summary_A = ReadFile(ai_path)
                        summary_B = ReadFile(doc_path)

                        full_prompt = f"{prompt_text}\nSummary A:\n{summary_A}\nSummary B:\n{summary_B}"
                        messages = [{"role": "user", "content": full_prompt}]

                        body = {
                            "model": model,
                            "messages": messages,
                            "response_format": response_format,
                        }
                        # Only include the field when a value was requested.
                        if reasoning_effort is not None:
                            body["reasoning_effort"] = reasoning_effort

                        entry = {
                            # The custom_id encodes the combination so results can be matched later.
                            "custom_id": f"request-{doctor_index}-{patient_index}-{prompt_index}",
                            "method": "POST",
                            "url": "/v1/chat/completions",
                            "body": body,
                        }

                        batch_file.write(json.dumps(entry, ensure_ascii=False) + "\n")

                    except FileNotFoundError as e:
                        print(f"[Skipped] {e}")
                    except Exception as e:
                        # Best-effort: report the failing combination and keep going.
                        print(f"[Error] D{doctor_index} P{patient_index} Prompt{prompt_index}: {e}")

    print(f"\n Batch input JSONL written to: {output_path}")

In [None]:
# Build the batch input file from the judge prompt, then upload it for batch processing.
input_prompt = ReadFile("LLM_as_judge_prompt_5.txt")

output_path = "batch_o3-mini-high.jsonl"
make_batch_input_jsonl(input_prompt, output_path, "o3-mini")

# Upload inside a context manager so the file handle is closed once the upload returns.
with open(output_path, "rb") as batch_jsonl:
    batch_input_file = client.files.create(
        file=batch_jsonl,
        purpose="batch"
    )

print(batch_input_file)

### Send batch file to be processed

In [None]:

# Submit the uploaded input file as a batch of chat-completion requests.
batch_input_file_id = batch_input_file.id
print(batch_input_file_id)

batch_request = dict(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"description": "test"},
)
batch = client.batches.create(**batch_request)

# Keep this ID: it is what identifies the batch in later retrieve/cancel calls.
print(batch.id)


### Run this cell only if you need to cancel the batch

In [None]:
# Cancelling is opt-in so that "Run All" never cancels a batch by accident.
CANCEL_BATCH = False

# ID of the batch to cancel; replace it with the ID printed when the batch was created.
BATCH_ID_TO_CANCEL = "batch_6833d261ddf48190ae2de54a18fa161d"

if CANCEL_BATCH:
    client.batches.cancel(BATCH_ID_TO_CANCEL)

### Check the progress

In [None]:
# Re-fetch the batch so the values printed below reflect its current state on the server
# rather than the object held since the batch was created.
# To inspect an earlier batch instead, retrieve it by its ID, e.g.:
#batch = client.batches.retrieve('batch_681ab10149dc8190822e78baa1709017')
batch = client.batches.retrieve(batch.id)
print(batch)
print(batch.metadata)
print(batch.output_file_id)
print(batch.request_counts)

### If completed, save the output

In [None]:
# Refresh the batch first so output_file_id reflects the current state on the server.
batch = client.batches.retrieve(batch.id)

# Without an output file there is nothing to download; stop with a clear message instead
# of asking the API for a file literally named "None".
if batch.output_file_id is None:
    raise RuntimeError(f"Batch {batch.id} has no output file yet (status: {batch.status}).")

file_response = client.files.content(batch.output_file_id)

model_name = "o3-mini-high"
with open(f"batch_output_10_runs_reversed_{model_name}_44.jsonl", "w", encoding="utf-8") as f:
    f.write(file_response.text)

## Parsing the results from the jsonl file

In [None]:
from typing import Dict
import csv

def calculate_metrics(tp: int, fp: int, fn: int) -> Dict[str, float]:
    """Compute precision, recall and F1 from true-positive, false-positive and false-negative counts.

    Any ratio whose denominator is not positive is reported as 0.0.

    Returns:
        Dict with the keys "Precision", "Recall" and "F1-score".
    """
    predicted_total = tp + fp
    actual_total = tp + fn

    precision = 0.0
    if predicted_total > 0:
        precision = tp / predicted_total

    recall = 0.0
    if actual_total > 0:
        recall = tp / actual_total

    f1_score = 0.0
    if (precision + recall) > 0:
        f1_score = (2 * precision * recall) / (precision + recall)

    return {"Precision": precision, "Recall": recall, "F1-score": f1_score}

def average_metrics(metrics1: Dict[str, float], metrics2: Dict[str, float]) -> Dict[str, float]:
    """Return the per-metric mean of two metric dicts.

    Both inputs must contain "Precision", "Recall" and "F1-score"; other keys are ignored.
    """
    return {
        metric_name: (metrics1[metric_name] + metrics2[metric_name]) / 2
        for metric_name in ("Precision", "Recall", "F1-score")
    }

def GetSummaryCounts(parsed):
    """Extract the (TP, FP, FN) counts from one parsed batch-output record.

    The text of the first choice's message is decoded as JSON and its
    "summary_counts" object supplies the three values.

    Returns:
        Tuple ``(tp, fp, fn)``.
    """
    message_content = parsed['response']['body']['choices'][0]['message']['content']
    counts = json.loads(message_content)['summary_counts']
    return tuple(counts[label] for label in ('TP', 'FP', 'FN'))

### Batch results with o3-mini-high with 2 runs

In [None]:
# Load the judge results of the normal run and of the reversed run.
with open("batch_output_normal_run_0.jsonl", "r", encoding="utf-8") as f:
    parsed_normal = [json.loads(line) for line in f if line.strip()]

with open("batch_output_reversed_run_0.jsonl", "r", encoding="utf-8") as f:
    parsed_reversed = [json.loads(line) for line in f if line.strip()]


csv_filename = "o3-mini-high-validations-with-2-runs.csv"
prompt_count = 5

header = ["Doctor", "Patient"]
for prompt_number in range(1, prompt_count + 1):
    header += [f"Precision (Prompt {prompt_number})",
               f"Recall (Prompt {prompt_number})",
               f"F1-score (Prompt {prompt_number})"]

# Pair the two runs by custom_id instead of by position: batch output lines are not
# guaranteed to come back in input order, and skipped inputs would shift positions.
reversed_by_id = {entry['custom_id']: entry for entry in parsed_reversed}

# (doctor, patient) -> {prompt index -> [precision, recall, f1]}, in first-seen order.
rows_by_case = {}
for normal_entry in parsed_normal:
    run_id = normal_entry['custom_id']
    reversed_entry = reversed_by_id.get(run_id)
    if reversed_entry is None:
        print(f"[Skipped] {run_id}: no matching request in the reversed run")
        continue

    id_parts = run_id.split('-')
    doctor_index = id_parts[1]
    patient_index = id_parts[2]
    prompt_index = id_parts[3]

    print(f"Processing doctor: {doctor_index}, patient: {patient_index}, prompt: {prompt_index}.")

    tp_normal, fp_normal, fn_normal = GetSummaryCounts(normal_entry)
    tp_reversed, fp_reversed, fn_reversed = GetSummaryCounts(reversed_entry)

    metrics_normal = calculate_metrics(tp_normal, fp_normal, fn_normal)
    # FP and FN are deliberately passed swapped for the reversed run
    # (presumably because the two summaries trade places there; confirm).
    metrics_reversed = calculate_metrics(tp_reversed, fn_reversed, fp_reversed)

    avg_metrics = average_metrics(metrics_normal, metrics_reversed)

    rows_by_case.setdefault((doctor_index, patient_index), {})[prompt_index] = [
        round(avg_metrics["Precision"], 3),
        round(avg_metrics["Recall"], 3),
        round(avg_metrics["F1-score"], 3),
    ]

with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file, delimiter="\t")  # Use tab separator
    writer.writerow(header)
    for (doctor_index, patient_index), metrics_by_prompt in rows_by_case.items():
        row = [doctor_index, patient_index]
        for prompt_number in range(1, prompt_count + 1):
            # A prompt with no result leaves its three columns empty so later columns stay aligned.
            row.extend(metrics_by_prompt.get(str(prompt_number), ["", "", ""]))
        writer.writerow(row)

print(f"Wrote {len(rows_by_case)} rows to {csv_filename}")

### o3-mini-high multiple runs

In [None]:
csv_filename = "o3-mini-high-validations-2-way-multiple-runs.csv"
prompt_count = 5

header = ["Doctor", "Patient"]
for prompt_number in range(1, prompt_count + 1):
    header += [f"Precision (Prompt {prompt_number})",
               f"Recall (Prompt {prompt_number})",
               f"F1-score (Prompt {prompt_number})"]

# The CSV is opened once; every run appends its rows below the single header line.
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file, delimiter="\t")  # Use tab separator
    writer.writerow(header)

    for file_number in range(1, 11):

        with open(f"batch_output_10_runs_normal_o3-mini-high_{file_number}.jsonl", "r", encoding="utf-8") as f:
            parsed_normal = [json.loads(line) for line in f if line.strip()]

        with open(f"batch_output_10_runs_reversed_o3-mini-high_{file_number}.jsonl", "r", encoding="utf-8") as f:
            parsed_reversed = [json.loads(line) for line in f if line.strip()]

        # Pair the two runs by custom_id instead of by position: batch output lines are not
        # guaranteed to come back in input order, and skipped inputs would shift positions.
        reversed_by_id = {entry['custom_id']: entry for entry in parsed_reversed}

        # (doctor, patient) -> {prompt index -> [precision, recall, f1]}, in first-seen order.
        rows_by_case = {}
        for normal_entry in parsed_normal:
            run_id = normal_entry['custom_id']
            reversed_entry = reversed_by_id.get(run_id)
            if reversed_entry is None:
                print(f"[Skipped] run {file_number}, {run_id}: no matching request in the reversed run")
                continue

            id_parts = run_id.split('-')
            doctor_index = id_parts[1]
            patient_index = id_parts[2]
            prompt_index = id_parts[3]

            print(f"Processing doctor: {doctor_index}, patient: {patient_index}, prompt: {prompt_index}.")

            tp_normal, fp_normal, fn_normal = GetSummaryCounts(normal_entry)
            tp_reversed, fp_reversed, fn_reversed = GetSummaryCounts(reversed_entry)

            metrics_normal = calculate_metrics(tp_normal, fp_normal, fn_normal)
            # FP and FN are deliberately passed swapped for the reversed run
            # (presumably because the two summaries trade places there; confirm).
            metrics_reversed = calculate_metrics(tp_reversed, fn_reversed, fp_reversed)

            avg_metrics = average_metrics(metrics_normal, metrics_reversed)

            rows_by_case.setdefault((doctor_index, patient_index), {})[prompt_index] = [
                round(avg_metrics["Precision"], 3),
                round(avg_metrics["Recall"], 3),
                round(avg_metrics["F1-score"], 3),
            ]

        for (doctor_index, patient_index), metrics_by_prompt in rows_by_case.items():
            row = [doctor_index, patient_index]
            for prompt_number in range(1, prompt_count + 1):
                # A prompt with no result leaves its three columns empty so later columns stay aligned.
                row.extend(metrics_by_prompt.get(str(prompt_number), ["", "", ""]))
            writer.writerow(row)

        print(f"Run {file_number}: wrote {len(rows_by_case)} rows")

## Comparing different models

In [None]:
# The relevant input files have to be created first
models = ["o3-mini-high", "o3-mini-medium", "o3-mini-low", "o1", "gpt-4o"]

batch_ids = list()

for model in models:

    batch_input_file = client.files.create(
        file=open(f"batch_{model}.jsonl", "rb"),
        purpose="batch"
    )

    print(model)

    batch_input_file_id = batch_input_file.id
    batch = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": f"comparing model {model}"
        }
    )
    print(batch.id)
    batch_ids.append(batch.id)



In [None]:
# Fetch each submitted batch again and print its current details.
for batch_id in batch_ids:
    batch = client.batches.retrieve(batch_id)
    batch_details = (batch, batch.metadata, batch.output_file_id, batch.request_counts)
    for detail in batch_details:
        print(detail)

### Retrieve the relevant batch output

In [None]:
# Placeholder: fill in the output_file_id printed for the finished batch.
output_file_id = "file-"  # Get the corresponding output_file_id
file_response = client.files.content(output_file_id)

# The model label and the trailing run number together name the saved file.
model_name = "gpt-4o"
model_output_path = f"batch_output_{model_name}_5.jsonl"
with open(model_output_path, "w", encoding="utf-8") as f:
    f.write(file_response.text)

### Write the 10-run results of a model to CSV

In [None]:
import csv
model = "gpt-4o"

csv_filename = f"model_{model}_10_runs.csv"

# The CSV is opened once; each result of each of the 10 runs becomes one row.
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file, delimiter="\t")  # Use tab separator
    # The trailing space in "F1-score " is kept so the header matches files written earlier.
    writer.writerow(["Doctor", "Patient", "Precision", "Recall", "F1-score "])

    # Distinct names for the run counter and the per-result loop (both were `i` before).
    for run_number in range(1, 11):
        with open(f"batch_output_{model}_{run_number}.jsonl", "r", encoding="utf-8") as f:
            parsed = [json.loads(line) for line in f if line.strip()]

        for result in parsed:
            run_id = result['custom_id']

            id_parts = run_id.split('-')
            doctor_index = id_parts[1]
            patient_index = id_parts[2]
            prompt_index = id_parts[3]

            print(f"Processing doctor: {doctor_index}, patient: {patient_index}, prompt: {prompt_index}.")

            tp, fp, fn = GetSummaryCounts(result)
            metrics = calculate_metrics(tp, fp, fn)

            writer.writerow([
                doctor_index,
                patient_index,
                round(metrics["Precision"], 3),
                round(metrics["Recall"], 3),
                round(metrics["F1-score"], 3),
            ])