In [1]:
import json
from openai import AzureOpenAI
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime
import os

In [2]:
# Load environment variables from the .env file located one level above the
# current working directory. Later cells read RUNID and the Azure OpenAI
# settings via os.getenv — presumably they are defined in this file (confirm).
load_dotenv('../.env')

True

In [3]:
# Name of the batch task; used as a path component and in saved file names.
TASK_NAME = "relevance_check_v0"


def get_run_id():
    """Return the value of the ``RUNID`` environment variable.

    Returns:
        The run identifier as a string, or ``None`` when ``RUNID`` is not set.
    """
    return os.environ.get('RUNID')

RUNID = get_run_id()

# Fail fast when RUNID is missing: otherwise every path below would contain the
# literal text "None" and the makedirs calls would silently create that folder.
if not RUNID:
    raise RuntimeError(
        "RUNID environment variable is not set; define it in the environment or in the .env file."
    )

# Root folder for everything belonging to this run/task combination.
TASK_DATA_PATH = f"../local_tests_data/azure_openai_batch_processing_files/{RUNID}/{TASK_NAME}"

# Folder scanned for the .txt files that hold the batch IDs (only read here).
INPUT_DATA_PATH = f"{TASK_DATA_PATH}/BATCHID"

# Timestamp of this notebook run, used only for the log line below.
RUN_TIME = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Destination folders for downloaded batch results and batch error reports.
OUTPUT_DATA_PATH = f"{TASK_DATA_PATH}/OUTPUTS"
ERROR_DATA_PATH = f"{TASK_DATA_PATH}/ERRORS"

os.makedirs(OUTPUT_DATA_PATH, exist_ok=True)
os.makedirs(ERROR_DATA_PATH, exist_ok=True)

print(f"Run ID: {RUNID} at {RUN_TIME}")

Run ID: RUNID_2 at 2025-06-02 15:42:43


In [4]:
# Connection settings are taken from the environment; either value is None
# when the corresponding variable is not defined.
AZURE_OPENAI_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT')
AZURE_OPENAI_API_KEY = os.environ.get('AZURE_OPENAI_API_KEY')

# Single client instance reused by the download loop further down.
client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2024-12-01-preview",
)

Download output files

In [5]:
def get_batchid_filenames(input_dir=None):
    """List the batch-ID text files found in a directory.

    Args:
        input_dir: Directory to scan. Defaults to ``INPUT_DATA_PATH`` when
            omitted, which keeps existing zero-argument calls working.

    Returns:
        A list of dicts, one per ``.txt`` file, sorted by filename. Each dict has:
          - ``'path'``: the file path (``input_dir`` joined with the filename)
          - ``'id'``: the text after the last underscore of the filename with
            its ``.txt`` suffix removed (e.g. ``'0'`` for ``..._BATCHID_0.txt``)

    Raises:
        OSError: if ``input_dir`` cannot be listed (e.g. it does not exist).
    """
    if input_dir is None:
        input_dir = INPUT_DATA_PATH

    suffix = '.txt'
    batchids = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing suffix so dots elsewhere in the name
        # (e.g. inside the run ID) do not truncate the stem.
        stem = filename[:-len(suffix)]
        batchids.append({
            'path': os.path.join(input_dir, filename),
            'id': stem.split('_')[-1],
        })
    return batchids

# Collect the batch-ID files for this run; the download loop iterates over this list.
batchids = get_batchid_filenames()

In [6]:
# Sanity check: show which batch-ID files were discovered before calling the API.
print(batchids)

[{'path': '../local_tests_data/azure_openai_batch_processing_files/RUNID_2/relevance_check_v0/BATCHID/RUNID_2--relevance_check_v0_BATCHID_0.txt', 'id': '0'}]


In [7]:
def _write_result_file(directory, filename, content, label):
    """Write ``content`` to ``directory/filename`` as UTF-8 and print the saved path."""
    path = os.path.join(directory, filename)
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # non-ASCII characters in the downloaded text.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"{label} saved to {path}")


def save_output_file(i, output):
    """Save a batch's output text under ``OUTPUT_DATA_PATH``.

    Args:
        i: Identifier of the batch, embedded in the file name.
        output: Text to write (the content of the batch output file).

    The file is named ``{RUNID}--{TASK_NAME}_OUTPUT_{i}.jsonl`` and overwrites
    any existing file with the same name.
    """
    _write_result_file(
        OUTPUT_DATA_PATH,
        f"{RUNID}--{TASK_NAME}_OUTPUT_{i}.jsonl",
        output,
        "Output",
    )


def save_error_file(i, error):
    """Save a batch's error text under ``ERROR_DATA_PATH``.

    Args:
        i: Identifier of the batch, embedded in the file name.
        error: Text to write (the content of the batch error file).

    The file is named ``{RUNID}--{TASK_NAME}_ERROR_{i}.jsonl`` and overwrites
    any existing file with the same name.
    """
    _write_result_file(
        ERROR_DATA_PATH,
        f"{RUNID}--{TASK_NAME}_ERROR_{i}.jsonl",
        error,
        "Error",
    )

In [8]:
# For every discovered batch-ID file: look the batch up, and once it has
# completed, download its output and error files (when present and non-empty).
for b in batchids:

    batchid_filename = b['path']
    i = b['id']  # string suffix taken from the file name, used to name saved files

    print(batchid_filename)
    with open(batchid_filename, 'r') as f:
        batch_id = f.read().strip()

    # An empty file gives no ID to look up; skip it so one bad file does not
    # abort the processing of the remaining batches.
    if not batch_id:
        print(f"Batch ID file {batchid_filename} is empty. Skipping.")
        continue

    batch_obj = client.batches.retrieve(batch_id)
    batch_status = batch_obj.status

    # Only completed batches are downloaded; any other status is reported and
    # left for a later re-run of this cell.
    if batch_status != "completed":
        print(f"Batch {batch_id} is not completed. Status: {batch_status}")
        continue

    output_file_id = batch_obj.output_file_id
    if output_file_id:
        output = client.files.content(output_file_id).text.strip()
        if output:
            save_output_file(i, output)

    # The error file is handled independently of the output file, so a batch
    # can produce both, either, or neither.
    error_file_id = batch_obj.error_file_id
    if error_file_id:
        error_content = client.files.content(error_file_id).text.strip()
        if error_content:
            save_error_file(i, error_content)


../local_tests_data/azure_openai_batch_processing_files/RUNID_2/relevance_check_v0/BATCHID/RUNID_2--relevance_check_v0_BATCHID_0.txt
Output saved to ../local_tests_data/azure_openai_batch_processing_files/RUNID_2/relevance_check_v0/OUTPUTS/RUNID_2--relevance_check_v0_OUTPUT_0.jsonl
