In [1]:
"""
Download a finished OpenAI Batch run and pivot the JSONL responses into one JSON file.
"""
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any, Dict, List

from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd

load_dotenv(override=True)

# Set these to whatever is convenient before running the script.

PROJECT_ROOT = Path.cwd().parent  # notebook runs from code/, so step up once
RAW_PATH = PROJECT_ROOT / "results" / "4_5_batch_output.jsonl"
JSON_PATH = PROJECT_ROOT / "results" / "4_5_batch_output.json"
out_csv_path = PROJECT_ROOT / "results" / "4_5_batch_output.csv"
BATCH_ID = os.environ.get("OPENAI_BATCH_ID") or "batch_690ae971d7048190a982f22185698051"


In [2]:
client = OpenAI()

In [3]:
batch_id = {
  "file_id": "file-1kVnWo4YX6ZKE9Qs1ZD4tm",
  "batch_id": "batch_690ae971d7048190a982f22185698051",
  "status": "validating"
}

BATCH_ID = batch_id["batch_id"]


# Check if Batch is ready 

In [4]:
batch = client.batches.retrieve(BATCH_ID).model_dump()
if batch.get("status") != "completed":
    raise SystemExit(f"Batch {BATCH_ID} is not complete yet (status={batch.get('status')!r}).")

batch

{'id': 'batch_690ae971d7048190a982f22185698051',
 'completion_window': '24h',
 'created_at': 1762322801,
 'endpoint': '/v1/responses',
 'input_file_id': 'file-1kVnWo4YX6ZKE9Qs1ZD4tm',
 'object': 'batch',
 'status': 'completed',
 'cancelled_at': None,
 'cancelling_at': None,
 'completed_at': 1762323597,
 'error_file_id': None,
 'errors': None,
 'expired_at': None,
 'expires_at': 1762409201,
 'failed_at': None,
 'finalizing_at': 1762323590,
 'in_progress_at': 1762322864,
 'metadata': {'app': 'safety_rag_eval', 'kind': 'judge_batch'},
 'model': 'gpt-5-2025-08-07',
 'output_file_id': 'file-Wo1ThJxQrGZ1GngGtDx9ec',
 'request_counts': {'completed': 48, 'failed': 0, 'total': 48},
 'usage': {'input_tokens': 315194,
  'input_tokens_details': {'cached_tokens': 99072},
  'output_tokens': 48001,
  'output_tokens_details': {'reasoning_tokens': 36992},
  'total_tokens': 363195}}

In [5]:
output_file_id = batch.get("output_file_id")
if not output_file_id:
    raise SystemExit(f"Batch {BATCH_ID} does not expose an output_file_id.")

In [6]:
RAW_PATH.parent.mkdir(parents=True, exist_ok=True)
JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
out_csv_path.parent.mkdir(parents=True, exist_ok=True)

In [7]:
"""

with client.files.with_streaming_response.content(output_file_id) as stream:
    stream.stream_to_file(RAW_PATH)0_ur5e_multiple_pdfs.py
    
    """

'\n\nwith client.files.with_streaming_response.content(output_file_id) as stream:\n    stream.stream_to_file(RAW_PATH)0_ur5e_multiple_pdfs.py\n\n    '

# Load Jsonl

In [8]:
def _load_jsonl(path: Path) -> List[Dict[str, Any]]:
    """Load a JSONL file from disk."""
    rows: List[Dict[str, Any]] = []
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows

In [9]:
raw_records = _load_jsonl(RAW_PATH)

In [10]:
from typing import Any, Dict

def extract_record_info(record: Dict[str, Any]) -> Dict[str, Any]:
    """Extracts key fields from a single batch record."""
    
    # Initialize output dictionary
    info = {
        "custom_id": record.get("custom_id"),
        "text": None,
        "judge_model": None,
        "temperature": None,
        "permutation_id": None,
    }

    # Navigate safely into nested response/body
    response = record.get("response") or {}
    body = response.get("body") or {}

    # Model and temperature
    info["judge_model"] = body.get("model")
    info["temperature"] = body.get("temperature")

    # Metadata â†’ permutation_id
    metadata = body.get("metadata") or {}
    info["permutation_id"] = metadata.get("permutation_id")

    # Extract text from body["output"]
    text_parts = []
    for item in body.get("output") or []:
        if item.get("type") == "message":
            for content in item.get("content") or []:
                if content.get("type") == "output_text" and content.get("text"):
                    text_parts.append(content["text"].strip())
    info["text"] = "\n".join(text_parts) if text_parts else None


    return info



In [None]:
namespace = {}
rag_script_path = PROJECT_ROOT / "code" / "3_rag_exp_with_evals.py"
with rag_script_path.open("r", encoding="utf-8") as f:
    exec(f.read(), namespace)
extract_boolean_answer = namespace["extract_boolean_answer"]

FileNotFoundError: [Errno 2] No such file or directory: 'code/2_rag.py'

In [19]:
import re
def extract_boolean_answer(text: str, prefix_word: str) -> str:
    if text is None:
        return None
    match = re.search(rf"((?<={prefix_word}:\s)|(?<={prefix_word}:))(True|False)", text)
    if match is None or match.group(0) is None:
        return None
    return match.group(0)

In [15]:
def extract_judge_type(custom_id: str) -> str | None:
    """Return the judge_type part from a custom_id like 'qa123__doc_relevance'."""
    if custom_id and "__" in custom_id:
        _, judge_type = custom_id.split("__", 1)
        return judge_type
    return None


# csv

In [34]:
import pandas as pd
from pathlib import Path



write_header = True  # Only write header the first time

mapping_judge_type_key = {
    "doc_relevance": "Relevance",
    "correctness_vs_ref": "Correctness",
    "helpfulness": "Relevance",
    "faithfulness": "Grounded",
}

for record in raw_records:
    rec = extract_record_info(record)

    custom_id = rec.get("custom_id")
    judge_type = extract_judge_type(custom_id)
    judge_answer = extract_boolean_answer(rec.get("text"), mapping_judge_type_key.get(judge_type, "") )

    row = {
        "custom_id": rec.get("custom_id"),
        "text": rec.get("text"),
        "judge_model": rec.get("model"),          
        "temperature": rec.get("temperature"),
        "permutation_id": rec.get("permutation_id"),
        "judge_type": judge_type,
        "judge_answer": judge_answer
        
    }

    # Append one row to the CSV
    pd.DataFrame([row]).to_csv(
        out_csv_path,
        mode="a", # use "w" to overwrite for demonstration; use "a" to append in real use
        header=write_header,
        index=False,
        encoding="utf-8"
    )

    write_header = False  # after first write, no more headers


print(f"Wrote parsed batch results to {out_csv_path}")

Wrote parsed batch results to c:\Users\singhr7\Documents\safety_rag_evaluation_ryan\results\4_5_batch_output.csv
