In [2]:
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

def _env_path(name: str) -> Path:
    """Return the resolved path stored in the environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value as an absolute, resolved ``Path``.

    Raises:
        KeyError: If the variable is not set.
    """
    value = os.getenv(name)
    if value is None:
        # Path(None) would fail with an opaque TypeError; name the missing variable instead.
        raise KeyError(
            f"Environment variable {name!r} is not set; define it in the environment or in .env"
        )
    return Path(value).resolve()


# Directory roots used throughout the notebook, all taken from the environment.
PROJECT_ROOT = _env_path("PROJECT_ROOT")
MODEL_ROOT = _env_path("MODEL_ROOT")
DATA_ROOT = _env_path("DATA_ROOT")
CONFIG_ROOT = _env_path("CONFIG_ROOT")
SRC_ROOT = _env_path("SRC_ROOT")

# Restrict CUDA-aware libraries to device index 5.
os.environ["CUDA_VISIBLE_DEVICES"] = "5"
# Make modules under SRC_ROOT importable (presumably where `utils.utility` lives — confirm).
sys.path.append(str(SRC_ROOT))

from openai import OpenAI

from datasets import Dataset
import json
from utils.utility import *

from omegaconf import OmegaConf

# Shared API client for the cells below; the key is read from OPENAI_API_KEY.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [5]:
# Small round-trip request: one short input, minimal reasoning effort, capped output.
response = client.responses.create(
    model="gpt-5-mini",
    input="hello",
    reasoning={"effort": "minimal"},
    max_output_tokens=128,
)
print(response.output)

Response(id='resp_68c04c5b38b481999bedc167f30682d50d4f30cbc5d453b3', created_at=1757432923.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-5-mini-2025-08-07', object='response', output=[ResponseReasoningItem(id='rs_68c04c5be7408199bdf4a8263f6b74120d4f30cbc5d453b3', summary=[], type='reasoning', content=None, encrypted_content=None, status=None), ResponseOutputMessage(id='msg_68c04c5c0dfc8199a845fd21ae8c93480d4f30cbc5d453b3', content=[ResponseOutputText(annotations=[], text='Hello! How can I help you today?', type='output_text', logprobs=[])], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, conversation=None, max_output_tokens=128, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, reasoning=Reasoning(effort='minimal', generate_summary=None, summary=None), safety_identifier=None, service_tier='default', status='

In [10]:
# `output_text` aggregates the text of the response's output messages, so this
# does not depend on a reasoning item occupying index 0 of `response.output`.
print(response.output_text)

Hello! How can I help you today?


In [2]:
# Fetch the batch listing (page size 100) and show its first five entries.
batch_list = client.batches.list(limit=100)
print('\n'.join(str(batch) for batch in batch_list.data[0:5]))

Batch(id='batch_68be4c7a56ac819088db9fbf44777491', completion_window='24h', created_at=1757301882, endpoint='/v1/responses', input_file_id='file-8uRxJhZJhPmpcgMpv9sYrd', object='batch', status='failed', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=Errors(data=[BatchError(code='token_limit_exceeded', line=None, message='Enqueued token limit reached for gpt-5-mini in organization org-4zGMPv7mHWxPevL6TmmXK8o8. Limit: 40,000,000 enqueued tokens. Please try again once some in_progress batches have been completed.', param=None)], object='list'), expired_at=None, expires_at=1757388282, failed_at=1757301884, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0), usage={'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0, 'input_tokens_details': {'cached_tokens': 0}, 'output_tokens_details': {'reasoning_tokens': 0}})
Batch(id='batch_68be4c61d0f88190bf95704fee75b3

In [3]:
# Keep only batches whose status is "completed".
# NOTE(review): iterating the listing object presumably walks every page, not
# only the first 100 entries — confirm against the SDK's pagination behaviour.
batch_list = list(filter(lambda batch: batch.status == "completed", batch_list))
print('\n'.join(str(batch) for batch in batch_list[0:5]))
print(len(batch_list))

Batch(id='batch_68be4a8163448190ae7be72c6257a560', completion_window='24h', created_at=1757301377, endpoint='/v1/responses', input_file_id='file-VDLkhdmiEWQUV3XvMbtZfF', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1757301796, error_file_id=None, errors=None, expired_at=None, expires_at=1757387777, failed_at=None, finalizing_at=1757301637, in_progress_at=1757301379, metadata=None, output_file_id='file-SXxpw9bFvMJQb1YT6GxGNo', request_counts=BatchRequestCounts(completed=2000, failed=0, total=2000), usage={'input_tokens': 1965288, 'output_tokens': 132373, 'total_tokens': 2097661, 'input_tokens_details': {'cached_tokens': 278912}, 'output_tokens_details': {'reasoning_tokens': 0}})
Batch(id='batch_68be486f3508819089af52afc4ba0053', completion_window='24h', created_at=1757300847, endpoint='/v1/responses', input_file_id='file-KHJG7Lr19UH12odpdVVA7S', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1757301289, 

In [4]:
# Download the output file of each completed batch, in listing order, for as long
# as the first record's custom_id consists of exactly two space-separated tokens.
# The first batch whose custom_id has a different shape ends the scan.
retrieved_text = []
for i, batch in enumerate(batch_list):
    # Reset per iteration so the progress line never shows a stale (or unbound)
    # value when a batch has no output file.
    line = ""
    if batch.output_file_id is not None:
        file = client.files.content(batch.output_file_id)
        line = file.text.split('\n', 1)[0]
        # Parse the record as JSON rather than slicing the raw text around the key.
        custom_id = json.loads(line).get("custom_id", "")
        if len(custom_id.split(' ')) == 2:
            retrieved_text.append(file.text)
        else:
            break
    sys.stdout.write(f"\rchecked {i}th batch {line[:100]}")
    sys.stdout.flush()

checked 183th batch {"id": "batch_req_68ba78d4d90081909d8f457763cd573e", "custom_id": "140000 True", "response": {"statu

In [5]:
# Concatenate the first 70 downloaded files into one JSONL string.
# NOTE(review): the reason for the cutoff at 70 is not visible in this notebook — confirm.
# Each file is newline-terminated before joining so the last record of one file
# can never be fused with the first record of the next.
recoverable = ''.join(
    text if text.endswith('\n') else text + '\n'
    for text in retrieved_text[0:70]
)
print(recoverable[0:200])

{"id": "batch_req_68be4b86ea008190bd278f94a4ad2020", "custom_id": "288000 False", "response": {"status_code": 200, "request_id": "b5f7981f76f8df2fb8ebb9f0eb6de1ef", "body": {"id": "resp_68be4ab0faa081


In [17]:
# Persist the concatenated batch output. The encoding is explicit (UTF-8, as for
# the other files handled in this notebook) so it does not depend on the locale.
with open('recovering.jsonl', 'w', encoding='utf-8') as file:
    file.write(recoverable)

In [18]:
# Reload the concatenated batch output from disk. The encoding is explicit (UTF-8,
# as for the other files handled in this notebook) so it does not depend on the locale.
with open('recovering.jsonl', 'r', encoding='utf-8') as file:
    recoverable = file.read()

In [8]:
# Experiment configuration; later cells read dataset_output_dir, scorer and builder from it.
config = OmegaConf.load(CONFIG_ROOT.joinpath("experiment1.yaml"))

In [9]:
# Read the generated-summaries JSON and wrap it in a `Dataset`.
gen_filename = "generated_pairwise_openai_2025-08-27_06-03-12.json"
gen_output_path = DATA_ROOT / config.dataset_output_dir / gen_filename
print(f"Loading generated ouputs from {str(gen_output_path)}...")
dataset = Dataset.from_dict(
    json.loads(gen_output_path.read_text(encoding="utf-8"))
)
print("Loaded successfully")

Loading generated ouputs from /src/gs25009/LLM_DAG_ALLIGN/dataset/preprocessed/generated_pairwise_openai_2025-08-27_06-03-12.json...
Loaded successfully


In [10]:
def generate_comparisons(dataset: Dataset) -> list[dict]:
    """Build one comparison record for every unordered pair of summaries.

    Args:
        dataset: Iterable of examples, each providing a ``'prompt'`` entry and
            a ``'summaries'`` sequence.

    Returns:
        A flat list of dicts with keys ``'prompt'``, ``'y1'``, ``'y2'``,
        ``'ref'`` (always the empty string) and ``'meta'``. ``'meta'`` is the
        string ``"k, i, j"``: the example position followed by the positions of
        the two summaries, with ``i < j``. Records are ordered by example, then
        by ``i``, then by ``j``.
    """
    comparisons = []
    for example_idx, example in enumerate(dataset):
        prompt = example['prompt']  # type: ignore
        candidates = example['summaries']  # type: ignore
        comparisons.extend(
            {
                'prompt': prompt,
                'y1': first,
                'y2': second,
                'ref': "",
                'meta': f"{example_idx}, {a}, {b}",
            }
            for a, first in enumerate(candidates)
            for b, second in enumerate(candidates)
            if a < b
        )
    return comparisons
# Flat list of all pairwise comparison records; consumed by the cells below.
comparisons = generate_comparisons(dataset)

In [11]:
# One [meta, verdict] row per comparison; the verdict stays None until a parsed
# batch record fills it in.
result = []
for pair in comparisons:
    result.append([pair['meta'], None])
print('\n'.join(str(row) for row in result[0:5]))

['0, 0, 1', None]
['0, 0, 2', None]
['0, 0, 3', None]
['0, 0, 4', None]
['0, 0, 5', None]


In [19]:
import regex as re

# Regular expression used by _parse_output_line; its first capture group is
# compared against "1" there to decide which shown summary was preferred.
pattern = config.scorer.openai.preference_pattern
def _parse_output_line(line: str) -> tuple[int, int | None]:
    obj = json.loads(line)
    idx = int(obj.get("custom_id").split()[0])  # came from compare_batch_0
    body = obj.get("response", {}).get("body", {})
    output = body.get("output", [])[1].get("content", "")[0]
    if not output:
        return idx, None
    output_text = output.get("text", "")
    match = re.search(pattern, output_text)
    if not match:
        return idx, None
    # Map '1' -> 0 (first shown), '2' -> 1 (second shown)
    judged_idx = 0 if match.group(1) == "1" else 1
    # Convert back to original y1/y2 indexing using recorded swap info
    swapped = obj.get("custom_id").split()[1] == "True"
    orig_idx = (1 - judged_idx) if swapped else judged_idx
    return idx, orig_idx

# Store each parsed verdict in the row of `result` addressed by the record's
# index; empty lines (such as the one after a trailing newline) are skipped.
for record in filter(None, recoverable.split('\n')):
    idx, comp = _parse_output_line(record)
    result[idx][1] = comp
    

In [20]:
# Write one JSON object per comparison: its meta id and the verdict as a string.
filename = get_filename("comparison", config.builder.type, config.scorer.type, suffix=".jsonl")
output_path = DATA_ROOT / config.dataset_output_dir / filename

print(f"Saving result to {str(output_path)}...")
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(str(output_path), "w", encoding="utf-8") as f:
    f.writelines(
        json.dumps({"id": meta, "result": str(verdict)}, ensure_ascii=False) + "\n"
        for meta, verdict in result
    )
print("Saved successfully.")

Saving result to /src/gs25009/LLM_DAG_ALLIGN/dataset/preprocessed/comparison_cyclic_openai_2025-09-09_09-07-47.jsonl...
Saved successfully.


In [14]:
# Inspect the first comparison record (keys: prompt, y1, y2, ref, meta).
print(comparisons[0])

{'prompt': "Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where animals have more rights to space and food than humans,' said Charlie Leocha, consumer representative on the committee.\xa0'It is time that the DOT and FAA take a stand for humane treatment of passengers.' But could crowding on planes lead to more serious issues than figh

python3 preprocess_comparisons.py experiment1.yaml generated_pairwise_openai_2025-08-27_06-03-12.json -o comparison_cyclic_openai_2025-09-09_09-03-27.jsonl
352661
352069