In [1]:
"""
Download a finished OpenAI Batch run and pivot the JSONL responses into one JSON file.
"""
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any, Dict, List

from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd

# Load variables from a .env file into the process environment before any of the
# settings below read os.environ (e.g. the OPENAI_BATCH_ID lookup).
# override=True lets values from the .env file replace variables that are
# already set in the environment.
load_dotenv(override=True)

# User-adjustable settings: edit these before running the notebook.

# The notebook is launched from code/, so the project root is the parent of the
# current working directory.
PROJECT_ROOT = Path.cwd().parent
RAW_PATH = PROJECT_ROOT.joinpath("results", "4_5_batch_output.jsonl")
JSON_PATH = PROJECT_ROOT.joinpath("results", "4_5_batch_output.json")
out_csv_path = PROJECT_ROOT.joinpath("results", "4_5_batch_output.csv")
# A non-empty OPENAI_BATCH_ID environment variable takes precedence; otherwise
# fall back to the hard-coded batch id.
BATCH_ID = os.environ.get("OPENAI_BATCH_ID", "") or "batch_690ae971d7048190a982f22185698051"


In [18]:
# Read the judge results CSV from the path configured in the settings cell.
judge_results_df = pd.read_csv(filepath_or_buffer=out_csv_path)

In [24]:
# Widen the long-format judge results: one row per (custom_id, permutation_id),
# with the 'text' and 'judge_answer' values of each judge_type spread across
# columns. aggfunc='first' keeps the first non-null value when a key
# combination occurs more than once.
df_wide = pd.pivot_table(
    judge_results_df,
    values=['text', 'judge_answer'],
    index=['custom_id', 'permutation_id'],
    columns='judge_type',
    aggfunc='first',
)

# Collapse each (field, judge_type) column tuple into "<judge_type>_<field>".
df_wide.columns = ["{1}_{0}".format(*pair) for pair in df_wide.columns]
# Turn the (custom_id, permutation_id) index back into ordinary columns.
df_wide = df_wide.reset_index()


In [22]:
# Widen the judge results with one row per (custom_id, permutation_id) and a
# three-level column index: field ('text' / 'judge_answer'), judge_type,
# judge_model. aggfunc='first' resolves repeated key combinations by taking
# the first non-null value.
# NOTE(review): the saved output of this cell shows only the index header,
# which suggests the result was empty. Rows whose grouping keys are NaN are
# dropped by default, so this would happen if 'judge_model' is missing for
# every row -- confirm against the CSV.
pivoted = pd.pivot_table(
    judge_results_df,
    values=['text', 'judge_answer'],
    index=['custom_id', 'permutation_id'],
    columns=['judge_type', 'judge_model'],
    aggfunc='first',
)
pivoted

custom_id,permutation_id


In [17]:
# Inspect the column names of the loaded judge results.
judge_results_df.columns

Index(['custom_id', 'text', 'judge_model', 'temperature', 'permutation_id',
       'judge_type', 'judge_answer'],
      dtype='object')

In [28]:
# Reshape the long-format judge results to one row per permutation_id, with one
# column per (field, judge_type) pair.
#
# Columns read from judge_results_df:
#   'permutation_id', 'judge_type', 'judge_answer', 'text'
#
# pandas is already imported in the first cell, so it is not re-imported here.
# DataFrame.pivot raises ValueError if a (permutation_id, judge_type) pair
# occurs more than once, so duplicates fail loudly instead of being dropped.
pivot_df = judge_results_df.pivot(
    index='permutation_id',
    columns='judge_type',
    values=['judge_answer', 'text'],
).reset_index()

# Flatten the two-level columns into "<field>_<judge_type>". The restored
# 'permutation_id' column has an empty second level, so it keeps its bare name.
pivot_df.columns = [
    f"{field}_{jtype}" if jtype else field
    for field, jtype in pivot_df.columns
]

pivot_df.head()


Unnamed: 0,permutation_id,judge_answer_correctness_vs_ref,judge_answer_doc_relevance,judge_answer_faithfulness,judge_answer_helpfulness,text_correctness_vs_ref,text_doc_relevance,text_faithfulness,text_helpfulness
0,okg5aalok03t05_0001,False,,True,True,Step-by-step reasoning:\n- The ground truth st...,Step-by-step reasoning:\n\n- Fact set 1 (013__...,Grounded: True\n\nStep-by-step reasoning:\n- I...,Step-by-step reasoning:\n- The question asks f...
1,okg5aalok03t05_0002,True,True,True,True,Step-by-step reasoning:\n- The stopping time s...,Step-by-step reasoning:\n- The question asks a...,Grounded: True\n\nStep-by-step reasoning:\n- T...,Relevance: True\n\nStep-by-step reasoning:\n- ...
2,okg5aalok03t05_0003,True,True,True,True,Correctness: True\n\nExplanation:\n- The groun...,Step-by-step reasoning:\n- The question asks w...,Step-by-step reasoning:\n- The FACTS specify a...,Step-by-step reasoning:\n- The question asks w...
3,okg5aalok03t07_0007,False,True,True,True,Step-by-step reasoning:\n- The ground truth st...,Step-by-step reasoning:\n- The question asks a...,Step 1: Identify the student’s claims\n- Claim...,Step-by-step reasoning:\n- The question asks f...
4,okg5aalok03t07_0008,True,True,True,True,Correctness: True\n\nReasoning:\n- The stoppin...,Step-by-step reasoning:\n- The question asks a...,Grounded: True\n\nStep-by-step reasoning:\n- T...,Step-by-step reasoning:\n- The question sets a...
