# OpenAI results retrieval 

The results of an OpenAI Batch job are not returned automatically; they have to be retrieved once the job has finished running.

In [None]:
from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file, then create the API client.
# (presumably the .env file supplies the OpenAI API key — confirm)
load_dotenv()
client = OpenAI()

# Identifiers of the batch job to inspect and of the file to download.
# The file ID must not contain stray whitespace: it is sent to the API as-is.
# NOTE(review): once a batch has completed, its result file ID should also be
# available on the batch object (output_file_id) — confirm that file_id below
# points at the file you actually want to parse.
batch_id = "batch_680a34fdd46c8190934c95a2cb1c19d2"
file_id = "file-Rc7n6bUMUMhoTxW7bPX9VQ"

batch = client.batches.retrieve(batch_id)
print(batch.status)  # Check status. Should be 'completed'

Once the batch status is `completed`, download the file contents with:

In [None]:
# Fetch the contents of the file identified by file_id and show the raw text.
file_response = client.files.content(file_id=file_id)
raw_text = file_response.text
print(raw_text)

Parse the downloaded text and load it into a pandas DataFrame:

In [None]:
import json
import pandas as pd

# The payload is parsed as one JSON document per line. Iterating over the
# string itself would yield single characters, so split it on newlines first
# and skip blank lines (e.g. the one produced by a trailing newline).
# split("\n") is used rather than splitlines() so that Unicode line separators
# that may legally appear inside a JSON string do not break a record apart.
data = [
    json.loads(line)
    for line in file_response.text.split("\n")
    if line.strip()
]
    
def get_content(data: dict) -> str:
    """Return the ``content`` of the second message of a record.

    Args:
        data: A parsed record holding a list under ``data['body']['messages']``.

    Returns:
        Whatever is stored under ``'content'`` in the message at index 1.
        NOTE(review): the DataFrame cell in this notebook indexes the result
        like a list of parts (``[0]['text']``, ``[1]['image_url']``), so the
        ``str`` annotation may be too narrow — confirm.

    Raises:
        KeyError, IndexError: If the record does not have the expected shape.
    """
    second_message = data['body']['messages'][1]
    return second_message['content']

def get_response(data: dict) -> str:
    """Return the ``content`` of the third message of a record, if present.

    Args:
        data: A parsed record holding a list under ``data['body']['messages']``.

    Returns:
        The ``'content'`` of the message at index 2, or the placeholder string
        ``"ERROR 404: No response found"`` when the record has two messages
        or fewer.
        NOTE(review): the output recorded in this notebook shows the
        placeholder for every displayed row — confirm that the parsed file
        really carries the response as a third message.
    """
    conversation = data['body']['messages']
    # Guard clause: nothing beyond the first two messages means no response.
    if len(conversation) <= 2:
        return "ERROR 404: No response found"
    return conversation[2]['content']

# Extract the content of each record once, then build one column per field:
# the text of the first content part, the image URL of the second content
# part, and the response (or its placeholder) for the record.
contents = [get_content(record) for record in data]
df = pd.DataFrame({
    'prompt': [parts[0]['text'] for parts in contents],
    'image_url': [parts[1]['image_url'] for parts in contents],
    'response': list(map(get_response, data)),
})
df.head()

                                           prompt  \
0        How many sea shells are in this picture?   
1  How many hot air balloons are in this picture?   
2  How many hot air balloons are in this picture?   
3  How many hot air balloons are in this picture?   
4  How many hot air balloons are in this picture?   

                                           image_url  \
0  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   
1  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   
2  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   
3  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   
4  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   

                       response  
0  ERROR 404: No response found  
1  ERROR 404: No response found  
2  ERROR 404: No response found  
3  ERROR 404: No response found  
4  ERROR 404: No response found  


Save the DataFrame as a CSV file:

In [None]:
import os, time

# Root folder for exported results.
DIR = '../eval/results/openai'

# File name: local time at the moment of export.
timestamp = time.strftime("%Y-%m-%d_%H-%M-%S")
# Sub-folder name: the model of the first record, with "/" replaced so the
# name stays a single path component.
model = data[0]['body']['model'].replace("/", "_")

# Create <DIR>/<model>/ if needed and write <timestamp>.csv into it.
output_dir = f"{DIR}/{model}"
os.makedirs(output_dir, exist_ok=True)
df.to_csv(f"{output_dir}/{timestamp}.csv", index=False)