In [None]:
from openai import OpenAI
from pydantic import BaseModel
import functions.prompts as prompts
import functions.llm as llm
import functions.fetch as fetch
import pandas as pd
from openai.lib._parsing._completions import type_to_response_format_param

import importlib
# Re-import the project helper modules (same order as before) so that edits made
# to them on disk are picked up without restarting the kernel.
for _module in (llm, prompts, fetch):
    importlib.reload(_module)

# Single OpenAI client, constructed with no explicit arguments and shared by the
# cells below.
client = OpenAI()

In [None]:
# Structured "note" object; used as the type of AbstractSummary.note.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose — this model ends up in the JSON schema built from AbstractSummary for
# the request, and a docstring would presumably be emitted as the schema's
# "description" (TODO confirm against pydantic's schema generation).
class Note(BaseModel):
    type: str  # plain string; no set of allowed values is enforced here
    description: str  # plain string

# Response schema for one paper: this class is passed to
# type_to_response_format_param(...) to build the "response_format" entry of each
# request body.
# NOTE(review): what each field is expected to contain is presumably defined by
# prompts.findings, which is not visible here — verify against that prompt.
# Kept as `#` comments (no docstring) so the generated schema is not altered.
class AbstractSummary(BaseModel):
    keywords: list[str]  # list of strings
    summaries: list[str]  # list of strings
    note: Note  # exactly one nested Note object

In [None]:
# Load the paper table; every row supplies the `title` and `abstract` text that
# make up the user message of one batch request.
df = pd.read_csv("./data/abstract/abstract.csv")

# Fail fast with a readable message: a blank CSV cell is loaded as NaN (a float),
# and the string concatenation below would otherwise stop mid-loop with an opaque
# "unsupported operand type(s)" TypeError.
rows_missing_text = df[["title", "abstract"]].isna().any(axis=1)
if rows_missing_text.any():
    raise ValueError(
        "abstract.csv has rows with a missing title or abstract "
        f"(index labels: {df.index[rows_missing_text].tolist()})"
    )

# The response schema is the same for every request, so derive it once instead of
# once per row. The resulting dict is shared by all request bodies; it is only
# meant to be read/serialised downstream.
findings_response_format = type_to_response_format_param(AbstractSummary)

reqs = []
for i, row in df.iterrows():
    # `i` is the DataFrame index label, so each request gets the id "paper-<label>".
    reqs.append(llm.wrap(f"paper-{i}", {
        "model": "o3-mini",
        "messages": [{
            "role": "system",
            "content": prompts.findings
        }, {
            "role": "user",
            # Title and abstract are sent as one message, separated by a newline.
            "content": row.title + "\n" + row.abstract
        }],
        "response_format": findings_response_format
    }))

# Hand the wrapped requests to the project helper together with the target path;
# `req_json_path` is reused by the upload cell.
req_json_path = "data/findings/req.jsonl"
llm.gen_batch_jsonl(req_json_path, reqs)

In [None]:
# Upload the request file for batch processing. The file is opened in a `with`
# block so the handle is closed as soon as the upload call returns (the original
# left it open for the lifetime of the kernel).
# Note: every run of this cell performs a new upload and creates a new batch.
with open(req_json_path, "rb") as req_file:
    batch_input_file = client.files.create(
        file=req_file,
        purpose="batch"
    )

print(f"File ID: {batch_input_file.id}")

# Create the batch job that runs the uploaded requests against the chat
# completions endpoint with a 24h completion window.
batch_object = client.batches.create(
    input_file_id=batch_input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
        # Plain literal: the original f-string had no placeholders.
        "description": "Atlas of Human-AI Interaction"
    }
)

print(f"Batch ID: {batch_object.id}")

# Record both ids via the project helper, passing the text and the target path
# data/findings/req_file_id.txt.
fetch.save(f"File ID: {batch_input_file.id} \nBatch ID: {batch_object.id}", "data/findings/req_file_id.txt")

In [None]:
# Fetch the batch by the id of `batch_object` (set when the batch was created).
# As the last expression of the cell, the returned object is displayed as the
# cell output; re-run this cell to fetch it again.
client.batches.retrieve(batch_object.id)