## First, let's load the test set

In [9]:
%load_ext autoreload
%autoreload 2

from averitec import Datapoint
from evidence_generation import EvidenceGenerationResult, GptEvidenceGenerator
from classification import DefaultClassifier
from retrieval import RetrievalResult, SimpleFaissRetriever
from pipeline import Pipeline, PipelineResult
import json, random, pickle
from tqdm.notebook import tqdm

random.seed(111)
batch = []
split = "dev"
path = "/mnt/data/factcheck/averitec-data/"
with open(path + f"data/{split}.json") as f:
    dataset = json.load(f)
    for i in range(len(dataset)):
        dataset[i]["claim_id"] = i
    datapoints = [Datapoint.from_dict(d) for d in dataset]
    
    
class GptBatchedEvidenceGenerator(GptEvidenceGenerator):
    def __init__(self, model="gpt-4o", client= None):
        super().__init__(model, client)
        self.batch = []
        self.fallback_gpt_generator = GptEvidenceGenerator()
    
    def get_batch_dict(self, datapoint: Datapoint, retrieval_result: RetrievalResult):
        system_prompt = self.format_system_prompt(retrieval_result)
        user_prompt = datapoint.claim
        return {
            "custom_id": f"{split}-{datapoint.claim_id}", 
            "method": "POST", "url": "/v1/chat/completions", 
            "body": {
                "model": "gpt-4o",
                #"model": "gpt-3.5-turbo-0125",
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
            "temperature": 0}}
    
    def __call__(self, datapoint: Datapoint, retrieval_result: RetrievalResult, *args, **kwargs) -> EvidenceGenerationResult:
        self.batch.append(self.get_batch_dict(datapoint, retrieval_result))
        return EvidenceGenerationResult(evidences=[],metadata={"suggested_label":[0,0,0,0]})
    
    def update_pipeline_result(self, pipeline_result, gpt_result, classifier):
        self.last_llm_output = gpt_result
        gpt_data = self.parse_json(gpt_result)
        try:
            evidence_generation_result = EvidenceGenerationResult(
                evidences=self.parse_evidence(gpt_data["questions"], pipeline_result.retrieval_result),
                metadata={
                    "suggested_label": self.parse_label_probabilities(gpt_data["claim_veracity"]),
                    "llm_type": self.client.model,
                    "llm_output": gpt_data,
                }
            )
        except:
            print("failed, using fallback gpt")
            evidence_generation_result = self.fallback_gpt_generator(pipeline_result.datapoint, pipeline_result.retrieval_result)
        return PipelineResult(
            datapoint=pipeline_result.datapoint,
            retrieval_result=pipeline_result.retrieval_result,
            evidence_generation_result=evidence_generation_result,
            classification_result=classifier(pipeline_result.datapoint, evidence_generation_result,pipeline_result.retrieval_result)
        )

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
pipeline = Pipeline(
    SimpleFaissRetriever(path + "data_store/vecstore/dev/6k"), # TODO: TRIPLE CHECK BEFORE EVERY LAUNCH
    GptBatchedEvidenceGenerator(), 
    DefaultClassifier()
)

In [11]:
len(datapoints)

500

In [12]:
submission = []
dump = []

for dp in tqdm(datapoints):
    pipeline_result = pipeline(dp)
    submission.append(pipeline_result.to_submission())
    dump.append(pipeline_result)
with open(f"/mnt/data/factcheck/averitec-data/data_store/submission_{split}.json", "w") as f:
    json.dump(submission, f, indent=4)
with open(f"/mnt/data/factcheck/averitec-data/data_store/pipeline_result_{split}.pkl", "wb") as f:
    pickle.dump(dump, f)

  0%|          | 0/500 [00:00<?, ?it/s]

In [15]:
from openai import OpenAI
import time

client = OpenAI()

# save as jsonl
# divide into batches of 100
batch_size = 100
for i in range(1, len(datapoints) // batch_size + 1):
    path = "/mnt/data/factcheck/averitec-data/data_store/batch_jobs/wednesday/"
    name = f"batch_{split}_{i}.jsonl"
    bfile = path + name
    outfile = path + "output/" + name
    
    with open(bfile, "w") as f:
        for dp in pipeline.evidence_generator.batch[i * batch_size : (i + 1) * batch_size]:
            f.write(json.dumps(dp) + "\n")

    batch_input_file = client.files.create(file=open(bfile, "rb"), purpose="batch")

    batch = client.batches.create(
        input_file_id=batch_input_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": f"dev-set job, batch {i}",
        },
    )
    print(batch)
    while True:
        batch = client.batches.retrieve(batch.id)
        if batch.status == "completed":
            break
        time.sleep(10)
        print("waiting for batch to complete",batch.request_counts, batch.id)
    print(f"batch {i} completed")
    file_response = client.files.content(batch.output_file_id)
    print(file_response.text)
    # save
    with open(outfile, "w") as f:
        f.write(file_response.text)
    

Batch(id='batch_UVmAbw32eVxcEGDZ2dPPcOxY', completion_window='24h', created_at=1721798775, endpoint='/v1/chat/completions', input_file_id='file-i5TBHbTg1kB1k37TLlztPMJx', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1721885175, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 1'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_UVmAbw32eVxcEGDZ2dPPcOxY
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=100) batch_UVmAbw32eVxcEGDZ2dPPcOxY
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=100) batch_UVmAbw32eVxcEGDZ2dPPcOxY
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=100) batch_UVmAbw32eVxcEGDZ2dPPcOxY
waiting for 

BadRequestError: Error code: 400 - {'error': {'message': 'Invalid file format for Batch API. Must be .jsonl', 'type': 'invalid_request_error', 'param': None, 'code': None}}

In [16]:
path = "/mnt/data/factcheck/averitec-data/data_store/batch_jobs/wednesday/"
data = ""
for i in range(5):
    # join batched files
    with open(f"{path}output/batch_{split}_{i}.jsonl") as f:
        data += f.read()
with open(f"{path}_output.jsonl", "w") as f:
    f.write(data)

In [14]:
    with open(outfile, "w") as f:
        f.write(file_response.text)

In [17]:
len(pipeline.evidence_generator.batch),len(datapoints)

(2215, 2215)

In [25]:
from openai import OpenAI
client = OpenAI()

batch_input_file = client.files.create(
  file=open("/mnt/data/factcheck/averitec-data/data_store/batch_jobs/tuesday/batch_test_0.jsonl", "rb"),
  purpose="batch"
)

batch_input_file_id = batch_input_file.id

client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
      "description": "test-set 1st time job"
    }
)

Batch(id='batch_ZxHjClCCyggB5E2WN0elTIgf', completion_window='24h', created_at=1721766209, endpoint='/v1/chat/completions', input_file_id='file-ZrXBLXoG28ri5ZgmzbBmBUP5', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1721852609, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'test-set 1st time job'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

## Check status

In [29]:
from openai import OpenAI
client = OpenAI()

batch = client.batches.retrieve("batch_ZxHjClCCyggB5E2WN0elTIgf")
batch

Batch(id='batch_ZxHjClCCyggB5E2WN0elTIgf', completion_window='24h', created_at=1721766209, endpoint='/v1/chat/completions', input_file_id='file-ZrXBLXoG28ri5ZgmzbBmBUP5', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1721766235, error_file_id=None, errors=None, expired_at=None, expires_at=1721852609, failed_at=None, finalizing_at=1721766234, in_progress_at=1721766210, metadata={'description': 'test-set 1st time job'}, output_file_id='file-9Ht5MJmaPcH5qXi5Tb9jbgFZ', request_counts=BatchRequestCounts(completed=4, failed=0, total=4))

In [13]:
from openai import OpenAI
client = OpenAI()

file_response = client.files.content(batch.output_file_id)
print(file_response.text)
# save
with open(path+f"data_store/batch_jobs/batch_{split}-tuesday-top5-output.jsonl", "w") as f:
    f.write(file_response.text)

ValueError: Expected a non-empty value for `file_id` but received None

# Propagate back

In [17]:
# parse from jsonl 
# file_response.text
batch_results = []
for line in data.split("\n")[:-1]:
    batch_results.append(json.loads(line))

In [18]:
print(pipeline.evidence_generator.last_llm_output)

None


In [19]:
new_dump = []
for pipeline_result, batch_result in zip(dump[:len(batch_results)], batch_results):
    new_result = pipeline.evidence_generator.update_pipeline_result(pipeline_result, batch_result["response"]["body"]["choices"][0]["message"]["content"], pipeline.classifier)
    new_dump.append(new_result)

In [22]:
split

'dev'

In [20]:
with open(f"/mnt/data/factcheck/averitec-data/data_store/submission_{split}.json", "w") as f:
    json.dump([d.to_submission() for d in new_dump], f, indent=4)
with open(f"/mnt/data/factcheck/averitec-data/data_store/pipeline_result_{split}.pkl", "wb") as f:
    pickle.dump(new_dump, f)

In [21]:
new_dump[0].evidence_generation_result.metadata

{'suggested_label': array([0.94791499, 0.01736167, 0.01736167, 0.01736167]),
 'llm_type': 'gpt-4o',
 'llm_output': {'questions': [{'question': 'Did Sean Connery write a letter to Steve Jobs?',
    'answer': 'No, the letter was fake.',
    'source': '1'},
   {'question': 'Was the letter purportedly from Sean Connery to Steve Jobs published online?',
    'answer': 'Yes, it was published online.',
    'source': '1'},
   {'question': 'Did the letter claim that Sean Connery refused to appear in an Apple commercial?',
    'answer': 'Yes, the letter claimed that Sean Connery refused to appear in an Apple commercial.',
    'source': '1'},
   {'question': 'Was the letter part of a satirical article?',
    'answer': 'Yes, the letter was part of a satirical article.',
    'source': '2'},
   {'question': 'Which website originally published the fake letter?',
    'answer': 'The satirical website Scoopertino originally published the fake letter.',
    'source': '2'},
   {'question': 'Did the letter 