In [1]:
import os
import sys
from pathlib import Path

# Root of the importable sources (<repo>/src). The repo location defaults to the
# path below; set the HARP_BASEDIR environment variable to point at your own
# checkout instead of editing this cell.
BASEDIR = Path(os.environ.get("HARP_BASEDIR", "/workspaces/HARP/")) / "src"

# Put BASEDIR at the front of the import path so the project modules resolve.
# Guarded so that re-running this cell does not keep prepending duplicates.
if sys.path[:1] != [str(BASEDIR)]:
    sys.path.insert(0, str(BASEDIR))

In [2]:
from __future__ import annotations

import copy
import itertools
import json
import math
import os
import pickle
import pprint
import re
import textwrap
import time
import traceback
from collections import Counter, defaultdict
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Markdown, clear_output, display
from tqdm.auto import tqdm

import anthropic

In [3]:
from eval.eval import create_batch
from eval.prompts import *
from eval.utils import read_jsonl, write_jsonl, get_uid

# Data

In [4]:
# Load the processed HARP file and build a lookup from each entry's `get_uid`
# value to the entry itself.
dataset = read_jsonl(BASEDIR / "data/processed/HARP.jsonl")
dataset_map = dict((get_uid(problem), problem) for problem in dataset)
len(dataset)  # displayed as the cell output: number of entries loaded

4780

# Run eval

In [5]:
# Anthropic SDK client used for every batch call in the cells below.
# No credentials are passed explicitly — presumably they are picked up from the
# environment (e.g. ANTHROPIC_API_KEY); confirm against your SDK configuration.
client = anthropic.Anthropic()

## Create batch

In [6]:
# Model evaluated in this run; it also names the per-model directory under inputs/.
MODEL = "claude-3-5-sonnet-20241022"

# Build the batch requests from the dataset: no few-shot messages, a custom
# system prompt, temperature 0, and generation stopped at the closing phrase
# "I hope it is correct." (presumably the phrase the prompt asks the model to
# end its answer with — verify against eval.prompts).
batch = create_batch(
    dataset,
    api="anthropic",
    model=MODEL,
    fewshot_messages=[],
    system_prompt=custom_claude_sysprompt,
    max_tokens=2048,
    temperature=0,
    stop_sequences=["I hope it is correct."],
    # just to remove irrelevant params
    top_p=None,
)

# Persist the exact requests that will be submitted. The target directory is
# created first so the cell also works on a checkout where it does not exist yet.
batch_path = BASEDIR / f"inputs/short_answer/{MODEL}/batch.jsonl"
batch_path.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(batch, batch_path)

## Run batch

In [30]:
# Submit the prepared requests as a new message batch and print the returned
# batch record; the `id` shown there is the value the BATCH_NAME cell expects.
# NOTE: every execution of this cell calls `batches.create` again, so re-running
# it submits another batch — skip it when resuming work on an existing batch.
# NOTE(review): the recorded output reports 4797 requests while the dataset cell
# reports 4780 entries — presumably the batch was built from a different dataset
# state; confirm before relying on these outputs.
message_batch = client.beta.messages.batches.create(requests=batch)
print(message_batch)

BetaMessageBatch(id='msgbatch_01PX3X1QRaJdP94MujZg2pVb', cancel_initiated_at=None, created_at=datetime.datetime(2024, 11, 2, 5, 51, 0, 604839, tzinfo=datetime.timezone.utc), ended_at=None, expires_at=datetime.datetime(2024, 11, 3, 5, 51, 0, 604839, tzinfo=datetime.timezone.utc), processing_status='in_progress', request_counts=BetaMessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=4797, succeeded=0), results_url=None, type='message_batch', archived_at=None)


In [None]:
# Identifier of the batch that the status and download cells below operate on.
# It is hardcoded rather than read from `message_batch.id` — presumably so the
# notebook can be resumed in a fresh kernel without re-submitting; update it
# whenever a new batch is created.
BATCH_NAME = "msgbatch_01PX3X1QRaJdP94MujZg2pVb"  # Replace with your own

## Check batch status

In [13]:
# Fetch the current record for BATCH_NAME and report its processing status.
# Re-run this cell to poll; the recorded run shows the status "ended" once the
# batch is finished (presumably the point at which results can be downloaded).
message_batch = client.beta.messages.batches.retrieve(BATCH_NAME)
print(f"Batch {message_batch.id} processing status is {message_batch.processing_status}")

Batch msgbatch_01PX3X1QRaJdP94MujZg2pVb processing status is ended


In [14]:
# Display the full batch record (timestamps, per-status request counts, results URL).
message_batch

BetaMessageBatch(id='msgbatch_01PX3X1QRaJdP94MujZg2pVb', cancel_initiated_at=None, created_at=datetime.datetime(2024, 11, 2, 5, 51, 0, 604839, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2024, 11, 2, 5, 52, 39, 664488, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2024, 11, 3, 5, 51, 0, 604839, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=BetaMessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=0, succeeded=4797), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01PX3X1QRaJdP94MujZg2pVb/results', type='message_batch', archived_at=None)

## Get and save results

In [15]:
for result in client.beta.messages.batches.results(BATCH_NAME):
    match result.result.type:
        case "succeeded":
            pass
            # print(f"Success! {result.custom_id}")
            # print(result)
        case "errored":
            if result.result.error.type == "invalid_request":
                # Request body must be fixed before re-sending request
                print(f"Validation error {result.custom_id}")
            else:
                # Request can be retried directly
                print(f"Server error {result.custom_id}")
        case "expired":
            print(f"Request expired {result.custom_id}")

In [17]:
# Download every per-request result of the batch and convert each one to a plain
# dict so it can be serialised as a JSONL line.
results = [
    result.to_dict()
    for result in client.beta.messages.batches.results(BATCH_NAME)
]

# Persist the raw outputs. The target directory is created first so the cell also
# works on a checkout where it does not exist yet.
outputs_path = BASEDIR / "outputs/short_answer/claude-3-5-sonnet-20241022/outputs.jsonl"
outputs_path.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(results, outputs_path)