# Trying to unit test batch inference with bedrock

(following guide [here](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html))


Note: the minimum input JSONL file size for batch inference to work is 1 MB, even though [this quotas page](https://docs.aws.amazon.com/bedrock/latest/userguide/quotas.html#quotas-batch) says 20 MB.

### Install the Python Bedrock SDK from re:Invent (downloaded as a zip file) [[link](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference-example.html)][[link](https://docs.aws.amazon.com/bedrock/latest/userguide/api-setup.html#api-sdk)]

In [1]:
!mkdir -p tmp-boto-install
!unzip /home/ubuntu/bedrock-python-sdk-reinvent.zip -d tmp-boto-install
!pip install tmp-boto-install/botocore-1.32.4-py3-none-any.whl
!pip install tmp-boto-install/boto3-1.29.4-py3-none-any.whl

In [2]:
import json

import boto3

# S3 bucket that holds the batch input file and receives the job output.
BUCKET_NAME = "test-tmp-data-upload-bucket"
# Name of the local JSONL input file; also used as its S3 object key.
INPUT_FILE_NAME = "test-batch-inference-data.jsonl"

In [3]:
# Test single prompt inference, to make sure connections work
def test_single_prompt():
    """Send one fixed prompt to Claude v2 via Bedrock and print the completion.

    Acts as a connectivity check: it builds a single request, invokes the
    model through the ``bedrock-runtime`` client, and prints the value of the
    ``completion`` field of the JSON response.
    """
    request_payload = {
        "prompt": "\n\nHuman: explain black holes to 8th graders\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.1,
        "top_p": 0.9,
    }

    runtime_client = boto3.client(service_name="bedrock-runtime")
    result = runtime_client.invoke_model(
        body=json.dumps(request_payload),
        modelId="anthropic.claude-v2",
        accept="application/json",
        contentType="application/json",
    )

    # The response body exposes read(); consume it fully, then parse the JSON.
    completion_payload = json.loads(result.get("body").read())

    # text
    print(completion_payload.get("completion"))

In [4]:
# Run the single-prompt check before attempting batch inference.
test_single_prompt()

### For batch inference, you need to provide your inputs as a JSONL file in an S3 bucket

In [5]:
def create_bucket_and_upload_file(bucket_name, file_name, object_name=None):
    """Make sure an S3 bucket exists, then upload a local file into it.

    Args:
        bucket_name: Name of the bucket to create (reused if already owned).
        file_name: Path of the local file to upload.
        object_name: Key the file is stored under in S3; falls back to
            ``file_name`` when not given.

    Raises:
        Any boto3 client error other than ``BucketAlreadyOwnedByYou`` raised
        by bucket creation or upload is propagated to the caller.
    """
    # Create S3 client
    s3 = boto3.client("s3")

    # Outside us-east-1, S3 rejects CreateBucket unless the request names the
    # target region explicitly through a LocationConstraint.
    create_kwargs = {"Bucket": bucket_name}
    region = s3.meta.region_name
    if region and region != "us-east-1":
        create_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": region}

    # Create the bucket; owning it already is fine, we just reuse it.
    try:
        s3.create_bucket(**create_kwargs)
    except s3.exceptions.BucketAlreadyOwnedByYou:
        print(f"Bucket {bucket_name} already exists.")

    # Upload the file (object_name is the name as appears in s3)
    if not object_name:
        object_name = file_name

    s3.upload_file(file_name, bucket_name, object_name)
    print(f"File uploaded to {bucket_name}/{object_name}")

### Minimum input file size for batch inference is 1MB, so let's build 2k examples (from SQuAD passages) asking the model to translate English to Spanish.

In [7]:
# !pip install datasets
from datasets import load_dataset

# Load SQuAD via `datasets`; only the `context` field of the train split is
# used below, as source text for the translation prompts.
squad_dataset = load_dataset("squad")  # 87599 rows in train


def create_squad_prompts(dataset, nprompts=5) -> list[str]:
    """Build Human/Assistant translation prompts from dataset contexts.

    Args:
        dataset: Mapping with a ``"train"`` entry that is indexable by row
            number, each row exposing a ``"context"`` value.
        nprompts: Number of prompts to build, taken from rows 0..nprompts-1.

    Returns:
        One prompt string per row, wrapping the row's context in ``<text>``
        tags between a fixed instruction prefix and assistant suffix.
    """
    prefix = "\n\nHuman: Convert the following text to spanish: <text> "
    suffix = " </text>\n\nAssistant: The text converted to spanish is: "
    return [
        f"{prefix}{dataset['train'][row]['context']}{suffix}"
        for row in range(nprompts)
    ]

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# 2000 prompts, so the resulting JSONL file clears the 1MB input minimum.
prompts = create_squad_prompts(squad_dataset, nprompts=2000)

In [9]:
# Let's create a large jsonl file (min 1MB) for batch inference
def create_jsonl_file(prompts: list, out_file_name: str):
    """Write one batch-inference record per prompt to a JSONL file.

    Each line is a JSON object with a ``recordId`` (the prompt's index as a
    string) and a ``modelInput`` holding the prompt and sampling parameters.

    Args:
        prompts: Prompt strings to write, one record each. May be empty, in
            which case an empty file is created.
        out_file_name: Path of the JSONL file to (over)write.
    """
    # recordId is optional (will be assigned if empty)
    # modelInput is what was the "body" in single-inference

    # Counted separately from the loop variable so an empty `prompts` list
    # still reports "0 lines" instead of failing on an unbound name.
    line_count = 0
    with open(out_file_name, "w", encoding="utf-8") as f:
        for i, prompt in enumerate(prompts):
            record = {
                "recordId": str(i),
                "modelInput": {
                    "prompt": prompt,
                    "max_tokens_to_sample": 1000,
                    "temperature": 0.1,
                    "top_p": 0.9,
                },
            }
            f.write(json.dumps(record))
            f.write("\n")
            line_count += 1
    print(f"Created {out_file_name} with {line_count} lines.")

In [10]:
# Write the prompts to the local JSONL file that is uploaded to S3 below.
create_jsonl_file(prompts, INPUT_FILE_NAME)

Created test-batch-inference-data.jsonl with 1999 lines.


In [11]:
# Check the file's size on disk against the batch-inference minimum.
!du -hs $INPUT_FILE_NAME

2.1M	test-batch-inference-data.jsonl


### Upload the JSONL file to an S3 bucket where the Bedrock batch job can grab it

In [12]:
# Create the bucket if needed and upload the input file under its own name.
create_bucket_and_upload_file(bucket_name=BUCKET_NAME, file_name=INPUT_FILE_NAME)

File uploaded to test-tmp-data-upload-bucket/test-batch-inference-data.jsonl


### Submit the batch job, grab the jobArn identifier (used to check status, find results, etc)

In [14]:
# Submit batch job

# Job settings gathered in one place so they are easy to change between runs.
# NOTE(review): the role ARN is account-specific; replace it with your own.
ROLE_ARN = "arn:aws:iam::339712833620:role/kaleko-test-ec2-bedrock-role"
MODEL_ID = "anthropic.claude-v2"
JOB_NAME = "my-batch-job-try7"

bedrock = boto3.client(service_name="bedrock")

# Input: the JSONL file uploaded to the bucket under INPUT_FILE_NAME.
inputDataConfig = {
    "s3InputDataConfig": {"s3Uri": f"s3://{BUCKET_NAME}/{INPUT_FILE_NAME}"}
}

# Output: written under the root of the same bucket.
outputDataConfig = {"s3OutputDataConfig": {"s3Uri": f"s3://{BUCKET_NAME}/"}}

response = bedrock.create_model_invocation_job(
    roleArn=ROLE_ARN,
    modelId=MODEL_ID,
    jobName=JOB_NAME,
    inputDataConfig=inputDataConfig,
    outputDataConfig=outputDataConfig,
)

# Index rather than .get(): a response without a jobArn should fail here,
# not surface later as jobArn=None in the polling/download cells.
jobArn = response["jobArn"]

In [24]:
# Get status of job (keep polling this until it's done. 2k lines took 30 mins)
# The job state is reported in the 'status' field of the returned dict.
bedrock.get_model_invocation_job(jobIdentifier=jobArn)

{'ResponseMetadata': {'RequestId': 'eaeeba7d-bd2e-4098-b516-5ccb87c5e7af',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 06 Mar 2024 22:16:50 GMT',
   'content-type': 'application/json',
   'content-length': '597',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'eaeeba7d-bd2e-4098-b516-5ccb87c5e7af'},
  'RetryAttempts': 0},
 'jobArn': 'arn:aws:bedrock:us-east-1:339712833620:model-invocation-job/z8sje076r0pa',
 'jobName': 'my-batch-job-try7',
 'modelId': 'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2:0',
 'status': 'Completed',
 'submitTime': datetime.datetime(2024, 3, 6, 20, 53, 50, 31000, tzinfo=tzlocal()),
 'lastModifiedTime': datetime.datetime(2024, 3, 6, 21, 23, 43, 933000, tzinfo=tzlocal()),
 'inputDataConfig': {'s3InputDataConfig': {'s3Uri': 's3://test-tmp-data-upload-bucket/test-batch-inference-data.jsonl'}},
 'outputDataConfig': {'s3OutputDataConfig': {'s3Uri': 's3://test-tmp-data-upload-bucket/'}}}

### Download the artifacts the job created from the output S3 bucket (path within the bucket defaults to the job ID, i.e. the last segment of the jobArn)

In [45]:
# Download the created jsonl file to have a look, and the manifest that gets created
s3 = boto3.client("s3")

OUT_s3_DIR = jobArn.split("/")[-1]
!mkdir -p $OUT_s3_DIR

MANIFEST_FILE = f"{OUT_s3_DIR}/manifest.json.out"
OUTPUT_FILE = f"{OUT_s3_DIR}/{INPUT_FILE_NAME}.out"

s3.download_file(BUCKET_NAME, MANIFEST_FILE, MANIFEST_FILE)
s3.download_file(
    BUCKET_NAME,
    OUTPUT_FILE,
    OUTPUT_FILE,
)

In [46]:
# The manifest is a one-line JSON summary of record and token counts.
!head $MANIFEST_FILE

{"processedRecordCount":2000,"successRecordCount":2000,"errorRecordCount":0,"inputTokenCount":436198,"outputTokenCount":574206}


In [48]:
!head $OUTPUT_FILE -n 1

{"modelInput":{"prompt":"\n\nHuman: Convert the following text to spanish: <text> Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. </text>\n\nAssistant: The text converted to spanish is: ","max_tokens_to_sample":1000,"temperature":0.1,"top_p":0.9},"modelOutput":{"completion":"\n\n<text> Arquitectónicamente, la escuela tiene un carácter católico. En 

### Compute total cost from the manifest file

In [51]:
# Compute total cost for curiosity
# Read the manifest inside a context manager so the file handle is closed.
with open(MANIFEST_FILE) as manifest_fh:
    mani = json.load(manifest_fh)

# NOTE(review): presumably USD per 1000 tokens, converted here to per-token
# rates — confirm against current Bedrock pricing for this model.
IN_TOKEN_COST = 0.00800 / 1000.0
OUT_TOKEN_COST = 0.02400 / 1000.0

# Total = input tokens at the input rate plus output tokens at the output rate.
TOTAL_COST = (
    mani["inputTokenCount"] * IN_TOKEN_COST + mani["outputTokenCount"] * OUT_TOKEN_COST
)

print(f"Total cost: ${TOTAL_COST:.2f}")

Total cost: $17.27
