In [None]:
!python3 -m pip install botocore-1.32.4-py3-none-any.whl
!python3 -m pip install boto3-1.29.4-py3-none-any.whl

In [None]:
import os
import sys
import json
import boto3

# Put the notebook's directory on the import path so the local `utils`
# package can be imported.
module_path = "."
sys.path.append(os.path.abspath(module_path))
from utils import bedrock, print_ww

# Non-runtime client: used further down for the batch (model invocation job) calls.
boto3_bedrock = bedrock.get_bedrock_client(region='us-east-1', runtime=False)

# Runtime client: used further down for on-demand `invoke_model` calls.
boto3_bedrock_runtime = bedrock.get_bedrock_client(region='us-east-1', runtime=True)

In [None]:
# Read the whole prompt template file into a single string.
with open("prompt_template.txt") as template_file:
    prompt_string = template_file.read()

In [None]:
from langchain.prompts import PromptTemplate

# Wrap the raw template text; it declares `english_string` as its single input variable.
prompt_template = PromptTemplate(
    input_variables=["english_string"],
    template=prompt_string,
)

# Render the template once with a sample sentence; the resulting `prompt`
# string is reused by the request payloads further down.
prompt = prompt_template.format(
    english_string="A friend in need is a friend indeed."
)

In [None]:
# JSON request body for the on-demand call: the rendered prompt plus sampling settings.
body = json.dumps(
    dict(
        prompt=prompt,
        max_tokens_to_sample=100,
        temperature=0.1,
        top_k=3,
        top_p=1.0,
        stop_sequences=["\n\nHuman:"],
    )
)

In [None]:
# Model identifier and content negotiation headers for the on-demand call.
modelId = 'anthropic.claude-v2:1' # change this to use a different version from the model provider
accept = contentType = 'application/json'

response = boto3_bedrock_runtime.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# The returned `body` exposes `.read()`; read it fully and parse the JSON document.
response_body = json.loads(response.get('body').read())

print_ww(response_body.get('completion'))

In [None]:
# Generate fake data: build sample request records and write them to fake_data.jsonl

In [None]:
# Rebuilds the same JSON request body that was already assigned to `body` earlier
# in the notebook. NOTE(review): no later cell visible here reads `body` again,
# so this cell looks redundant -- confirm before removing.
body = json.dumps(
    dict(
        prompt=prompt,
        max_tokens_to_sample=100,
        temperature=0.1,
        top_k=3,
        top_p=1.0,
        stop_sequences=["\n\nHuman:"],
    )
)

In [None]:
import random

# Build ten records, each pairing a random 12-character hex `recordId` with
# the model request body (`modelInput`). Every record reuses the same `prompt`.
# NOTE(review): the Bedrock batch-inference docs describe `recordId` as an
# 11-character alphanumeric string -- confirm that 12 characters are accepted.
gen_inputs = []
for _ in range(10):
    record_id = ''.join(random.choice('0123456789ABCDEF') for _ in range(12))
    gen_inputs.append({
        "recordId": record_id,
        "modelInput": {
            "prompt": prompt,
            "max_tokens_to_sample": 100,
            "temperature": 0.1,
            "top_k": 3,
            "top_p": 1.,
            "stop_sequences": ["\n\nHuman:"],
        },
    })

# Open with 'w' rather than 'a' so re-running the cell regenerates the file
# instead of appending another ten records on every run.
# The loop variable is named `record` so it does not overwrite the `response`
# global that holds the earlier API response.
with open('fake_data.jsonl', 'w') as outfile:
    for record in gen_inputs:
        json.dump(record, outfile)
        outfile.write('\n')

In [None]:
# Test on-demand inference performance with the invoke_model API (3000 samples intended; the generation loop above emits 10 records per run)

In [None]:
import json
import time
import asyncio
import requests as req
import botocore.session
from itertools import groupby
from operator import itemgetter
from botocore.auth import SigV4Auth
from typing import Dict, List, Tuple
from botocore.awsrequest import AWSRequest

def get_inference(prompt: Dict, model_id: str = 'anthropic.claude-v2:1', client=None) -> "str | None":
    """Invoke a Bedrock model once and return its completion text.

    Args:
        prompt: JSON-serialisable request body for the model (for example
            the ``modelInput`` object of a generated record).
        model_id: Identifier of the model to invoke.
        client: Object exposing ``invoke_model``. When omitted, the
            notebook-level ``boto3_bedrock_runtime`` client is used.

    Returns:
        The ``completion`` field of the decoded response when the HTTP status
        is 200, otherwise ``None``. Exceptions are printed rather than raised
        so that one failed call does not abort the other concurrent calls.
    """
    try:
        if client is None:
            client = boto3_bedrock_runtime
        response = client.invoke_model(
            body=json.dumps(prompt),
            modelId=model_id,
            accept='application/json',
            contentType='application/json',
        )
        response_body = json.loads(response.get('body').read())

        status_code = response['ResponseMetadata']['HTTPStatusCode']
        if status_code == 200:
            completion = response_body.get('completion')
            print(completion)
            return completion

        # `response` is a mapping (it is subscripted above) and has no `.text`
        # attribute, so report the decoded body instead.
        print(f"Error: Received status code {status_code}, Response: {response_body}")
        return None
    except Exception as e:
        print(f"Exception occurred: {e}")
        return None

async def async_calls_on_model(prompt):
    """Run the blocking ``get_inference`` call for ``prompt`` in a worker thread.

    Returns whatever ``get_inference`` returns for that prompt.
    """
    completion = await asyncio.to_thread(get_inference, prompt)
    return completion

async def parallel_calls(prompts):
    """Run one inference per prompt concurrently and print the elapsed wall-clock time.

    Args:
        prompts: Iterable of request bodies; each one is passed to
            ``async_calls_on_model``.

    Returns:
        List with one result per prompt, in the same order as ``prompts``.
        (Previously the gathered results were discarded and ``None`` was
        returned; callers that ignore the return value are unaffected.)
    """
    # perf_counter is monotonic, so the measurement is not affected by
    # system clock adjustments during the run.
    start_time = time.perf_counter()
    responses = await asyncio.gather(*(async_calls_on_model(prompt) for prompt in prompts))
    elapsed_time = time.perf_counter() - start_time
    print("\nAll tasks completed in {:.2f} seconds".format(elapsed_time))
    return responses

In [None]:
# Collect the request body (the `modelInput` object) of every record in the JSONL file.
prompts = []
with open("fake_data.jsonl", "r") as f:
    prompts.extend(json.loads(record_line)['modelInput'] for record_line in f)

In [None]:
# Schedule the benchmark as a background task on the event loop. The cell
# returns immediately and output is printed as the calls complete.
# Keep a named reference to the task: the event loop holds only weak
# references to tasks, so an unreferenced task may be garbage-collected before
# it finishes. A later cell can run `await inference_task` to block until done.
loop = asyncio.get_event_loop()
inference_task = loop.create_task(parallel_calls(prompts))
inference_task

In [None]:
# Test batch inference performance with 3000 samples using the batch API (create_model_invocation_job)

In [None]:
# S3 location of the input data for the batch job (replace the placeholder URI).
inputDataConfig = {
    "s3InputDataConfig": {"s3Uri": "<s3 uri of input data>"},
}

# S3 location for the batch job's output data (replace the placeholder URI).
outputDataConfig = {
    "s3OutputDataConfig": {"s3Uri": "<s3 uri of output data>"},
}

In [None]:
# ARN of the IAM role passed as `roleArn` when the model invocation job is created.
# Create a role named BedrockBatchInferenceRole by following the guidance at:
# https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference-permissions.html
# Replace <account-id> with your AWS account ID before running.

roleArn = "arn:aws:iam::<account-id>:role/BedrockBatchInferenceRole"

In [None]:
# Submit the batch (model invocation) job through the non-runtime Bedrock client.
# `modelId` is the value assigned in the earlier on-demand inference cell, so that
# cell must have been run first. Replace "<jobname>" with a name for this job.
# Note: this rebinds `response`, which previously held the invoke_model result.
response=boto3_bedrock.create_model_invocation_job(
    roleArn=roleArn,
    modelId=modelId,
    jobName="<jobname>",
    inputDataConfig=inputDataConfig,
    outputDataConfig=outputDataConfig
)


In [None]:
# Keep the job ARN from the creation response; it is the identifier used by the job queries below.
jobArn = response['jobArn']

In [None]:
# Show only the `status` field of the job; re-run this cell to poll for progress.
boto3_bedrock.get_model_invocation_job(jobIdentifier=jobArn)['status']

In [None]:
# Show the complete get_model_invocation_job response rather than just the status.
boto3_bedrock.get_model_invocation_job(jobIdentifier=jobArn)