In [1]:
pip install -r requirements.txt





In [2]:
import boto3
import json


In [3]:
# Client for the 'bedrock' service in us-east-1; the cells below use it to
# create inference profiles, read their tags, and list them.
bedrock_client = boto3.client('bedrock', region_name='us-east-1')


In [4]:
import json
def create_application_inference_profile(profile_name: str, model_arn: str, description: str, tags: list = None):
    """Create an application inference profile copied from an existing model.

    Calls ``create_inference_profile`` on the module-level ``bedrock_client``,
    prints the HTTP status code followed by the full response as JSON, and
    returns the response.

    Args:
        profile_name: Value sent as ``inferenceProfileName``.
        model_arn: ARN sent as the ``copyFrom`` entry of ``modelSource``.
        description: Free-text description of the profile.
        tags: Optional list of ``{'key': ..., 'value': ...}`` dicts to attach
            when the profile is created. When ``None`` (the default) the
            ``tags`` argument is left out of the request entirely.

    Returns:
        The response dict returned by ``create_inference_profile``.
    """
    request = {
        'inferenceProfileName': profile_name,
        'description': description,
        'modelSource': {'copyFrom': model_arn},
    }
    # Only send `tags` when the caller supplied them; an explicit list
    # (including an empty one) is forwarded unchanged.
    if tags is not None:
        request['tags'] = tags

    response = bedrock_client.create_inference_profile(**request)

    print("CreateInferenceProfile Response:", response['ResponseMetadata']['HTTPStatusCode'])
    # default=str keeps non-JSON-native values in the response printable.
    print(json.dumps(response, indent=4, sort_keys=True, default=str))
    return response


In [5]:
# Inputs for the profile: a single tag, the ARN of the model to copy from,
# and a free-text description.
tags = [
    {'key': 'dept', 'value': 'claims'},
]
base_model_arn = "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-haiku-20240307-v1:0"
description = "custom inference profile 05 dec"

# Create the profile and keep the whole response; a later cell reads its ARN.
us_east_haiku3_custom_profile_v2 = create_application_inference_profile(
    profile_name="us_east_haiku3_custom_profile_v2",
    model_arn=base_model_arn,
    description=description,
    tags=tags,
)


CreateInferenceProfile Response: 201
{
    "ResponseMetadata": {
        "HTTPHeaders": {
            "connection": "keep-alive",
            "content-length": "125",
            "content-type": "application/json",
            "date": "Thu, 05 Dec 2024 13:32:47 GMT",
            "x-amzn-requestid": "a4a8c917-6b29-4ec5-99c7-26b5a77358b5"
        },
        "HTTPStatusCode": 201,
        "RequestId": "a4a8c917-6b29-4ec5-99c7-26b5a77358b5",
        "RetryAttempts": 0
    },
    "inferenceProfileArn": "arn:aws:bedrock:us-east-1:248020845915:application-inference-profile/grzvgahtspkt",
    "status": "ACTIVE"
}


In [6]:
# Read the profile's ARN from the creation response and display it
# (the ARN is the only value extracted in this cell).
us_east_haiku3_custom_profile_v2_arn = us_east_haiku3_custom_profile_v2['inferenceProfileArn']
us_east_haiku3_custom_profile_v2_arn

'arn:aws:bedrock:us-east-1:248020845915:application-inference-profile/grzvgahtspkt'

In [7]:
# Look up the tags attached to the profile by its ARN and display them.
bedrock_client.list_tags_for_resource(resourceARN=us_east_haiku3_custom_profile_v2_arn)['tags']

[{'key': 'dept', 'value': 'claims'}]

In [5]:
import json

def list_inference_profiles(profile_type: str = 'APPLICATION'):
    """List every inference profile of one type, following pagination.

    Calls ``list_inference_profiles`` on the module-level ``bedrock_client``
    and keeps requesting pages while the service returns a ``nextToken``, so
    the printed count and the returned summaries cover all pages rather than
    only the first one.

    Args:
        profile_type: Value sent as ``typeEquals``. Defaults to
            ``'APPLICATION'``.

    Returns:
        The first page's response dict, with ``inferenceProfileSummaries``
        replaced by the summaries collected from all pages and any
        ``nextToken`` removed.
    """
    request = {'typeEquals': profile_type}
    response = bedrock_client.list_inference_profiles(**request)
    summaries = list(response.get('inferenceProfileSummaries', []))

    # Keep fetching until the service stops handing back a continuation token.
    next_token = response.get('nextToken')
    while next_token:
        page = bedrock_client.list_inference_profiles(nextToken=next_token, **request)
        summaries.extend(page.get('inferenceProfileSummaries', []))
        next_token = page.get('nextToken')

    # Present the merged result in the same shape as a single-page response.
    response['inferenceProfileSummaries'] = summaries
    response.pop('nextToken', None)

    print(f"Found {len(summaries)} inference profiles:")
    for profile in summaries:
        # default=str renders datetime fields such as createdAt/updatedAt.
        print(json.dumps(profile, indent=2, default=str))

    return response

# Try the helper: it prints each profile summary, and the returned response
# is kept in `profiles`.
print("Listing all inference profiles...")
profiles = list_inference_profiles()

Listing all inference profiles...
Found 5 inference profiles:
{
  "inferenceProfileName": "us-east-haiku3-custom-profile",
  "description": "inf profile 29nov",
  "createdAt": "2024-11-29 14:37:34.218038+00:00",
  "updatedAt": "2024-11-29 14:37:34.218038+00:00",
  "inferenceProfileArn": "arn:aws:bedrock:us-east-1:248020845915:application-inference-profile/d2xqascmonz7",
  "models": [
    {
      "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-haiku-20240307-v1:0"
    }
  ],
  "inferenceProfileId": "d2xqascmonz7",
  "status": "ACTIVE",
  "type": "APPLICATION"
}
{
  "inferenceProfileName": "us-east-haiku3-custom-profile",
  "description": "inf profile 29nov",
  "createdAt": "2024-11-29 14:38:21.480089+00:00",
  "updatedAt": "2024-11-29 14:38:21.480089+00:00",
  "inferenceProfileArn": "arn:aws:bedrock:us-east-1:248020845915:application-inference-profile/tmrws6cgoino",
  "models": [
    {
      "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.c

In [8]:
# Separate client for the 'bedrock-runtime' service in us-east-1; the
# converse() helper below sends its requests through it.
bedrock_runtime = boto3.client('bedrock-runtime', region_name='us-east-1')

In [None]:
def converse(model_id, messages, max_tokens=300):
    """Send one Converse request and return the raw response.

    Calls ``converse`` on the module-level ``bedrock_runtime`` client and
    prints the HTTP status code found in the response metadata.

    Args:
        model_id: Value sent as ``modelId``. The cell that exercises this
            helper passes an application inference profile ARN.
        messages: Conversation messages, forwarded unchanged.
        max_tokens: Value sent as ``inferenceConfig['maxTokens']``.
            Defaults to 300.

    Returns:
        The response dict returned by ``bedrock_runtime.converse``.
    """
    response = bedrock_runtime.converse(
        modelId=model_id,
        messages=messages,
        inferenceConfig={'maxTokens': max_tokens},
    )

    # Tolerate a response without metadata: status_code is then None.
    status_code = response.get('ResponseMetadata', {}).get('HTTPStatusCode')
    print("Converse Response:", status_code)
    return response

# Exercise converse() using the application inference profile's ARN as the model id.
# NOTE(review): the prompt embeds "Human:/Assistant:" turn markers even though the
# role is already carried by the `messages` structure - confirm whether they are needed.
print("\nTesting Converse...")
prompt = "\n\nHuman: Tell me about S3.\n\nAssistant:"
messages = [
    {
        "role": "user",
        "content": [{"text": prompt}],
    }
]

# Send the identical request three times, pretty-printing each full response.
for i in range(3):
    response = converse(us_east_haiku3_custom_profile_v2_arn, messages)
    response_json = json.dumps(response, indent=4)
    print(response_json)


Testing Converse...
Converse Response: 200
{
    "ResponseMetadata": {
        "RequestId": "a5caeb6c-6b4b-4367-84df-8d503cc8b42c",
        "HTTPStatusCode": 200,
        "HTTPHeaders": {
            "date": "Thu, 05 Dec 2024 13:35:23 GMT",
            "content-type": "application/json",
            "content-length": "1469",
            "connection": "keep-alive",
            "x-amzn-requestid": "a5caeb6c-6b4b-4367-84df-8d503cc8b42c"
        },
        "RetryAttempts": 0
    },
    "output": {
        "message": {
            "role": "assistant",
            "content": [
                {
                    "text": "Sure, I'd be happy to provide an overview of Amazon S3 (Simple Storage Service).\n\nS3 is a highly scalable and durable object storage service offered by Amazon Web Services (AWS). It allows you to store and retrieve any amount of data, at any time, from anywhere on the web. Some key features of S3 include:\n\n- Data Storage: S3 enables you to store and retrieve data in t