In [None]:
# Make sure you ran `download-dependencies.sh` from the root of the repository first!
# Installs the awscli, boto3 and botocore wheels found under ../dependencies.
# Restart the kernel after the install.
%pip install --no-build-isolation --force-reinstall \
    ../dependencies/awscli-*-py3-none-any.whl \
    ../dependencies/boto3-*-py3-none-any.whl \
    ../dependencies/botocore-*-py3-none-any.whl

In [None]:
import json
import boto3
from IPython.display import clear_output, display, display_markdown, Markdown

## Create the boto3 client

Interaction with the Bedrock API is done via the AWS SDK for Python: [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html).

Depending on your environment, you might need to customize the setup when creating your Bedrock service client.

In [None]:
# Bedrock service client shared by every cell below.
# The region and endpoint are hard-coded here; adjust them for your environment.
bedrock = boto3.client(
    service_name="bedrock",
    region_name="us-west-2",
    endpoint_url="https://prod.us-west-2.frontend.bedrock.aws.dev",  # for preview
)

In [None]:
# List available models. As the last expression in the cell, the response
# returned by the client is shown as the cell output.
bedrock.list_foundation_models()

## `InvokeModel` body and output

The `invoke_model()` method of the Amazon Bedrock client (`InvokeModel` API) will be the primary method we use for most of our Text Generation and Processing tasks - whichever model we're using.

Although the method is shared, the format of input and output varies depending on the foundation model used - as described below:

### Amazon Titan Large

#### Input
```json
{   
    "inputText": "<prompt>",
    "textGenerationConfig" : { 
        "maxTokenCount": 512,
        "stopSequences": [],
        "temperature": 0.1,  
        "topP": 0.9
    }
}
```

#### Output

```json
{
    "inputTextTokenCount": 613,
    "results": [{
        "tokenCount": 219,
        "outputText": "<output>"
    }]
}
```

### Anthropic Claude

#### Input

```json
{
    "prompt": "\n\nHuman:<prompt>\n\nAssistant:",
    "max_tokens_to_sample": 300,
    "temperature": 0.5,
    "top_k": 250,
    "top_p": 1,
    "stop_sequences": ["\n\nHuman:"]
}
```

#### Output

```json
{
    "completion": "<output>",
    "stop_reason": "stop_sequence"
}
```

### Stability AI Stable Diffusion XL

#### Input

```json
{
    "text_prompts": [
        {"text": "this is where you place your input text"}
    ],
    "cfg_scale": 10,
    "seed": 0,
    "steps": 50
}
```

#### Output

```json
{ 
    "result": "success", 
    "artifacts": [
        {
            "seed": 123, 
            "base64": "<image in base64>",
            "finishReason": "SUCCESS"
        },
        //...
    ]
}
```

## Amazon Titan

### Simple prompt

In [None]:
# If you'd like to try your own prompt, edit this parameter!
# The next cell sends this string as the request's "inputText".
prompt_data = "Write a blog about Brazil history."

In [None]:
# Send `prompt_data` to the Titan text model through InvokeModel and print
# the generated text.
modelId = "amazon.titan-tg1-xlarge"
accept = "application/json"
contentType = "application/json"
body = json.dumps(
    {
        "inputText": prompt_data,
        "textGenerationConfig": {
            "maxTokenCount": 3072,
            "stopSequences": [],
            "temperature": 0.1,
            "topP": 0.9,
        },
    }
)

response = bedrock.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# The response body is a stream; read it fully, then parse the JSON payload.
response_body = json.loads(response.get("body").read())

# Only the first entry of "results" is printed.
print(
    response_body.get("results")[0].get("outputText")
)

### Zero-shot

Zero-shot prompting describes the technique where we present a task to an LLM without giving it any examples. We therefore expect it to perform the task without first getting a “shot” at it — hence “zero-shot” prompting. Modern LLMs demonstrate remarkable zero-shot performance, and a positive correlation can be drawn between model size and zero-shot performance.

In [None]:
# If you'd like to try your own prompt, edit this parameter!
# Zero-shot prompt: the task is stated directly with no worked examples.
# The <chemical1>/<chemical2> tags wrap the two blanks the model is asked to fill.
prompt_data = """Command: Sulfuric acid reacts with sodium chloride, and gives <chemical1>_____</chemical1> and <chemical2>_____</chemical2>:
Assistant: the chemical1 and chemical 2 are:"""

In [None]:
# Run the zero-shot prompt through the Titan text model and print its answer.
modelId = "amazon.titan-tg1-xlarge"
accept = "application/json"
contentType = "application/json"
body = json.dumps(
    {
        "inputText": prompt_data,
        "textGenerationConfig": {
            "maxTokenCount": 3072,
            "stopSequences": [],
            "temperature": 0.1,
            "topP": 0.9,
        },
    }
)

response = bedrock.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# Read the streamed body to the end before decoding the JSON document.
response_body = json.loads(response.get("body").read())

# Only the first entry of "results" is printed.
print(
    response_body.get("results")[0].get("outputText")
)

### Few-shot

Giving the model more information about the task at hand via examples is called few-shot prompting. It can be used for in-context learning by providing examples of the task and the desired output. We thereby condition the model on the examples so that it follows the task guidance more closely.

In [None]:
# If you'd like to try your own prompt, edit this parameter!
# Few-shot prompt: four labelled "<text> // <label>" examples followed by an
# unlabelled line that the model is expected to complete.
# NOTE(review): several example labels look mismatched with their text (e.g.
# the "Bearish on SPY" line is tagged Positive and "GME to the moon" Neutral),
# and "APPL" may be meant as the ticker "AAPL" — confirm whether this is
# deliberate before relying on the output.
prompt_data = """
Command:
GME to the moon // Neutral
APPL is going down // Negative
BA is moving sideways since forever // Negative
Bearish on SPY. The economy is going down // Positive
AMZN is going through the roof! //
"""

In [None]:
# Run the few-shot prompt through the Titan text model and print the completion.
modelId = "amazon.titan-tg1-xlarge"
accept = "application/json"
contentType = "application/json"
body = json.dumps(
    {
        "inputText": prompt_data,
        "textGenerationConfig": {
            "maxTokenCount": 3072,
            "stopSequences": [],
            "temperature": 0.1,
            "topP": 0.9,
        },
    }
)

response = bedrock.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# Read the streamed body to the end before decoding the JSON document.
response_body = json.loads(response.get("body").read())

# Only the first entry of "results" is printed.
print(
    response_body.get("results")[0].get("outputText")
)

### Chain-of-Thought (with Few-Shot)
Chain-of-Thought (CoT) prompting breaks down complex reasoning tasks through intermediate reasoning steps. Chain-of-Thought prompts are usually very specific to a problem type. One can try to invoke CoT reasoning by using the trigger phrase “(Think Step-by-Step)”. Let's examine the example below of such a few-shot CoT prompt.

In [None]:
# If you'd like to try your own prompt, edit this parameter!
# Few-shot chain-of-thought prompt: the viewer counts, one worked
# question/answer pair (Friday) that spells out its reasoning, then the open
# question (Saturday) carrying the "(Think Step-by-Step)" trigger phrase.
prompt_data = """
Command:

On a given week, the viewers for a TV channel were
Monday: 6500 viewers
Tuesday: 6400 viewers
Wednesday: 6300 viewers


Question: How many viewers can we expect on Friday?
Answer: Based on the numbers given and without any more information, there is a daily decrease of 100 viewers. If we assume this trend will continue during the following days, we can expect 6200 viewers on the next day that would be Thursday, and therefore 6100 viewers on the next day that would be Friday.


Question: How many viewers can we expect on Saturday? (Think Step-by-Step)
Answer:

"""

In [None]:
# Run the chain-of-thought prompt through the Titan text model and print the
# generated reasoning.
modelId = "amazon.titan-tg1-xlarge"
accept = "application/json"
contentType = "application/json"
body = json.dumps(
    {
        "inputText": prompt_data,
        "textGenerationConfig": {
            "maxTokenCount": 3072,
            "stopSequences": [],
            "temperature": 0.1,
            "topP": 0.9,
        },
    }
)

response = bedrock.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# Read the streamed body to the end before decoding the JSON document.
response_body = json.loads(response.get("body").read())

# Only the first entry of "results" is printed.
print(
    response_body.get("results")[0].get("outputText")
)

### Embedding

Use text embeddings to convert text into meaningful vector representations. You input a body of text and the output is a (1 x n) vector. You can use embedding vectors for a wide variety of applications. Bedrock currently offers Titan Embeddings for text embedding that supports text similarity (finding the semantic similarity between bodies of text) and text retrieval (such as search).

At the time of writing you can use amazon.titan-embed-g1-text-02 as embedding model via the API. The input text size is 8192 tokens and the output vector length is 1536.

To use a text embeddings model, use the InvokeModel API operation or the Python SDK. Use InvokeModel to retrieve the vector representation of the input text from the specified model.

In [None]:
# Text to embed; the next cell sends it as the request's "inputText".
prompt_data = """Amazon Bedrock supports foundation models from industry-leading providers such as
AI21 Labs, Anthropic, Stability AI, and Amazon. Choose the model that is best suited to achieving
your unique goals."""

In [None]:
# Request an embedding vector for `prompt_data`. The request body carries only
# "inputText"; no text-generation config is sent.
# NOTE(review): the prose above this cell names `amazon.titan-embed-g1-text-02`,
# while this cell calls "amazon.titan-e1t-medium" — confirm which ID is current.
modelId = "amazon.titan-e1t-medium"
accept = "application/json"
contentType = "application/json"
body = json.dumps({"inputText": prompt_data})

response = bedrock.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
# Read the streamed body to the end before decoding the JSON document.
response_body = json.loads(response.get("body").read())

# Report the vector length and show only its first and last three values.
embedding = response_body.get("embedding")
print(f"The embedding vector has {len(embedding)} values\n{embedding[0:3]+['...']+embedding[-3:]}")

### Streaming

For large language models, it can take noticeable time to generate long output sequences. Rather than waiting for the entire response to be available, latency-sensitive applications may like to **stream** the response to users.

Run the code below to see how you can achieve this with Bedrock's `invoke_model_with_response_stream()` method - returning the response body in separate chunks.

In [None]:
# If you'd like to try your own prompt, edit this parameter!
# The next cell streams the model's response to this prompt.
prompt_data = "Write a blog about Brazil history."

In [None]:
# Stream a Titan completion and re-render the accumulated text as Markdown
# every time a new chunk arrives.
# NOTE(review): this cell uses "amazon.titan-tg1-large" while the earlier Titan
# cells use "amazon.titan-tg1-xlarge" — confirm the difference is intended.
modelId = "amazon.titan-tg1-large"
accept = "application/json"
contentType = "application/json"
body = json.dumps(
    {
        "inputText": prompt_data,
        "textGenerationConfig": {
            "maxTokenCount": 3072,
            "stopSequences": [],
            "temperature": 0.1,
            "topP": 0.9,
        },
    }
)

response = bedrock.invoke_model_with_response_stream(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)
stream = response.get('body')
output = []

if stream:
    for event in stream:
        chunk = event.get('chunk')
        if not chunk:
            # Events without a chunk payload contribute no text.
            continue
        chunk_obj = json.loads(chunk.get('bytes').decode())
        text = chunk_obj['outputText']
        # Replace the previous rendering instead of stacking a new output per
        # chunk; wait=True defers the clear until the next display call.
        clear_output(wait=True)
        output.append(text)
        display_markdown(Markdown(''.join(output)))

## Stable Diffusion XL

In [None]:
# Image example
# The request below is wrapped in a triple-quoted string literal, so running
# this cell makes no API call; remove the surrounding quotes to try it.
# The snippet uses the `bedrock` client created earlier in this notebook
# (it previously referenced an undefined `boto3_bedrock` name).
'''
prompt_data = "a fine image of an astronaut riding a horse on Mars"
body = json.dumps({
    "text_prompts": [{"text": prompt_data}],
    "cfg_scale": 10,
    "seed": 20,
    "steps": 50
})
modelId = "stability.stable-diffusion-xl"
accept = "application/json"
contentType = "application/json"

response = bedrock.invoke_model(
    body=body, modelId=modelId, accept=accept, contentType=contentType
)
response_body = json.loads(response.get("body").read())

print(response_body["result"])
print(f'{response_body.get("artifacts")[0].get("base64")[0:80]}...')
'''

**Note:** The output is a [base64 encoded](https://docs.python.org/3/library/base64.html) string of the image data. You can use any image processing library (such as [Pillow](https://pillow.readthedocs.io/en/stable/)) to decode the image as in the example below:

```python
import base64
import io
from PIL import Image

base_64_img_str = response_body.get("artifacts")[0].get("base64")
image = Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, "utf-8"))))
```