# Amazon Bedrock - Zero to Hero - Prepared for Twilio Hackathon (Tweek Week)

### *You have an Idea >> want generation/search workflow backed by an LLM >> Want minimal infra setup >> Use Bedrock*


# # Zero

In [None]:
# Common tasks
# Install required packages. Requires Python 3.10 or newer.
!pip -q install -r requirements.txt

# Import python SDK
import boto3
import json


## Initialize Python client object for bedrock-runtime service
# `bedrock` is the runtime client the invoke/converse cells below call.
bedrock = boto3.client(service_name='bedrock-runtime', region_name='us-west-2')

# Notebook-wide defaults: the first model to call and the prompt reused by later cells.
model_id = "amazon.titan-text-lite-v1"  # Replace with your model ID
prompt = "What is the meaning of love?"  # Replace with your prompt


In [None]:
# My first Bedrock Invoke API Call
# Build the body with json.dumps so quotes, backslashes, or newlines in the
# prompt are escaped correctly; hand-formatting the JSON breaks on such prompts.
response = bedrock.invoke_model(
    body=json.dumps({"inputText": prompt}),
    modelId=model_id)

print(response)  # Print the raw response for debugging

# The response body is a byte stream; decode it to the JSON text the model returned.
result = response['body'].read().decode('utf-8')
# Parse before re-dumping: json.dumps on the raw string would print a single
# quoted, escaped string instead of an indented JSON document.
print(json.dumps(json.loads(result), indent=2))


### ## Bedrock Features and Capabilities

- **Fully Managed**: No need to manage infrastructure; AWS handles scaling, security, and maintenance.
- **Foundation Models**: Access to a variety of pre-trained models from AI startups and Amazon.
- **Knowledge Bases**: Create and manage knowledge bases to enhance the capabilities of your applications.
- **Guardrails**: Built-in safety features to ensure responsible AI usage, including content filtering and moderation.
- **Customization**: Fine-tune models with your own data to improve performance for specific tasks



In [None]:
# [[Bedrock Capability]] Hot-swapping model IDs for testing
# Alternative for us-east-1: model_id1 = "amazon.titan-text-premier-v1:0"
model_id1 = "amazon.titan-tg1-large"  # for us-west-2

# Request payload in the native structure used by the Titan Text family.
native_request = {
    "inputText": prompt,
    "textGenerationConfig": {"maxTokenCount": 512, "temperature": 0.5},
}

# Serialize the payload once; `request` is reused by a later cell.
request = json.dumps(native_request)

# Call the model through the bedrock-runtime client.
response = bedrock.invoke_model(
    body=request,
    modelId=model_id1,
    accept='application/json',
    contentType='application/json',
)

# The body arrives as bytes. To look at the raw text instead of parsing it:
# result = response["body"].read().decode('utf-8')
# print(json.dumps(result, indent=2))

# Parse the body into a dictionary and show it in full.
model_response = json.loads(response["body"].read())
print(model_response)

# Pull the generated text out of the first result and print it.
response_text = model_response["results"][0]["outputText"]
print(response_text)

## model_id2 = "anthropic.claude-3-5-sonnet-20240620-v1:0"
## DISCUSS: The Anthropic model won't work as-is with the call above, because it requires a different body structure.
## We solve for this later.
## Typically, hot-swapping happens when testing different versions of the same model class. Bedrock also supports fine-tuning of models.


In [None]:
# Admin-style calls (such as listing models) go through the 'bedrock' service client.
bedrock_client = boto3.client('bedrock', region_name='us-west-2')

# Reference for model IDs and inference profiles:
# https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html#model-ids-arns
# https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html

# List the foundation models and pretty-print the whole response with sorted keys.
response = bedrock_client.list_foundation_models()
print(json.dumps(response, sort_keys=True, indent=2))

# # Crawl

## ## Let's look at customizations and more client libraries for Bedrock (under python/boto3)

1. [Bedrock](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock.html)
2. [Bedrock-runtime](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
3. [bedrock-agent](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent.html)
4. [bedrock-agent-runtime](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime.html)

Other API calls:

```python
bedrock.list_foundation_models | create_guardrail | create_custom_model | create_custom_model_deployment | create_evaluation_job
bedrock-runtime.invoke_model | invoke_model_with_response_stream | converse | converse_with_response_stream
bedrock-agent.create_agent | create_knowledge_base | associate_agent_knowledge_base | associate_agent_collaborator
bedrock-agent-runtime.invoke_agent | invoke_flow | retrieve | rerank | retrieve_and_generate | retrieve_and_generate_stream
```

In [None]:
# First, let's Stream responses

## Function to invoke_model_with_response_stream and print the output
def invoke_model_with_response_stream(prompt, model_id, client=None):
    """Stream a completion for ``prompt`` and print the text as chunks arrive.

    The request uses the ``{"inputText": ...}`` body shape and reads
    ``outputText`` from every streamed chunk.

    Args:
        prompt: Text sent to the model as ``inputText``.
        model_id: ID of the Bedrock model to invoke.
        client: Optional bedrock-runtime client. When omitted, the
            notebook-level ``bedrock`` client is used.

    Returns:
        None. The generated text is written to stdout, followed by a newline.
    """
    runtime = bedrock if client is None else client
    response_stream = runtime.invoke_model_with_response_stream(
        modelId=model_id,
        contentType='application/json',
        accept='application/json',
        # json.dumps escapes quotes, backslashes and newlines in the prompt;
        # formatting the JSON by hand produced an invalid body for such prompts.
        body=json.dumps({"inputText": prompt}),
    )

    for event in response_stream['body']:
        # Each event carries one JSON document serialized as bytes.
        payload = json.loads(event['chunk']['bytes'].decode('utf-8'))
        # end='' keeps the chunks on one line; flush=True shows them immediately.
        print(payload['outputText'], end='', flush=True)
    print()  # Finish the streamed line with a newline
invoke_model_with_response_stream(prompt, model_id1)  # Call the function with the model ID and prompt

## QUIZ: Why "stream"? if the response content is the same? 
## DISCUSS: DEEPSEEK'S   "WAIT"

In [None]:
# Use converse API to have a conversation with the model via Messages
def converse_with_model(prompt, model_id):
    """
    Send ``prompt`` as a single user message through the Converse API.

    Converse offers one consistent interface across all models that support
    messages. The ``output`` entry of the response is returned.
    """
    # One user turn whose content is a single text block.
    user_turn = {'role': 'user', 'content': [{'text': prompt}]}
    reply = bedrock.converse(messages=[user_turn], modelId=model_id)
    return reply['output']

output = converse_with_model(prompt, model_id1)
print(output)  # Print the output of the conversation



{'message': {'role': 'assistant', 'content': [{'text': '\n\nThe model is unable to give an opinion on the meaning of love, but can provide general information on the topic. Love is a complex emotion that can be expressed in various ways, such as through kindness, compassion, and physical intimacy. It is a deeply personal and subjective experience that can vary greatly from person to person. \r\n\r\nLove can be found in many different forms, including romantic relationships, friendships, and familial bonds. At its core, love is about connecting with another person on a deep level and wanting the best for them. While the meaning of love can vary, it is a fundamental aspect of the human experience and is essential for human connection and happiness.'}]}}


In [None]:
# Next, lets customize the model with inferenceConfig

"""When you make inference calls to models with the model invocation 
(InvokeModel, InvokeModelWithResponseStream, Converse, and ConverseStream) API operations, 
you include request parameters depending on the model that you're using.
Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html

Below is an example of how to use the inferenceConfig parameter in the InvokeModel API operation.
"""
# Generation settings in the shape that works for Titan Text models.
titan_inference_config = dict(
    maxTokenCount=10,   # Limit on output tokens. The key name differs between models. Author's note: if set too low, the response can come back unfinished (possibly surfacing as an exception).
    temperature=0.7,    # Randomness: lower values make the output more deterministic; probabilities are rebalanced to sum to 1. Some models accept values above 1. Author's note: change it in large steps when experimenting.
    topP=0.9,           # Diversity: lower values make the output more focused. Of the top-k candidates (top-k is not exposed by all models), keep only those within probability threshold p, then rebalance to sum to 1.
)                       # DISCUSS: How to use these parameters effectively? LONG YELLOW FRUIT example?


# Serialize the prompt together with the inference configuration.
custom_request = json.dumps({
    "inputText": prompt,
    "textGenerationConfig": titan_inference_config,
})

# Invoke the model with the customized request body.
response = bedrock.invoke_model(
    body=custom_request,
    modelId=model_id1,
    accept='application/json',
    contentType='application/json',
)
result = response['body'].read().decode('utf-8')
print(result)

# # Walk


## ## What else Amazon Bedrock can do besides LLM backed APIs (Aug 2025)?

Now that you can crawl with Bedrock, let's learn additional Bedrock capabilities for Agentic AI

1. Prompt Engineering
2. Agents
3. Knowledge Bases
4. Guardrails 
5. Evaluations
6. ...and more

Next, we will look at Bedrock Console

Below, I have already created (via AWS console) a guardrail. I am using the guardrail identifier below.

Guardrails can be implemented over any invoke or converse API calls.

In [None]:
# Apply a guardrail to a model invocation.
# `request` is the serialized Titan payload built in an earlier cell.
response = bedrock.invoke_model(
    modelId=model_id1,
    body=request,
    guardrailIdentifier="3r1nkkv80wth",  # guardrail created beforehand in the AWS console
    guardrailVersion="DRAFT",
    trace="ENABLED",
)

# The body is serialized as bytes; decode it to text and print it.
result = response["body"].read().decode('utf-8')
print(result)

In [None]:
# Call an OpenAI model through Bedrock's InvokeModel API.

# Bedrock Runtime client for the 'us-west-2' region
client = boto3.client('bedrock-runtime', region_name='us-west-2')

# Model ID (this rebinds the notebook-level `model_id`)
model_id = 'openai.gpt-oss-20b-1:0'

# Inference settings, mapped onto OpenAI's field names in the request below
inference_config = {"maxTokens": 512, "temperature": 0.7, "topP": 0.9}

# Request body in the OpenAI chat-completions layout
native_request = {
    "model": model_id,  # You can omit this field
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "assistant", "content": "Hello! How can I help you today?"},
        {"role": "user", "content": prompt},
    ],
    "max_completion_tokens": inference_config["maxTokens"],  # OpenAI expects this field
    "temperature": inference_config["temperature"],          # OpenAI expects this field
    "top_p": inference_config["topP"],                       # OpenAI expects this field
    "stream": False,                                         # You can omit this field
}

# Send the InvokeModel request
response = client.invoke_model(
    body=json.dumps(native_request),
    modelId=model_id,
)

# Decode the byte body and parse it as a chat completion
response_body = json.loads(response['body'].read().decode('utf-8'))

print(response_body)  # Entire response body, for debugging

# Print the message text of every returned choice
for choice in response_body['choices']:
    print(choice['message']['content'])

In [None]:
# Example usage of Bedrock-Agents boto3 client
## Boto3 client docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent.html

## This is almost an example of how "not" to use Bedrock Agents today, as it is a low-level API. 
## We will use Strands later to create agents with more advanced capabilities.
## For now, we will use this to create and prepare an agent for use. 

import boto3

# Low-level Bedrock Agents client; note this cell targets us-east-1.
agents_client = boto3.client('bedrock-agent', region_name='us-east-1')

# Create an agent backed by a Titan Text foundation model and show the API response.
response = agents_client.create_agent(
    foundationModel='amazon.titan-text-premier-v1:0',
    agentName='MyAgent',
)
print(response)


# # Run

## ## Strands

### Strands Python SDK features Overview
1. **Lightweight & Flexible**: Simple agent loop that just works and is fully customizable
2. **Model Agnostic**: Support for Amazon Bedrock, Anthropic, LiteLLM, Llama, Ollama, OpenAI, Writer, and custom providers
3. **Advanced Capabilities**: Multi-agent systems, autonomous agents, and streaming support
4. **Built-in MCP**: Native support for Model Context Protocol (MCP) servers, enabling access to thousands of pre-built tools

### Basic Concepts

Strands Agents is a framework for building AI agents that can interact with AWS services and perform complex tasks. The key components are:

1. **Agent**: The core component that manages the conversation and orchestrates tools
2. **Model**: The underlying LLM (Large Language Model) that powers the agent
3. **Tools**: Functions that the agent can use to perform specific tasks
4. **Sessions** and State: Mechanisms for maintaining conversation history and agent state across interactions
5. **Agent Loop**: The process flow of how agents receive input, process it, and generate responses
6. **Context Management**: How agents maintain and manage conversation context, including memory and retrieval


In [None]:
# Install Strands Python SDK (requires Python 3.10+)
!pip install strands-agents strands-agents-tools

In [None]:
# How to build an agent using Strands
from strands import Agent

agent = Agent()
response = agent(prompt)

print(response)

In [None]:
# Customize Strands' Defaults

# The default model provider is Amazon Bedrock and the default model is Claude 4 Sonnet in the US Oregon (us-west-2) region.

# Use the "us." cross-region inference profile ID, as the other Strands cells in
# this notebook do. NOTE(review): the bare "amazon.nova-pro-v1:0" ID is expected
# to be rejected for on-demand use in us-west-2 — confirm against the Bedrock
# inference-profile support table for your account's region.
agent = Agent(model="us.amazon.nova-pro-v1:0")
response = agent(prompt)
print(response)


In [None]:
# Understand Model Providers

# Strands is model agnostic and provides a unified interface to work with different model providers 
# using a `Model` class available via the `strands.models` module.
# Here, I am using BedrockModel to configure the model ID and inference parameters.
from strands.models import BedrockModel

bedrock_model = BedrockModel(
    model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
    max_tokens=1000,
    temperature=0.7,  # Adjust temperature for response randomness
    top_p=0.9,  # Adjust top_p for response diversity
)

agent = Agent(model=bedrock_model)
response = agent(prompt)
# print(response)
# Always wrap your calls in Try/Except clauses. 
# DISCUSS: WHY? Tokens are not free (at least today), and there are reasons to control verbosity. Try changing max_tokens to 10, and see the difference.
# DISCUSS: Token Budgets. And how model implementations differ in handling them.


Lets understand tools.

Tools can be any of three types:
1. Python function call, decorated as a @tool 
2. Built-in tools, imported using strands_tools
3. Path to a python file

In [None]:
# Add TOOLS to the Agent
from strands import Agent, tool

# Import the calculator tool.
from strands_tools import calculator # Import the calculator tool
# https://github.com/strands-agents/tools --> List of built-in tools. 

# Create a custom tool 
# Minimal custom tool: takes no arguments and always returns the same canned string.
# NOTE(review): the docstring is presumably what the @tool decorator exposes to the
# agent as the tool description — confirm before rewording it.
@tool
def weather():
    """ Get weather """ # Dummy implementation
    return "Always sunny in Philly"

agent = Agent(
    model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",  # Optional: Specify the model ID
    tools=[calculator, weather],
    system_prompt="You are a helpful assistant who can only talk in rhymes. You can do simple math calculation and tell the weather.")

print (agent("What is 2+2?"))  # Use the calculator tool

response = agent("What is the weather today?")
# print(response)
# Sharp eyes can tell, you do NOT need to print. 

Strands is model-agnostic. Lets use OpenAI direct API calls using Strands

In [None]:
# Run an Agent with OpenAI directly - using Strands
%pip install 'strands-agents[openai]'

In [None]:
# Run OpenAI model using Strands (I do not have an API Key)
from strands import Agent
from strands.models.openai import OpenAIModel
import os

# Prefer the conventional OPENAI_API_KEY variable, but keep honoring the
# mixed-case name this notebook used originally. "Really??" is the placeholder
# used when neither is set.
api = os.environ.get("OPENAI_API_KEY") or os.environ.get("OpenAI_API_KEY", "Really??")
# Never echo the key itself: notebook outputs are saved and shared with the file.
print("OpenAI API key found." if api != "Really??" else "OpenAI API key not set; using placeholder.")
# Initialize the OpenAI model with custom parameters
openai_model = OpenAIModel(
    model_id="gpt-3.5-turbo",  # Replace with your OpenAI model ID
    client_args={
        "api_key": api  # Taken from the environment above
    },
    params={
        "temperature": 0.7,  # Adjust temperature for response randomness
        "max_tokens": 100,  # Adjust max tokens for response length
    }
)
# Create an agent with the OpenAI model
agent = Agent(model=openai_model)
# response = agent(prompt) # Output will fail without a valid OpenAI API key
# print(response)

# # Hero

MCP is an open protocol that standardizes how applications provide context to large language models (LLMs).
MCP has three components: 1/ an MCP server (local or remote), 2/ an MCP client, and 3/ a transport layer protocol; together these enable 4/ the data layer.

 
The protocol currently defines two standard transport mechanisms for client-server communication (items 1 and 2 below) and also allows custom transports (item 3):
1. stdio, communication over standard in and standard out
2. Streamable HTTP
3. custom transports

In [None]:
# MCP tool - my first MCP 

from mcp import StdioServerParameters, stdio_client
from strands import Agent
from strands.tools.mcp import MCPClient
# from mcp.client.streamable_http import streamablehttp_client
# from mcp.server import FastMCP # FastMCP is a server implementation of MCP protocol

# MCP client that launches the AWS documentation server with `uvx` and talks to it over stdio.
stdio_mcp_client = MCPClient(
    lambda: stdio_client(
        StdioServerParameters(
            args=["awslabs.aws-documentation-mcp-server@latest"],
            command="uvx",
        )
    )
)

# The tools are listed and used while the client context is open.
with stdio_mcp_client:
    # Ask the MCP server which tools it exposes.
    tools = stdio_mcp_client.list_tools_sync()
    # print(tools)

    # Hand those tools to an agent and ask a question.
    agent = Agent(
        tools=tools,
        model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    )

    response = agent("What is Amazon Bedrock pricing model. Be concise.")

In [None]:
!pip -q install "mcp[cli]" nest_asyncio
import nest_asyncio, sys
nest_asyncio.apply()  # Apply the patch to allow nested event loops
print("Python ready. Make sure Node.js/npm are installed on THIS machine to run 'npx'")

In [None]:
import os, asyncio, json
from mcp import ClientSession
from mcp.client.stdio import stdio_client
# from mcp.client.session import get_initialized_session
from mcp.types import ListToolsRequest
from mcp.client.stdio import StdioServerParameters

# --- REQUIRED: Twilio credentials ---
# Recommended: create a Twilio API Key/Secret under your Account SID.
# Each value is read from the environment first, so real credentials never have
# to be typed into (and saved with) the notebook. The literals are only
# placeholders used when the variable is unset.
TWILIO_ACCOUNT_SID = os.environ.get("TWILIO_ACCOUNT_SID", "AC12877baa7a81f8...c7")   # e.g., AC... from Twilio Console
TWILIO_API_KEY     = os.environ.get("TWILIO_API_KEY", "insert")   # e.g., SK... from Twilio Console
TWILIO_API_SECRET  = os.environ.get("TWILIO_API_SECRET", "insert")

# Twilio MCP expects "ACCOUNT_SID/API_KEY:API_SECRET"
CRED_ARG = f"{TWILIO_ACCOUNT_SID}/{TWILIO_API_KEY}:{TWILIO_API_SECRET}"

# --- Optional: set a default region/data center via env if your account needs it
# The child process gets a copy of the current environment.
env = os.environ.copy()
# env["TWILIO_REGION"] = "us1"  # example; only if you use regional routing

# --- Configure the stdio server: this spawns the Twilio MCP server via npx
server_params = StdioServerParameters(
    command="npx",
    args=["-y", "@twilio-alpha/mcp", CRED_ARG],
    env=env,
)

async def start_and_list_tools():
    """Start the MCP server described by ``server_params`` and print its tools.

    Returns:
        A ``(tool_names, session)`` tuple. Both ``async with`` contexts have
        already exited by the time the caller receives ``session``, so it
        should not be treated as a live connection.
    """
    # stdio_client spawns the child process and wires up stdin/stdout;
    # ClientSession runs on top of those two streams.
    async with (
        stdio_client(server_params) as (reader, writer),
        ClientSession(reader, writer) as session,
    ):
        # Initialize per MCP spec (negotiates capabilities)
        await session.initialize()

        # Collect the names of the tools the server exposes
        listing = await session.list_tools()
        tool_names = [entry.name for entry in listing.tools]

        print(f"Discovered {len(tool_names)} tools:")
        for position, tool_name in enumerate(tool_names, start=1):
            print(f"{position:2d}. {tool_name}")
        return tool_names, session

# Run the coroutine. The session's contexts have exited by the time it returns, so only the tool names are kept.
tools, _ = asyncio.get_event_loop().run_until_complete(start_and_list_tools())

In [None]:
# Re-open a session so later cells can reuse it for convenience.
# A `global` statement at module level has no effect; it is kept as written.
global _MCP_SESSION_CTX
async def _open_session():
    """Async generator that yields one initialized MCP ``ClientSession``.

    The stdio transport and the session stay open while the generator is
    suspended at ``yield``; resuming it lets both contexts exit.
    """
    async with (
        stdio_client(server_params) as (reader, writer),
        ClientSession(reader, writer) as session,
    ):
        await session.initialize()
        yield session
class SessionManager:
    """Async context manager that opens an MCP session on entry and closes it on exit.

    Usage: ``async with manager as session: ...``. An instance can be reused
    sequentially, but it is not reentrant (do not nest it inside itself).

    Args:
        opener: Optional async-generator function that yields exactly one
            session and cleans up when resumed or when an exception is thrown
            into it. Defaults to the module-level ``_open_session``.

    Attributes:
        session: The active session inside the ``async with`` body; ``None``
            before entry and after exit.
    """

    def __init__(self, opener=None):
        self._opener = opener
        self._ctx = None
        self.session = None

    async def __aenter__(self):
        # Local import keeps this class self-contained within the notebook cell.
        from contextlib import asynccontextmanager

        # Resolve the default lazily so the class can be defined before
        # `_open_session` exists.
        opener = _open_session if self._opener is None else self._opener
        # Wrapping the generator gives standard context-manager semantics,
        # including delivery of exceptions to the generator on exit.
        ctx = asynccontextmanager(opener)()
        session = await ctx.__aenter__()
        # Store state only after a successful open, so a failed open leaves
        # the manager clean.
        self._ctx = ctx
        self.session = session
        return session

    async def __aexit__(self, exc_type, exc, tb):
        # Clear state first so a stale, closed session is never left behind.
        ctx, self._ctx, self.session = self._ctx, None, None
        if ctx is None:
            return False
        # Forward the body's exception (if any) so the transport and session
        # contexts inside the generator see it, instead of resuming the
        # generator as if the body had succeeded.
        return await ctx.__aexit__(exc_type, exc, tb)
MCP = SessionManager()
print("SessionManager ready. Use `async with MCP as session:` to interact.")

import pprint, asyncio
async def describe_tools():
    """Print every tool from the ``MCP`` session with its input JSON Schema, if any."""
    divider = "=" * 80
    async with MCP as session:
        listing = await session.list_tools()
        for entry in listing.tools:
            print(divider)
            print("Tool:", entry.name)
            # Guard clause: tools without a schema get a one-line notice.
            if not entry.inputSchema:
                print("No input schema advertised.")
                continue
            print("Input schema (JSON Schema):")
            pprint.pprint(entry.inputSchema)
asyncio.get_event_loop().run_until_complete(describe_tools())
