# Demo 1: Basic RAG with Bedrock Knowledge Bases
Pattern: S3 documents → Bedrock KB → S3 Vectors → Amazon Nova Pro

In [1]:
import boto3
import json
import time

In [None]:
# Initialize clients
bedrock_agent = boto3.client('bedrock-agent')
s3 = boto3.client('s3')
iam = boto3.client('iam')
sts = boto3.client('sts')
s3vectors = boto3.client('s3vectors')

In [21]:
# Configuration
BUCKET_NAME = "bedrock-kb-demo1"
VECTOR_BUCKET_NAME = "bedrock-vectors-demo1"
KB_NAME = "travel-expense-policy-kb"
ROLE_NAME = "BedrockKnowledgeBaseRoleDemo1"
EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"
GENERATION_MODEL = "amazon.nova-pro-v1:0"

In [8]:
# Create S3 buckets
s3.create_bucket(Bucket=BUCKET_NAME)
s3.create_bucket(Bucket=VECTOR_BUCKET_NAME)
print(f"Created S3 bucket: {BUCKET_NAME}")
print(f"Created S3 vector bucket: {VECTOR_BUCKET_NAME}")

Created S3 bucket: bedrock-kb-demo1
Created S3 vector bucket: bedrock-vectors-demo1


In [9]:
# Upload sample travel policy document
travel_policy = """
TRAVEL EXPENSE POLICY

1. MEAL ALLOWANCES
- Breakfast: $25 maximum
- Lunch: $35 maximum  
- Dinner: $50 maximum
- Total daily meal allowance: $110

2. ACCOMMODATION
- Standard hotel rate: $200 per night maximum
- Extended stay (7+ nights): $150 per night maximum
- Receipts required for all accommodation expenses

3. TRANSPORTATION
- Airfare: Economy class only, book 14 days in advance
- Ground transportation: Taxi, rideshare, or rental car
- Mileage reimbursement: $0.65 per mile for personal vehicle

4. APPROVAL REQUIREMENTS
- Trips under $1,000: Manager approval required
- Trips over $1,000: Director approval required
- International travel: VP approval required
"""

s3.put_object(
    Bucket=BUCKET_NAME,
    Key="travel-policy.txt",
    Body=travel_policy.encode('utf-8')
)
print("Uploaded travel policy document")

Uploaded travel policy document


In [39]:
# Create IAM role for Bedrock Knowledge Base
account_id = sts.get_caller_identity()['Account']

trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "bedrock.amazonaws.com"},
        "Action": "sts:AssumeRole"
    }]
}

role_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": ["s3:GetObject", "s3:ListBucket"],
            "Resource": [f"arn:aws:s3:::{BUCKET_NAME}/*", f"arn:aws:s3:::{BUCKET_NAME}"]
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3vectors:CreateVectorBucket",
                "s3vectors:GetVectorBucket",
                "s3vectors:ListVectorBuckets",
                "s3vectors:QueryVectors",
                "s3vectors:GetVectors"
            ],
            "Resource": "*"
        },
        {
            "Effect": "Allow",
            "Action": ["bedrock:InvokeModel"],
            "Resource": "*"
        }
    ]
}

try:
    role_response = iam.create_role(
        RoleName=ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(trust_policy)
    )
    
    iam.put_role_policy(
        RoleName=ROLE_NAME,
        PolicyName="BedrockKBPolicy",
        PolicyDocument=json.dumps(role_policy)
    )
    
    role_arn = role_response['Role']['Arn']
    print(f"Created IAM role: {role_arn}")
    time.sleep(10)
except Exception as e:
    print(f"Role creation error (may already exist): {e}")
    role_arn = f"arn:aws:iam::{account_id}:role/{ROLE_NAME}"

Created IAM role: arn:aws:iam::058264544288:role/BedrockKnowledgeBaseRoleDemo1


In [None]:
vector_bucket_response = s3vectors.create_vector_bucket(
    vectorBucketName=VECTOR_BUCKET_NAME
)
print(f"Created S3 Vector Bucket: {VECTOR_BUCKET_NAME}")

Created S3 Vector Bucket: bedrock-vectors-demo1


In [45]:
index_response = s3vectors.create_index(
    vectorBucketName=VECTOR_BUCKET_NAME,
    indexName="bedrock-kb-index",
    dimension=1024,  # For Titan embedding model
    dataType="float32",
    distanceMetric="cosine"
)
print(f"Created S3 Vector Index: bedrock-kb-index")

Created S3 Vector Index: bedrock-kb-index


In [46]:
# Create Knowledge Base
kb_config = {
    "name": KB_NAME,
    "description": "Travel expense policy knowledge base",
    "roleArn": role_arn,
    "knowledgeBaseConfiguration": {
        "type": "VECTOR",
        "vectorKnowledgeBaseConfiguration": {
            "embeddingModelArn": f"arn:aws:bedrock:us-east-1::foundation-model/{EMBEDDING_MODEL}"
        }
    },
    "storageConfiguration": {
        "type": "S3_VECTORS",
        "s3VectorsConfiguration": {
            "vectorBucketArn": vector_bucket_response['vectorBucketArn'],
            "indexName": "bedrock-kb-index"
        }
    }
}

kb_response = bedrock_agent.create_knowledge_base(**kb_config)
kb_id = kb_response['knowledgeBase']['knowledgeBaseId']
print(f"Created Knowledge Base: {kb_id}")

Created Knowledge Base: WCCBAKICPD


In [47]:
# Create Data Source
ds_config = {
    "knowledgeBaseId": kb_id,
    "name": "travel-policy-datasource",
    "dataSourceConfiguration": {
        "type": "S3",
        "s3Configuration": {
            "bucketArn": f"arn:aws:s3:::{BUCKET_NAME}"
        }
    }
}

ds_response = bedrock_agent.create_data_source(**ds_config)
ds_id = ds_response['dataSource']['dataSourceId']
print(f"Created Data Source: {ds_id}")

Created Data Source: TQUE8501XT


In [None]:
# Start ingestion job
ingestion_response = bedrock_agent.start_ingestion_job(
    knowledgeBaseId=kb_id,
    dataSourceId=ds_id
)
job_id = ingestion_response['ingestionJob']['ingestionJobId']
print(f"Started ingestion job: {job_id}")

# Wait for ingestion to complete
while True:
    job_status = bedrock_agent.get_ingestion_job(
        knowledgeBaseId=kb_id,
        dataSourceId=ds_id,
        ingestionJobId=job_id
    )
    status = job_status['ingestionJob']['status']
    print(f"Ingestion status: {status}")
    
    if status in ['COMPLETE', 'FAILED']:
        break
    time.sleep(30)

Started ingestion job: Q3XNBB4GRP
Ingestion status: STARTING


In [None]:
# Test RAG query using retrieve_and_generate with Nova Pro
def query_knowledge_base(question):
    response = bedrock_agent.retrieve_and_generate(
        input={'text': question},
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': kb_id,
                'modelArn': f'arn:aws:bedrock:us-east-1::foundation-model/{GENERATION_MODEL}'
            }
        }
    )
    return response['output']['text']

# Test queries
questions = [
    "What is the maximum dinner allowance?",
    "Who needs to approve international travel?",
    "What is the mileage reimbursement rate?"
]

for question in questions:
    print(f"\nQ: {question}")
    answer = query_knowledge_base(question)
    print(f"A: {answer}")

In [None]:
print(f"\nDemo complete!")
print(f"Knowledge Base ID: {kb_id}")
print(f"S3 Bucket: {BUCKET_NAME}")