# AWS Bedrock RAG Infrastructure Setup

This notebook guides you through creating AWS infrastructure for a Bedrock RAG implementation.
**VPC creation is handled by an external script (`create_vpc_script.py`).**
Subsequent resources (Aurora, S3, Bedrock KB) are created step-by-step in this notebook.

## 1. Imports
Import necessary libraries. 

In [1]:
import getpass
import json
import os
import random
import string
import subprocess # For running the external script
import sys
import time

import boto3
from botocore.exceptions import BotoCoreError, ClientError

## 2. Configuration
Define configuration variables for the infrastructure.

In [None]:
# AWS region used for the Boto3 session and for region-scoped ARNs; change as needed.
REGION = "us-west-2"
# Base name used by the external VPC script; blank until filled in.
VPC_NAME = ""

In [None]:
# Aurora settings: the cluster endpoint (used as the connection host), the cluster
# ARN, and the ARN of the Secrets Manager secret holding the DB credentials.
# All three start blank and need real values before the database cells are run.
AURORA_CLUSTER_ENDPOINT = AURORA_ARN = AURORA_SECRET_ARN = ""

In [36]:
# Bedrock Knowledge Base settings
KB_NAME = 'my-bedrock-kb-func'

# Database, table and column names handed to the Knowledge Base storage
# configuration. They must already exist in the database when the KB is created.
AURORA_KB_SCHEMA_NAME = 'postgres'  # despite the name, this is passed as `databaseName`
AURORA_KB_TABLE_NAME = 'bedrock_integration.embeddings'
AURORA_KB_PK_FIELD = 'id'
AURORA_KB_METADATA_FIELD = 'metadata'
AURORA_KB_TEXT_FIELD = 'text_content'
AURORA_KB_VECTOR_FIELD = 'embedding'

# ARN of the embedding foundation model in the configured region.
EMBEDDING_MODEL_ARN = 'arn:aws:bedrock:{}::foundation-model/amazon.titan-embed-text-v2:0'.format(REGION)

In [None]:
# Placeholders: both are assigned once the account ID has been looked up.
ACCOUNT_ID = S3_BUCKET_NAME = None

# Registry for details of resources as they get created.
created_resources = dict()

## 3. Initialize AWS Clients
Create Boto3 clients needed for the different services.

In [6]:
print(f"Initializing Boto3 clients for region: {REGION}...")
# A single session pinned to REGION; every service client below is derived from it.
session = boto3.Session(region_name=REGION)

# Service clients
sts_client = session.client(service_name="sts")
ec2_client = session.client(service_name="ec2")
rds_client = session.client(service_name="rds")
s3_client = session.client(service_name="s3")
iam_client = session.client(service_name="iam")
bedrock_agent_client = session.client(service_name="bedrock-agent")
secretsmanager_client = session.client(service_name="secretsmanager")

Initializing Boto3 clients for region: us-west-2...


In [7]:
# Resolve the caller's AWS account ID through STS, then derive the bucket name from it.
try:
    ACCOUNT_ID = sts_client.get_caller_identity()['Account']
except (ClientError, BotoCoreError) as e:
    # ClientError covers errors returned by the service; BotoCoreError covers
    # client-side failures (for example no credentials configured) that are
    # raised before any response exists and would otherwise bypass this message.
    raise Exception(f"Could not get AWS Account ID. Check credentials/permissions: {e}") from e

print(f"AWS Account ID: {ACCOUNT_ID}")
# Bucket name is built from the account ID and region.
S3_BUCKET_NAME = f"bedrock-kb-{ACCOUNT_ID}-{REGION}"
print(f"Target S3 Bucket Name: {S3_BUCKET_NAME}")

AWS Account ID: 058264544288
Target S3 Bucket Name: bedrock-kb-058264544288-us-west-2


## 4. Prepare Aurora PostgreSQL for the Knowledge Base
Run the SQL below to prepare the Aurora PostgreSQL database for the Knowledge Base.

In [8]:
import psycopg2 as pg

# Fail early with a clear message when the Aurora settings were left blank,
# instead of letting the empty strings reach AWS or the database driver.
if not AURORA_SECRET_ARN or not AURORA_CLUSTER_ENDPOINT:
    raise ValueError(
        "AURORA_SECRET_ARN and AURORA_CLUSTER_ENDPOINT must be set in the "
        "configuration cell before connecting to the database."
    )

# Fetch the database credentials from Secrets Manager. The secret is expected to be
# a JSON document containing at least `username` and `password`.
get_secret_value_response = secretsmanager_client.get_secret_value(
    SecretId=AURORA_SECRET_ARN
)
credentials = json.loads(get_secret_value_response['SecretString'])

# Open the connection used by the database preparation cell.
conn = pg.connect(
    host=AURORA_CLUSTER_ENDPOINT,
    user=credentials['username'],
    database="postgres",
    password=credentials['password'],
    connect_timeout=10,  # seconds; avoids hanging when the endpoint is unreachable
)

In [9]:
# Password for the `bedrock_user` database role. It is taken from the
# BEDROCK_DB_PASSWORD environment variable, or prompted for interactively, so the
# secret is never stored in the notebook source.
bedrock_db_password = os.environ.get("BEDROCK_DB_PASSWORD") or getpass.getpass(
    "Password for the bedrock_user database role: "
)
if not bedrock_db_password:
    raise ValueError("A non-empty password is required for the bedrock_user role.")

In [10]:
# Prepare the database for the Knowledge Base: pgvector extension, schema, role,
# embeddings table and its two indexes. Every step is committed as it completes,
# and all statements are written so the cell can be re-run safely.
try:
    # The context manager closes the cursor on exit, and `cur` is never referenced
    # unbound if cursor creation itself fails.
    with conn.cursor() as cur:
        # Create pgvector extension
        cur.execute('CREATE EXTENSION IF NOT EXISTS vector;')
        conn.commit()
        print("Extension completed successfully")

        cur.execute('CREATE SCHEMA IF NOT EXISTS bedrock_integration;')
        conn.commit()
        print("SCHEMA completed successfully")

        try:
            # Pass the password as a bound parameter so quotes or other special
            # characters in it cannot break or alter the statement.
            cur.execute(
                "CREATE ROLE bedrock_user WITH PASSWORD %s LOGIN;",
                (bedrock_db_password,),
            )
            conn.commit()
        except Exception as e:
            # Best effort: typically the role already exists. Reset the aborted
            # transaction through the driver and carry on.
            print(f"Error creating user: {e}")
            conn.rollback()

        cur.execute('GRANT ALL ON SCHEMA bedrock_integration to bedrock_user;')
        conn.commit()
        print("Grant completed successfully")

        # Create table for storing embeddings
        cur.execute('''
            CREATE TABLE IF NOT EXISTS bedrock_integration.embeddings (
                id uuid PRIMARY KEY,
                text_content TEXT NOT NULL,
                embedding vector(1024),  -- Dimension size for text embeddings
                metadata JSONB,
                created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
            );
        ''')
        print("Create table completed successfully")

        cur.execute("""ALTER TABLE bedrock_integration.embeddings OWNER TO bedrock_user;""")
        conn.commit()

        # The indexes below are created while acting as bedrock_user.
        cur.execute("SET SESSION AUTHORIZATION bedrock_user;")
        conn.commit()
        print("Change to role completed successfully")

        # Create index for vector similarity search. The explicit name together
        # with IF NOT EXISTS keeps a re-run from adding a duplicate index.
        cur.execute('''
            CREATE INDEX IF NOT EXISTS embeddings_embedding_hnsw_idx
            ON bedrock_integration.embeddings USING hnsw (embedding vector_cosine_ops) WITH (ef_construction=256)
        ''')
        print("Cretate index completed successfully")
        conn.commit()

        # Full-text index over the text column.
        cur.execute('''
            CREATE INDEX IF NOT EXISTS embeddings_text_content_gin_idx
            ON bedrock_integration.embeddings USING gin (to_tsvector('simple', text_content))
        ''')
        # Commit before the connection is closed; without this the index creation
        # is discarded together with the open transaction.
        conn.commit()
        print("Cretate index completed successfully")
        print("Database initialization completed successfully")

except Exception as e:
    print(f"Error initializing database: {e}")
    # rollback() itself raises on an already-closed connection (e.g. cell re-run).
    if not conn.closed:
        conn.rollback()
    # Kept from the original cell; not read anywhere in the visible notebook.
    code = 500
    message = f"Error initializing database: {e}"
    # Re-raise so later cells do not run against a half-prepared database.
    raise
finally:
    conn.close()

Extension completed successfully
SCHEMA completed successfully
Grant completed successfully
Create table completed successfully
Change to role completed successfully
Cretate index completed successfully
Database initialization completed successfully


## 5. Create Bedrock Knowledge Base

Create a Knowledge Base that uses the Aurora PostgreSQL database as its vector store.

Create role for the KB to assume

In [11]:
role_name = f"{KB_NAME}-execution-role"

# Trust policy: lets the Bedrock service assume this role.
assume_role_policy = json.dumps({"Version": "2012-10-17", "Statement": [{"Effect": "Allow", "Principal": {"Service": "bedrock.amazonaws.com"}, "Action": "sts:AssumeRole"}]})

try:
    role_response = iam_client.create_role(RoleName=role_name, AssumeRolePolicyDocument=assume_role_policy, Description=f"Execution role for KB {KB_NAME}", Tags=[{'Key': 'Name', 'Value': role_name}])
except iam_client.exceptions.EntityAlreadyExistsException:
    # Re-running the cell: reuse the role created earlier instead of failing.
    role_response = iam_client.get_role(RoleName=role_name)
    role_arn = role_response['Role']['Arn']
    print(f"Role already exists, reusing Role ARN: {role_arn}")
else:
    role_arn = role_response['Role']['Arn']
    print(f"Created Role ARN: {role_arn}. Waiting 15s for propagation...")
    # Actually perform the announced wait: a freshly created IAM role is not
    # immediately visible to other services.
    time.sleep(15)

    Created Role ARN: arn:aws:iam::058264544288:role/my-bedrock-kb-func-execution-role. Waiting 15s for propagation...


In [24]:
kb_policy_name = f"{KB_NAME}-execution-policy-inline"

# Inline permissions for the KB execution role, one statement per concern:
# reading the source bucket, invoking the embedding model, using the RDS Data API
# on the Aurora cluster, describing clusters, and reading the DB secret.
kb_policy_document = json.dumps(
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "S3Permissions",
                "Effect": "Allow",
                "Action": ["s3:GetObject", "s3:ListBucket"],
                "Resource": [
                    f"arn:aws:s3:::{S3_BUCKET_NAME}",
                    f"arn:aws:s3:::{S3_BUCKET_NAME}/*",
                ],
            },
            {
                "Sid": "BedrockInvokeModel",
                "Effect": "Allow",
                "Action": "bedrock:InvokeModel",
                "Resource": EMBEDDING_MODEL_ARN,
            },
            {
                "Sid": "RDSDataAPIPermissions",
                "Effect": "Allow",
                "Action": [
                    "rds-data:ExecuteStatement",
                    "rds-data:BatchExecuteStatement",
                    "rds-data:BeginTransaction",
                    "rds-data:CommitTransaction",
                    "rds-data:RollbackTransaction",
                ],
                "Resource": AURORA_ARN,
            },
            {
                "Sid": "RDSDescribeCluster",
                "Effect": "Allow",
                "Action": ["rds:DescribeDBClusters"],
                "Resource": "*",
            },
            {
                "Sid": "SecretsManagerPermissions",
                "Effect": "Allow",
                "Action": "secretsmanager:GetSecretValue",
                "Resource": AURORA_SECRET_ARN,
            },
        ],
    }
)

# Only the document is built in this cell; nothing is sent to IAM here.
print(f"Attaching Inline Policy: {kb_policy_name} to role {role_name}")

Attaching Inline Policy: my-bedrock-kb-func-execution-policy-inline to role my-bedrock-kb-func-execution-role


In [25]:
# Attach the inline policy to the execution role.
iam_client.put_role_policy(RoleName=role_name, PolicyName=kb_policy_name, PolicyDocument=kb_policy_document)
print("Policy attached. Waiting 10s...")
# Perform the announced wait so the new permissions have time to propagate
# before the role is used.
time.sleep(10)

Policy attached. Waiting 10s...


Create the Knowledge Base

In [26]:
# Look for an existing Knowledge Base with the configured name. The listing is
# paginated, so merge every page; a single list call only returns the first page
# and could miss the KB in an account with many of them.
list_kbs_response = (
    bedrock_agent_client.get_paginator('list_knowledge_bases')
    .paginate()
    .build_full_result()
)
existing_kb = next((kb for kb in list_kbs_response.get('knowledgeBaseSummaries', []) if kb['name'] == KB_NAME), None)

# Prints the matching summary, or None when no KB has that name.
print(existing_kb)

None


In [40]:
if existing_kb is not None:
    # A KB with this name was found by the lookup cell: reuse it rather than
    # failing on a duplicate-name create.
    kb_response = bedrock_agent_client.get_knowledge_base(knowledgeBaseId=existing_kb['knowledgeBaseId'])
    kb_id = kb_response['knowledgeBase']['knowledgeBaseId']
    kb_arn = kb_response['knowledgeBase']['knowledgeBaseArn']
    print(f"Knowledge Base already exists with ID: {kb_id}, ARN: {kb_arn}")
else:
    # Vector KB backed by the Aurora cluster; the field mapping names the columns
    # of the embeddings table.
    kb_response = bedrock_agent_client.create_knowledge_base(
        name=KB_NAME,
        description=f"KB for RAG using {AURORA_CLUSTER_ENDPOINT}",
        roleArn=role_arn,
        knowledgeBaseConfiguration={
            'type': 'VECTOR',
            'vectorKnowledgeBaseConfiguration': {'embeddingModelArn': EMBEDDING_MODEL_ARN},
        },
        storageConfiguration={
            'type': 'RDS',
            'rdsConfiguration': {
                'resourceArn': AURORA_ARN,
                'credentialsSecretArn': AURORA_SECRET_ARN,
                'databaseName': AURORA_KB_SCHEMA_NAME,
                'tableName': AURORA_KB_TABLE_NAME,
                'fieldMapping': {
                    'primaryKeyField': AURORA_KB_PK_FIELD,
                    'vectorField': AURORA_KB_VECTOR_FIELD,
                    'textField': AURORA_KB_TEXT_FIELD,
                    'metadataField': AURORA_KB_METADATA_FIELD,
                },
            },
        },
        tags={'Name': KB_NAME},
    )
    kb_id = kb_response['knowledgeBase']['knowledgeBaseId']
    kb_arn = kb_response['knowledgeBase']['knowledgeBaseArn']
    print(f"Knowledge Base created with ID: {kb_id}, ARN: {kb_arn}")

# Wait (bounded) until the KB is ACTIVE so the data source cells do not run
# against a KB that is still being created.
kb_wait_deadline = time.monotonic() + 300
while True:
    kb_details = bedrock_agent_client.get_knowledge_base(knowledgeBaseId=kb_id)['knowledgeBase']
    kb_status = kb_details['status']
    if kb_status == 'ACTIVE':
        break
    if kb_status in ('FAILED', 'DELETING', 'DELETE_UNSUCCESSFUL'):
        raise Exception(f"Knowledge Base {kb_status}: {kb_details.get('failureReasons', ['Unknown'])}")
    if time.monotonic() >= kb_wait_deadline:
        raise TimeoutError(f"Knowledge Base {kb_id} still {kb_status} after 300s")
    time.sleep(5)

Knowledge Base created with ID: 950X2HZVWA, ARN: arn:aws:bedrock:us-west-2:058264544288:knowledge-base/950X2HZVWA


## 6. Create S3 Data Source

Create a data source for the Knowledge Base.

In [41]:
data_source_name = 's3-docs-source'

# Look for an existing data source with this name on the Knowledge Base. The
# listing is paginated, so merge every page instead of reading only the first.
list_ds_response = (
    bedrock_agent_client.get_paginator('list_data_sources')
    .paginate(knowledgeBaseId=kb_id)
    .build_full_result()
)
existing_ds = next((ds for ds in list_ds_response.get('dataSourceSummaries', []) if ds['name'] == data_source_name), None)

# Prints the matching summary, or None when no data source has that name.
print(existing_ds)

None


In [42]:
if existing_ds is not None:
    # A data source with this name was found by the lookup cell: reuse it rather
    # than failing on a duplicate-name create.
    data_source_response = bedrock_agent_client.get_data_source(
        knowledgeBaseId=kb_id, dataSourceId=existing_ds['dataSourceId']
    )
    data_source_id = data_source_response['dataSource']['dataSourceId']
    print(f"    Data Source already exists with ID: {data_source_id}")
else:
    # S3-backed data source pointing at the notebook's bucket.
    data_source_response = bedrock_agent_client.create_data_source(
        knowledgeBaseId=kb_id,
        name=data_source_name,
        description=f"S3 source: {S3_BUCKET_NAME}",
        dataSourceConfiguration={'type': 'S3', 's3Configuration': {'bucketArn': f"arn:aws:s3:::{S3_BUCKET_NAME}"}},
    )
    data_source_id = data_source_response['dataSource']['dataSourceId']
    print(f"    Data Source created with ID: {data_source_id}")

print("    Waiting for Data Source status AVAILABLE...")
        

    Data Source created with ID: FU7NDDLCCH
    Waiting for Data Source status AVAILABLE...


In [43]:
# Poll the data source every 20s until it is AVAILABLE. The wait is bounded so the
# cell cannot spin forever if the status never reaches AVAILABLE.
DATA_SOURCE_WAIT_SECONDS = 600
ds_wait_deadline = time.monotonic() + DATA_SOURCE_WAIT_SECONDS
while True:
    ds_status_response = bedrock_agent_client.get_data_source(knowledgeBaseId=kb_id, dataSourceId=data_source_id)
    status = ds_status_response['dataSource']['status']
    print(f"      Current status: {status}")
    if status == 'AVAILABLE':
        break
    if status == 'DELETE_UNSUCCESSFUL':
        raise Exception(f"Data Source {status}: {ds_status_response['dataSource'].get('failureReasons', ['Unknown'])}")
    if time.monotonic() >= ds_wait_deadline:
        raise TimeoutError(
            f"Data Source {data_source_id} still {status} after {DATA_SOURCE_WAIT_SECONDS}s"
        )
    time.sleep(20)

      Current status: AVAILABLE
