# Demo 2: S3 Vectors Direct Interaction
Learn how to create and interact with S3 Vectors using boto3

In [5]:
import boto3
import numpy as np
import json
import time
from typing import List, Dict

In [6]:
# Initialize clients
s3vectors = boto3.client('s3vectors', region_name='us-east-1')
bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')

In [7]:
# Configuration
VECTOR_BUCKET_NAME = "s3-vectors-demo2"
INDEX_NAME = "product-embeddings"
EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"

## Step 1: Create S3 Vector Bucket

In [8]:
# Create vector bucket
try:
    bucket_response = s3vectors.create_vector_bucket(
        vectorBucketName=VECTOR_BUCKET_NAME
    )
    print(f"Created vector bucket: {VECTOR_BUCKET_NAME}")
    print(f"Bucket ARN: {bucket_response['vectorBucketArn']}")
except Exception as e:
    if "BucketAlreadyExists" in str(e):
        print(f"Vector bucket {VECTOR_BUCKET_NAME} already exists")
    else:
        print(f"Error creating bucket: {e}")

Error creating bucket: An error occurred (ConflictException) when calling the CreateVectorBucket operation: A vector bucket with the specified name already exists


## Step 2: Create Vector Index

In [9]:
# Create vector index
try:
    index_response = s3vectors.create_index(
        vectorBucketName=VECTOR_BUCKET_NAME,
        indexName=INDEX_NAME,
        dimension=1024,  # Titan embedding dimensions
        dataType="float32",
        distanceMetric="cosine"
    )
    print(f"Created vector index: {INDEX_NAME}")
    print(f"Index ARN: {index_response['indexArn']}")
except Exception as e:
    if "IndexAlreadyExists" in str(e):
        print(f"Vector index {INDEX_NAME} already exists")
    else:
        print(f"Error creating index: {e}")

Error creating index: An error occurred (ConflictException) when calling the CreateIndex operation: An index with the specified name already exists


## Step 3: Generate Sample Embeddings

In [11]:
def get_embedding(text: str) -> List[float]:
    """Get embedding for text using Titan model"""
    response = bedrock.invoke_model(
        modelId=EMBEDDING_MODEL,
        body=json.dumps({"inputText": text}),
        contentType="application/json"
    )
    result = json.loads(response['body'].read())
    return result['embedding']

# Sample product data
products = [
    {
        "id": "prod-001",
        "text": "Wireless Bluetooth headphones with noise cancellation and 30-hour battery life",
        "metadata": {"category": "electronics", "price_range": "high", "brand": "TechCorp"}
    },
    {
        "id": "prod-002", 
        "text": "Organic cotton t-shirt in multiple colors, comfortable and sustainable",
        "metadata": {"category": "clothing", "price_range": "medium", "brand": "EcoWear"}
    },
    {
        "id": "prod-003",
        "text": "Smart fitness tracker with heart rate monitor and GPS tracking",
        "metadata": {"category": "electronics", "price_range": "medium", "brand": "FitTech"}
    },
    {
        "id": "prod-004",
        "text": "Premium leather wallet with RFID blocking technology",
        "metadata": {"category": "accessories", "price_range": "high", "brand": "LuxLeather"}
    },
    {
        "id": "prod-005",
        "text": "Portable phone charger with fast charging and compact design",
        "metadata": {"category": "electronics", "price_range": "low", "brand": "PowerUp"}
    }
]

print(f"Generating embeddings for {len(products)} products...")
for product in products:
    product['embedding'] = get_embedding(product['text'])
    print(f"✓ Generated embedding for {product['id']}")

print("All embeddings generated!")

Generating embeddings for 5 products...
✓ Generated embedding for prod-001
✓ Generated embedding for prod-002
✓ Generated embedding for prod-003
✓ Generated embedding for prod-004
✓ Generated embedding for prod-005
All embeddings generated!


## Step 4: Insert Vectors into S3 Vectors

In [19]:
# Prepare vectors for insertion
vectors_to_insert = []
for product in products:
    vector_data = {
        "key": product['id'],
        "data": {"float32": product['embedding']},
        "metadata": product['metadata'],
    }
    vectors_to_insert.append(vector_data)

# Insert vectors
try:
    put_response = s3vectors.put_vectors(
        vectorBucketName=VECTOR_BUCKET_NAME,
        indexName=INDEX_NAME,
        vectors=vectors_to_insert
    )
    print(f"Successfully inserted {len(vectors_to_insert)} vectors")
    print(f"Request ID: {put_response['ResponseMetadata']['RequestId']}")
except Exception as e:
    print(f"Error inserting vectors: {e}")

Successfully inserted 5 vectors
Request ID: 4bec89f5-3eb6-4b90-8fd1-7410ecb5ec93


## Step 5: Query Vectors (Similarity Search)

In [23]:
def search_products(query_text: str, top_k: int = 3, filters: Dict = None):
    """Search for similar products using vector similarity"""
    # Get embedding for query
    query_embedding = get_embedding(query_text)
    
    # Prepare query parameters
    query_params = {
        "vectorBucketName": VECTOR_BUCKET_NAME,
        "indexName": INDEX_NAME,
        "queryVector": {'float32': query_embedding},
        "topK": top_k,
        "returnMetadata": True,
        "returnDistance": True
    }
    
    # Add filters if provided
    if filters:
        query_params["filter"] = filters
    
    try:
        response = s3vectors.query_vectors(**query_params)
        return response['vectors']
    except Exception as e:
        print(f"Error querying vectors: {e}")
        return []

# Test similarity search
print("=== Similarity Search Examples ===")

# Search 1: General electronics query
print("\n1. Searching for 'audio device for music'...")
results = search_products("audio device for music", top_k=3)
for i, result in enumerate(results, 1):
    print(f"   {i}. {result['key']} (score: {result['distance']:.4f})")

# Search 2: Filtered search
print("\n2. Searching for electronics under 'medium' price range...")
electronics_filter = {
    "$and": [
        {"category": {"$eq": "electronics"}},
        {"price_range": {"$eq": "medium"}}
    ]
}
results = search_products("fitness device", top_k=5, filters=electronics_filter)
for i, result in enumerate(results, 1):
    print(f"   {i}. {result['key']} (score: {result['distance']:.4f})")

# Search 3: Clothing query
print("\n3. Searching for 'comfortable clothing'...")
results = search_products("comfortable clothing", top_k=2)
for i, result in enumerate(results, 1):
    print(f"   {i}. {result['key']} (score: {result['distance']:.4f})")

=== Similarity Search Examples ===

1. Searching for 'audio device for music'...
   1. prod-001 (score: 0.8193)
   2. prod-003 (score: 0.8780)
   3. prod-005 (score: 0.9052)

2. Searching for electronics under 'medium' price range...
   1. prod-003 (score: 0.5603)

3. Searching for 'comfortable clothing'...
   1. prod-002 (score: 0.7089)
   2. prod-001 (score: 0.9103)


## Step 6: Get Specific Vectors

In [29]:
# Get specific vectors by ID
vector_ids = ["prod-001", "prod-003"]

try:
    get_response = s3vectors.get_vectors(
        vectorBucketName=VECTOR_BUCKET_NAME,
        indexName=INDEX_NAME,
        keys=vector_ids
    )
    
    print(f"Retrieved {len(get_response['vectors'])} vectors:")
    for vector in get_response['vectors']:
        print(f"- {vector['key']}")
        
except Exception as e:
    print(f"Error getting vectors: {e}")

Retrieved 2 vectors:
- prod-003
- prod-001


## Step 7: List Vectors in Index

In [32]:
# List all vectors in the index
try:
    list_response = s3vectors.list_vectors(
        vectorBucketName=VECTOR_BUCKET_NAME,
        indexName=INDEX_NAME,
        maxResults=10
    )
    
    print(f"Found {len(list_response['vectors'])} vectors in index:")
    for vector_id in list_response['vectors']:
        print(f"- {vector_id}")
        
except Exception as e:
    print(f"Error listing vectors: {e}")

Found 5 vectors in index:
- {'key': 'prod-004'}
- {'key': 'prod-005'}
- {'key': 'prod-003'}
- {'key': 'prod-002'}
- {'key': 'prod-001'}


## Step 8: Update Vectors

In [37]:
# Update a vector with new metadata
updated_text = "Wireless Bluetooth headphones with noise cancellation, 30-hour battery, and premium sound quality"
updated_embedding = get_embedding(updated_text)

updated_vector = {
    "key": "prod-001",
    "data": {"float32": updated_embedding},
    "metadata": {
        "category": "electronics", 
        "price_range": "high", 
        "brand": "TechCorp",
        "featured": "true"  # New metadata field
    }
}

try:
    update_response = s3vectors.put_vectors(
        vectorBucketName=VECTOR_BUCKET_NAME,
        indexName=INDEX_NAME,
        vectors=[updated_vector]
    )
    print("Successfully updated prod-001 with new description and metadata")
except Exception as e:
    print(f"Error updating vector: {e}")

Successfully updated prod-001 with new description and metadata


## Step 9: Advanced Filtering Examples

In [38]:
print("=== Advanced Filtering Examples ===")

# Filter 1: Multiple categories
print("\n1. Electronics OR Accessories:")
multi_category_filter = {
    "Or": [
        {"Equals": {"Key": "category", "Value": "electronics"}},
        {"Equals": {"Key": "category", "Value": "accessories"}}
    ]
}
results = search_products("premium product", top_k=5, filters=multi_category_filter)
for result in results:
    metadata = result['FilterableMetadata']
    print(f"   {result['VectorId']}: {metadata['category']} - {metadata['brand']}")

# Filter 2: Price range exclusion
print("\n2. Not low price range:")
not_low_price_filter = {
    "Not": {
        "Equals": {"Key": "price_range", "Value": "low"}
    }
}
results = search_products("quality product", top_k=5, filters=not_low_price_filter)
for result in results:
    metadata = result['FilterableMetadata']
    print(f"   {result['VectorId']}: {metadata['price_range']} - {metadata['brand']}")

# Filter 3: Complex AND/OR combination
print("\n3. High-end electronics OR any TechCorp product:")
complex_filter = {
    "Or": [
        {
            "And": [
                {"Equals": {"Key": "category", "Value": "electronics"}},
                {"Equals": {"Key": "price_range", "Value": "high"}}
            ]
        },
        {"Equals": {"Key": "brand", "Value": "TechCorp"}}
    ]
}
results = search_products("technology device", top_k=5, filters=complex_filter)
for result in results:
    metadata = result['FilterableMetadata']
    print(f"   {result['VectorId']}: {metadata['category']} - {metadata['price_range']} - {metadata['brand']}")

=== Advanced Filtering Examples ===

1. Electronics OR Accessories:
Error querying vectors: Parameter validation failed:
Unknown parameter in input: "Filter", must be one of: vectorBucketName, indexName, indexArn, topK, queryVector, filter, returnMetadata, returnDistance
Invalid type for parameter queryVector, value: [-0.06793471425771713, -0.01657717116177082, 0.014087412506341934, -0.05549586936831474, 0.01790292002260685, 0.011189324781298637, 0.020040027797222137, -0.014108624309301376, 0.011658639647066593, -0.02417106367647648, 0.004051655530929565, -0.04046568274497986, 0.017563527449965477, 0.0014569986378774047, 0.02243167906999588, -0.03327365964651108, -0.07789573073387146, -0.01853927969932556, -0.004267587326467037, 0.026056278496980667, -0.03753992170095444, -0.015296496450901031, 0.07990822196006775, 0.030311934649944305, -0.05829452723264694, -0.07562074810266495, -0.0014954452635720372, -0.06839142739772797, 0.021742289885878563, 0.020002907142043114, 0.025051359087228

## Step 10: Delete Vectors

In [None]:
# Delete a specific vector
vector_to_delete = "prod-005"

try:
    delete_response = s3vectors.delete_vectors(
        VectorBucketName=VECTOR_BUCKET_NAME,
        IndexName=INDEX_NAME,
        VectorIds=[vector_to_delete]
    )
    print(f"Successfully deleted vector: {vector_to_delete}")
except Exception as e:
    print(f"Error deleting vector: {e}")

# Verify deletion by listing vectors again
try:
    list_response = s3vectors.list_vectors(
        VectorBucketName=VECTOR_BUCKET_NAME,
        IndexName=INDEX_NAME
    )
    print(f"\nRemaining vectors: {list_response['VectorIds']}")
except Exception as e:
    print(f"Error listing vectors: {e}")

## Step 11: Cleanup (Optional)

In [None]:
# Uncomment to clean up resources
# WARNING: This will delete all data!

# # Delete vector index
# try:
#     s3vectors.delete_vector_index(
#         VectorBucketName=VECTOR_BUCKET_NAME,
#         IndexName=INDEX_NAME
#     )
#     print(f"Deleted vector index: {INDEX_NAME}")
# except Exception as e:
#     print(f"Error deleting index: {e}")

# # Delete vector bucket
# try:
#     s3vectors.delete_vector_bucket(
#         VectorBucketName=VECTOR_BUCKET_NAME
#     )
#     print(f"Deleted vector bucket: {VECTOR_BUCKET_NAME}")
# except Exception as e:
#     print(f"Error deleting bucket: {e}")

print("Demo completed! Uncomment cleanup section to remove resources.")

## Summary

This demo showed how to:

1. **Create S3 Vector Bucket** - Purpose-built storage for vectors
2. **Create Vector Index** - Organize vectors with metadata schema
3. **Generate Embeddings** - Use Bedrock Titan to create vector embeddings
4. **Insert Vectors** - Store vectors with filterable and non-filterable metadata
5. **Query Vectors** - Perform similarity search with optional filtering
6. **Get Vectors** - Retrieve specific vectors by ID
7. **List Vectors** - Browse all vectors in an index
8. **Update Vectors** - Modify existing vectors and metadata
9. **Advanced Filtering** - Complex AND/OR/NOT filter combinations
10. **Delete Vectors** - Remove specific vectors from the index

### Key Benefits of S3 Vectors:
- **Cost-effective**: Pay only for what you use
- **Scalable**: Up to 2 billion vectors per index
- **Fast**: Sub-second to 100ms query latency
- **Flexible**: Rich metadata filtering capabilities
- **Integrated**: Native AWS service integrations