## Dynamic Metadata Filtering for Knowledge Bases

In [1]:
import json
import boto3

In [2]:
# Session init: resolve the region once and create both Bedrock clients with it,
# so the runtime and agent-runtime clients are always configured consistently.
session = boto3.session.Session()
region = session.region_name
bedrock = boto3.client("bedrock-runtime", region_name=region)
bedrock_agent_runtime = boto3.client("bedrock-agent-runtime", region_name=region)

## Implement Entity Extraction with a Prompt
We'll extract the product category with a short, basic prompt sent to Amazon Bedrock through the Converse API, constrained to the list of known subcategories below:


In [3]:
# Known product subcategories offered to the model as candidate answers.
# The final entry, "others", is the fallback value the prompt asks for when no
# category matches.
subcategory_filters = [
        "3-D Puzzles", "Accessories", "Action Man", "Activity Centres", "Alternative Medicine",
        "Art & Craft Supplies", "Art Sand", "BRIO", "Banners, Stickers & Confetti", "Barbie",
        "Baskets & Bins", "Beach Toys", "Bikes, Trikes & Ride-ons", "Blackboards", "Board Games",
        "Bob the Builder", "Boxes & Organisers", "Braces, Splints & Slings", "Brain Teasers", "Card Games",
        "Casino Equipment", "Charms", "Chess", "Children's Bedding", "Children's Chalk",
        "Children's Craft Kits", "Chocolate", "Climbing Frames", "Clothing & Accessories", "Collectible Figures & Memorabilia",
        "Colouring Pencils", "Colouring Pens & Markers", "Costumes", "Cowboys & Indians", "Crayola",
        "Cup & Ball Games", "DVD Games", "Darts & Accessories", "Decorations", "Decorative Accessories",
        "Desk Accessories & Storage Products", "Dice & Dice Games", "Digital Cameras", "Dinosaurs", "Disney",
        "Doll Making", "Dolls' House Dolls & Accessories", "Dominoes & Tile Games", "Drawing & Painting Supplies", "Drinking Games",
        "Early Learning Centre", "Educational Computers & Accessories", "Educational Games", "Emergency Services", "Erasers & Correction Supplies",
        "Erotic Clothing", "Farm & Animals", "Fashion Dolls & Accessories", "Felt Kits", "Finger Puppets",
        "Football", "Frame Jigsaws", "Garden Tools", "Greenhouses & Plant Germination Equipment", "Guitars & Strings",
        "Hand Puppets", "Hand Tools", "Harry Potter", "Hasbro", "Hornby",
        "Instruments", "Invitations", "Jigsaw Accessories", "Jigsaws", "Kid Venture",
        "Kids Remote & App Controlled Toys", "Kids'", "Kitchen Tools & Gadgets", "Kites & Flight Toys", "Knights & Castles",
        "Lab Instruments & Equipment", "Labels, Index Dividers & Stamps", "LeapFrog", "Learning & Activity Toys", "Literacy & Spelling",
        "Markers & Highlighters", "Marvin's Magic", "Mathematics", "Military", "Model Building Kits",
        "Model Trains & Railway Sets", "Mystery Games", "Novelty", "Pain & Fever", "Painting By Numbers",
        "Paper & Stickers", "Party Bags", "Party Favours", "Party Tableware", "Pencils",
        "Pens & Refills", "Pianos & Keyboards", "Pirates", "Play Tools", "Playsets",
        "Pushchair Toys", "Racket Games", "Rattles", "Ravensburger", "Remote Controlled Devices",
        "Robots", "Rockers & Ride-ons", "Rocking Horses", "Sandwich Spreads, Pates & Pastes", "Schoolbags & Backpacks",
        "Science Fiction & Fantasy", "Seasonal Décor", "Shops & Accessories", "Sleeping Gear", "Slot Cars, Race Tracks & Accessories",
        "Soft Dolls", "Sorting, Stacking & Plugging Toys", "Sound Toys", "Specialty & Decorative Lighting", "Spinning Tops",
        "Sport", "Star Wars", "Strategy Games", "Tabletop & Miniature Gaming", "Target Games",
        "Teaching Clocks", "Thomas & Friends", "Thunderbirds", "Tomy", "Tops & T-Shirts",
        "Toy Story", "Toy Trains & Accessories", "Toy Vehicle Playsets", "Toy Vehicles & Accessories", "Trading Cards & Accessories",
        "Transportation & Traffic", "Travel & Pocket Games", "Trivia & Quiz Games", "Upstarts", "VTech",
        "WWE", "Wind & Brass", "Winnie-the-Pooh", "others"
    ]

In [4]:
# Stand-in for the category list that would be loaded from the database
# (e.g. refreshed once a day); flattened to one comma-separated string for the prompt.
db_product_categories = ",".join(map(str, subcategory_filters))

## Prompt Definition

In [5]:
def system_prompt():
    """Return the system prompt that frames the model as a product-category extractor."""
    # Assembled line by line; the embedded indentation and trailing whitespace
    # are part of the prompt text and are kept as-is.
    instructions = (
        "You are an expert in extracting the main product category, for products that users are looking for.\n"
        "    Do not return any other text, except for the product category you have extracted.\n"
        "    "
    )
    return instructions

def generate_prompt(question, product_categories):
    """
    Build the user-turn prompt for category extraction.

    Args:
        question: The user question, placed at the end of the prompt.
        product_categories: The store categories, placed inside the
            <product_categories> XML tags.

    Returns:
        str: The prompt text with both values interpolated.
    """
    pad = "    "
    prompt_lines = [
        "",
        pad + "Below, you have a list in XML tags containing the product categories we have in our store.",
        pad + "Your task is to return the only category that is the most representative for the user question's context or semantic.",
        pad + "<product_categories>",
        pad + f"{product_categories}. ",
        pad + "</product_categories>",
        pad + "For example, if the user question is \"I'm looking DIY sand for my son\", then you'll need to return \"Art Sand\".",
        pad + "If you cannot map or find the category value, use 'others'. ",
        pad + "If the question is not related to an ecommerce store, use 'unknown'.",
        "",
        pad + "Respond to the following user question:",
        pad + f"{question}",
        pad,
    ]
    return "\n".join(prompt_lines)

In [6]:
# Bedrock Runtime client used for the Converse call in get_category.
bedrock_runtime = boto3.client(service_name="bedrock-runtime")

In [7]:
def get_category(question, product_categories, model_id="anthropic.claude-3-5-haiku-20241022-v1:0",
                 max_tokens=4096, temperature=0.5):
    """
    Map a user question to a single product category via the Bedrock Converse API.

    Args:
        question (str): The user question to classify.
        product_categories (str): The candidate categories interpolated into the
            prompt (the notebook passes a comma-separated string).
        model_id (str): Bedrock model identifier passed to ``converse``.
        max_tokens (int): Upper bound on generated tokens (``inferenceConfig.maxTokens``).
        temperature (float): Sampling temperature (``inferenceConfig.temperature``).

    Returns:
        str | None: The model's answer with surrounding whitespace and one pair of
        wrapping double quotes removed, or None if the call fails or the answer
        is empty. Errors are printed, never raised.
    """
    try:
        response = bedrock_runtime.converse(
            modelId=model_id,
            system=[{
                "text": system_prompt()
            }],
            messages=[{
                "role": "user",
                "content": [{"text": generate_prompt(question, product_categories)}]
            }],
            inferenceConfig={
                "maxTokens": max_tokens,
                "temperature": temperature
            },
        )

        # Only the first content block's text is used as the category.
        category_product = response['output']['message']['content'][0]['text'].strip()

        # The prompt's own example shows the answer in double quotes, so the model may
        # echo them; drop one wrapping pair so the value matches the stored metadata.
        # Single quotes are left alone because categories such as "Kids'" end with one.
        if len(category_product) >= 2 and category_product[0] == category_product[-1] == '"':
            category_product = category_product[1:-1].strip()

        return category_product or None

    except Exception as e:
        # Best-effort by design: report the failure and let the caller skip filtering.
        print(f"Error while mapping user query with product category: {e}")
        return None

## Construct Metadata Filter
Now, let's create a function to construct the metadata filter based on the extracted entities:

In [8]:
def construct_metadata_filter(product_category):
    """
    Build a Bedrock Knowledge Base metadata filter for the given product category.

    Args:
        product_category: The extracted category. None, an empty/blank string, or
            'unknown' (any casing, surrounding whitespace ignored) means no filter.

    Returns:
        dict | None: An ``equals`` filter on the ``subcategory_1`` metadata key, or
        None when no usable category was provided.
    """
    # Normalize strings so stray whitespace from the model cannot defeat the checks.
    if isinstance(product_category, str):
        category = product_category.strip()
    else:
        category = product_category

    is_unknown = isinstance(category, str) and category.lower() == 'unknown'
    if not category or is_unknown:
        print("Product category is unknown. Skipping metadata filter.")
        return None

    # A single condition is returned as a bare `equals` filter: Bedrock's `andAll`
    # requires at least two member filters and rejects a one-element list.
    return {
        "equals": {
            "key": "subcategory_1",
            "value": category
        }
    }

## Example

In [9]:
# Sample questions (English and Spanish); keep exactly one assignment active.
# user_question="Tienes arena para manualidades?"
user_question="I'm looking for sand to do DIY activities with my son"
# user_question="Brontosaurus para niños"
# user_question="Tienes el juego de mesa Clue?"
# user_question="Tienes tijeras para cortar el césped?"

In [None]:
# Classify the sample question, then turn the category into a Knowledge Base filter.
extracted_entities = get_category(product_categories=db_product_categories, question=user_question)
metadata_filter = construct_metadata_filter(product_category=extracted_entities)
print('Here is the prepared metadata filters:', json.dumps(metadata_filter, indent=4), sep='\n')

---
### Finally, we can call the RetrieveAndGenerate API with the new filters:
> e.g.
```python
def process_query(text, tool_properties):
    """
    Answer a user question from the knowledge base, filtered by its product category.

    Args:
        text (str): The user question; used both for category extraction and as
            the query sent to the knowledge base.
        tool_properties: Forwarded to get_category as its product-categories argument.

    Returns:
        dict: The raw retrieve_and_generate response.
    """
    extracted_entities = get_category(text, tool_properties)
    metadata_filter = construct_metadata_filter(extracted_entities)

    # `kb_id` and the model ARN below are placeholders to fill in for your account.
    kb_config = {
        "knowledgeBaseId": kb_id,
        "modelArn": 'eu.claude...',
    }
    # Attach the filter only when one was built; a null filter is not a valid value.
    if metadata_filter:
        kb_config["retrievalConfiguration"] = {
            "vectorSearchConfiguration": {
                "filter": metadata_filter
            }
        }

    # Call Bedrock KB with Metadata Filter
    response = bedrock_agent_runtime.retrieve_and_generate(
        input={
            "text": text
        },
        retrieveAndGenerateConfiguration={
            "type": "KNOWLEDGE_BASE",
            "knowledgeBaseConfiguration": kb_config
        },
    )
    return response
```
This is the request shape required by the [Bedrock API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html):

![](./images/bedrock_retrieve_api.png)