## Amazon Kendra Search

In [21]:
# Import necessary libraries and load environment variables

from dotenv import load_dotenv, find_dotenv, set_key
import dotenv
import os

# Load the notebook settings stored in a local dotenv file; values from the
# file override anything already present in the process environment.
local_env_filename = 'dev.env'
load_dotenv(find_dotenv(local_env_filename),override=True)

# Every setting this notebook reads. Checking them together produces one
# readable error naming all missing entries, instead of the opaque TypeError
# that `os.environ[name] = os.getenv(name)` raises on the first unset variable.
REQUIRED_ENV_VARS = (
    'REGION',
    'KENDRA_INDEX',
    'KENDRA_ROLE',
    'CUSTOM_DATA_SOURCE_ID_1',
    'CUSTOM_DATA_SOURCE_ID_2',
    'AMAZON_Q_APP_ID',
    'Q_CUSTOM_DATA_SOURCE_ID_1',
    'Q_CUSTOM_DATA_SOURCE_ID_2',
    'DEMO_S3_BUCKET',
    'DEMO_S3_KEY',
    'CLOUDFRONT_URL',
    'S3_DATA_SOURCE_ID',
)
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        f"Missing environment variables (expected in {local_env_filename}): "
        + ", ".join(missing_env_vars)
    )

# Expose the settings as module-level constants for the cells below.
REGION = os.environ['REGION']
KENDRA_INDEX = os.environ['KENDRA_INDEX']
KENDRA_ROLE = os.environ['KENDRA_ROLE']
CUSTOM_DATA_SOURCE_ID_1 = os.environ['CUSTOM_DATA_SOURCE_ID_1']
CUSTOM_DATA_SOURCE_ID_2 = os.environ['CUSTOM_DATA_SOURCE_ID_2']
AMAZON_Q_APP_ID = os.environ['AMAZON_Q_APP_ID']
Q_CUSTOM_DATA_SOURCE_ID_1 = os.environ['Q_CUSTOM_DATA_SOURCE_ID_1']
Q_CUSTOM_DATA_SOURCE_ID_2 = os.environ['Q_CUSTOM_DATA_SOURCE_ID_2']
DEMO_S3_BUCKET = os.environ['DEMO_S3_BUCKET']
DEMO_S3_KEY = os.environ['DEMO_S3_KEY']
CLOUDFRONT_URL = os.environ['CLOUDFRONT_URL']
S3_DATA_SOURCE_ID = os.environ['S3_DATA_SOURCE_ID']

# Amazon Kendra Search example

In [22]:
import boto3

# Create the Kendra client in the region configured in dev.env (REGION)
# rather than whatever region the default AWS profile happens to use.
kendra = boto3.client("kendra", region_name=REGION)

query = "What is the name of the product with the highest rating?"

# Plain search against the index: no user context, no attribute filter.
response = kendra.query(
    QueryText = query,
    IndexId = KENDRA_INDEX
)

# print the title and excerpt of every returned result item
for result in response['ResultItems']:
    print(result['DocumentTitle']['Text'], result['DocumentExcerpt']['Text'])

productreviews The productreviews table contains reviews for different products. It includes columns for the product id, product name, review text, rating, reviewer, and created by. Sample values include Wireless Earbuds with a rating of 5, Smart Watch with a rating of 4, Laptop with a rating of 5, Coffee Maker wi
products ...The products table has columns product_id, product_name, supplier_id, category_id, quantity_per_unit, unit_price, units_in_stock, units_on_order, reorder_level, and discontinued. The table contains information about products, such as their names, suppliers, categories, quantities, prices, and whether they have been discontinued. For example, product Chai has an id of 1, is supplied by supplier 8, belongs to category 1, comes in boxes of 30 bags, costs $18 per unit, has 39 units in stock, 0 units on order, a reorder level of 10, and has not been discontinued...
productreviews ...The productreviews table contains reviews for different products. It includes columns f

In [23]:
# Result count reported in the query response (the saved output shows this can
# exceed the number of items returned in `ResultItems`).
response['TotalNumberOfResults']

11

In [24]:
# Inspect the top-level keys of the query response.
response.keys()

dict_keys(['QueryId', 'ResultItems', 'FacetResults', 'TotalNumberOfResults', 'ResponseMetadata'])

### Display results by relevance score


In [25]:
def display_results(response:dict, user_profile:str=None) -> None:
    """Print one line per result item: position, confidence bucket and title.

    Args:
        response: Query response dict; its ``ResultItems`` list is read, and
            each item must provide ``DocumentTitle.Text`` and
            ``ScoreAttributes.ScoreConfidence``.
        user_profile: Currently unused; kept so existing calls stay valid.

    Returns:
        None. Lines are written to stdout as ``"<i>. [<confidence>] <title>"``
        in the order the items appear in the response (no re-sorting is done).

    Raises:
        KeyError: If the response or an item lacks one of the keys above.
    """
    # Format every line before printing anything, so a malformed item raises
    # before partial output has been written.
    lines = [
        f"{position}. [{item['ScoreAttributes']['ScoreConfidence']}] {item['DocumentTitle']['Text']}"
        for position, item in enumerate(response['ResultItems'])
    ]
    for line in lines:
        print(line)

# Show position, confidence and title for the `response` obtained above.
display_results(response)


0. [MEDIUM] productreviews
1. [VERY_HIGH] products
2. [HIGH] productreviews
3. [MEDIUM] categories
4. [MEDIUM] customers
5. [MEDIUM] order_details
6. [MEDIUM] orders
7. [MEDIUM] employees
8. [MEDIUM] customer_customer_demo
9. [MEDIUM] employee_territories


# Kendra Search results with user context filter

You can filter a user's search results based on the user or their group access to documents. You can use a user token, user ID, or user attribute to filter documents. Amazon Kendra can also map users to their groups. You can choose to use AWS IAM Identity Center as your identity store/source.

User context filtering is a kind of personalized search with the benefit of controlling access to documents. For example, not all teams that search the company portal for information should access top-secret company documents, nor are these documents relevant to all users. Only specific users or groups of teams given access to top-secret documents should see these documents in their search results.

When a document is indexed into Amazon Kendra, a corresponding access control list (ACL) is ingested for most documents. The ACL specifies which user names and group names are allowed or denied access to the document. Documents without an ACL are public documents.

In [32]:
# Kendra Search results with user context filter
# https://docs.aws.amazon.com/kendra/latest/dg/user-context-filter.html
import boto3

# Create the Kendra client in the region configured in dev.env (REGION)
# rather than whatever region the default AWS profile happens to use.
kendra = boto3.client("kendra", region_name=REGION)

query = "What LLM papers are there?"

# Make sure the index contains documents whose ACL lists user names and groups.
# NOTE(review): the user IDs below are hard-coded demo identities.
# The same query is issued once per user so the filtered result lists can be
# compared side by side.
for user_id in ("huthmac@amazon.com", "felixh"):
    print(f'Search results for user: {user_id}')
    response = kendra.query(
        QueryText = query,
        IndexId = KENDRA_INDEX,
        UserContext = {
            "UserId": user_id
        }
    )
    display_results(response)

Search results for user: huthmac@amazon.com
0. [HIGH] combineLLMwithGraph
1. [MEDIUM] text2sqlpaper0
2. [MEDIUM] 627tablesWithContext
3. [MEDIUM] text2sqlpaper3
4. [MEDIUM] text2sqlpaper2
5. [MEDIUM] text2sqlpaper
Search results for user: felixh


### Note
User context filtering isn't an authentication or authorization control for your content. It doesn't authenticate the user and groups sent to the Query API. It is up to your application to ensure that the user and group information sent to the Query API is authenticated and authorized.

# Amazon Q Business example

To authenticate with Q Business you need to update the webapp.env file.

Further details and instructions can be found here:

https://github.com/aws-samples/configuring-qbusiness-with-idc-tti/tree/main

In [None]:
# IMPORTANT: to run the code below you first need to obtain the identity context:
#   1. launch the local web app (python main.py) by running this cell
#   2. log in, then copy the context into the idc_sts_context variable in webapp.env
#   3. stop this cell / the local web server

!python main.py

In [9]:
# copy sts:identity_context to get temporary credentials
import boto3
import os
from dotenv import load_dotenv, find_dotenv, set_key
import dotenv
import json
from qbapi_tools.api_helpers import QBusinessAPIHelpers
from qbapi_tools.access_helpers import (
    get_oidc_config,
    get_oidc_id_token,
    get_idc_sts_id_context,
    get_sts_credential,
)


# Load the Q Business settings stored in a local dotenv file; values from the
# file override anything already present in the process environment.
local_env_filename = 'webapp.env'
load_dotenv(find_dotenv(local_env_filename),override=True)

# Fail with one readable error naming every missing setting, instead of the
# opaque TypeError that `os.environ[name] = os.getenv(name)` raises when a
# variable is unset.
WEBAPP_ENV_VARS = ('region_name', 'qb_apl_id', 'qb_sts_role', 'idc_sts_context')
missing_webapp_vars = [name for name in WEBAPP_ENV_VARS if os.getenv(name) is None]
if missing_webapp_vars:
    raise RuntimeError(
        f"Missing environment variables (expected in {local_env_filename}): "
        + ", ".join(missing_webapp_vars)
    )

# webapp.env stores the region under `region_name`; mirror it into REGION,
# which is the name the rest of this notebook reads.
os.environ['REGION'] = os.environ['region_name']
REGION = os.environ['REGION']
qb_apl_id = os.environ['qb_apl_id']
qb_sts_role = os.environ['qb_sts_role']
idc_sts_context = os.environ['idc_sts_context']

# Obtain credentials from the STS role and the identity context copied into
# webapp.env.
credential = get_sts_credential(qb_sts_role, idc_sts_context,REGION)

def get_qclient(credentials):
    """Build a ``qbusiness`` client from a set of session credentials.

    Args:
        credentials: Mapping providing ``"AccessKeyId"``, ``"SecretAccessKey"``
            and ``"SessionToken"``.

    Returns:
        The client returned by ``Session.client("qbusiness", REGION)``, where
        ``REGION`` is the module-level region constant.
    """
    session_kwargs = {
        "aws_access_key_id": credentials["AccessKeyId"],
        "aws_secret_access_key": credentials["SecretAccessKey"],
        "aws_session_token": credentials["SessionToken"],
    }
    return boto3.Session(**session_kwargs).client("qbusiness", REGION)

def chat_with_amazon_q(prompt, conversation_id="", parent_message_id="", credentials=None):
    """Send one user message to the Amazon Q Business application.

    Args:
        prompt: Text sent as ``userMessage``.
        conversation_id: Conversation to continue; left out of the request
            when empty.
        parent_message_id: Message being replied to; only sent together with
            a ``conversation_id`` and left out of the request when empty.
        credentials: Credential mapping handed to ``get_qclient``.

    Returns:
        The ``chat_sync`` response, or ``{"error": <message>}`` if building
        the client or the call itself raises. Errors are also printed.
    """
    try:
        # Create the Q client with the supplied temporary credentials
        amazon_q = get_qclient(credentials)

        chat_params = {
            "applicationId": qb_apl_id,
            "userMessage": prompt,
        }
        if conversation_id:
            chat_params["conversationId"] = conversation_id
            # Skip an empty parent id: an empty string is not a valid message
            # identifier, so sending it would make the request fail.
            if parent_message_id:
                chat_params["parentMessageId"] = parent_message_id

        return amazon_q.chat_sync(**chat_params)
    except Exception as e:
        # Deliberate best effort: report the failure and return an error
        # payload so the notebook keeps running.
        print(f"Error in chat_with_amazon_q: {str(e)}")
        return {"error": str(e)}

def parse_amazon_q_response(response_json):
    """
    Reduce an Amazon Q response to its system message and a trimmed list of
    source attributions.

    Args:
    response_json (str or dict): The response, either as a JSON string (it is
        decoded with ``json.loads``) or as an already-parsed dictionary.

    Returns:
    dict: ``{'systemMessage': ..., 'sourceAttributions': [...]}``. Each
        attribution keeps only ``title``, ``snippet``, ``url`` (default ``''``)
        and ``citationNumber`` (default ``0``). A missing ``systemMessage``
        becomes ``''`` and missing attributions become an empty list.
    """
    payload = json.loads(response_json) if isinstance(response_json, str) else response_json

    # Fields kept from every attribution, paired with the fallback used when
    # the field is absent.
    kept_fields = (
        ('title', ''),
        ('snippet', ''),
        ('url', ''),
        ('citationNumber', 0),
    )

    return {
        'systemMessage': payload.get('systemMessage', ''),
        'sourceAttributions': [
            {field: entry.get(field, fallback) for field, fallback in kept_fields}
            for entry in payload.get('sourceAttributions', [])
        ],
    }

In [10]:
# Ask Amazon Q the same question used in the Kendra search example, then
# print the parsed answer and its source attributions as indented JSON.
prompt = "What is the name of the product with the highest rating?"
response = chat_with_amazon_q(prompt,credentials=credential)
parsed_result = parse_amazon_q_response(response)
print(json.dumps(parsed_result, indent=2))

{
  "systemMessage": "The product with the highest rating based on the information provided in the data source is the Laptop, which has a rating of 5 .",
  "sourceAttributions": [
    {
      "title": "Laptop",
      "snippet": " Excellent performance for the price.",
      "url": "",
      "citationNumber": 1
    }
  ]
}


## Appendix

In [26]:
# generate JWT token for user context filter with Amazon Kendra
import base64
import uuid
from datetime import datetime, timedelta
import jwt

# HS256 token generation
def generate_token(username, groups, secret, sub, kid):
    """Create a short-lived HS256-signed JWT carrying a user name and groups.

    Args:
        username: Value of the ``username`` claim.
        groups: Value of the ``groups`` claim.
        secret: Shared secret used as the HS256 signing key.
        sub: Value written to both the ``sub`` and ``iss`` claims.
        kid: Key identifier placed in the token header.

    Returns:
        The encoded token as returned by ``jwt.encode``. It expires five
        minutes after it is issued.

    Side effects:
        Prints the URL-safe base64 encoding of ``secret``.
    """
    # Function-scope import so this block does not depend on changes to the
    # cell's import lines.
    from datetime import timezone

    # NOTE(review): this writes secret material to the cell output, which is
    # saved with the notebook. Clear the output before sharing the notebook.
    base64secret = base64.urlsafe_b64encode(secret.encode('utf-8')).decode('utf-8')
    print("base64secret:", base64secret)

    # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated and
    # returns a naive value.
    now = datetime.now(timezone.utc)

    # Set claims
    claims = {
        "username": username,
        "groups": groups,
        "iat": now,
        "exp": now + timedelta(minutes=5),
        # NOTE(review): the issuer reuses `sub`; confirm this is intended.
        "iss": sub,
        "sub": sub,
        "jti": str(uuid.uuid4())  # unique id per generated token
    }

    headers = {
        "typ": "JWT",
        "kid": kid
    }

    jwt_token = jwt.encode(claims, secret, algorithm="HS256", headers=headers)
    return jwt_token


# Demo inputs for generate_token.
# NOTE(review): the secret is hard-coded for demonstration only; load it from
# the environment or a secrets manager for real use.
username = 'felixh'
groups = ['GROUP1']
secret = 'demo123'
sub = ''  # generate_token writes this value to both the "iss" and "sub" claims
kid = 'key2018'  # placed in the JWT header as "kid"

jwt_token = generate_token(username, groups, secret, sub, kid)


base64secret: ZGVtbzEyMw==
