#### Step 1 : Import Required Libs

In [2]:
## Import required Libs
import boto3
import json
from botocore.exceptions import BotoCoreError, ClientError


#### Step 2 : Set AWS Region

In [3]:
## Set AWS Region
try:
    # Set AWS Region
    region = "us-east-1"  # Note the correct region code "us-east-1"
    
    # The Bedrock runtime must be initialized with your AWS credentials/profile and region to authenticate you 
    # and direct requests to the right foundation models or agent.
    bedrock_runtime = boto3.client("bedrock-runtime", region_name=region)
    s3 = boto3.client("s3", region_name=region)
    
    print("Project Environment is setup successfully")
    
except (BotoCoreError, ClientError) as error:
    print(f"An error occurred while setting up AWS clients: {error}")


Project Environment is setup successfully


#### Step-3 : Setup RAG Function

###### 1. Take user query
###### 2. Retrieve relevant information from our knowledge base
###### 3. Generate a response using a foundation model

In [4]:
##--------------------------------------------------------------------------------------------------------------------------------------------- 
## 1] RAG Model - Amazon Titan text foundation model available via Amazon Bedrock.
##  This model is a large language model (LLM) optimized for a wide range of advanced, general-purpose language
##  tasks such as open-ended text generation, conversational chat, and also supports Retrieval Augmented Generation (RAG) workflows.
##  ---
## 2] knowledge_base_id identifies the knowledge base to query.This is a unique identifier (typically a string or an Amazon Resource Name, ARN)
##  that specifies which knowledge base your application or model should access during information retrieval.
##  In the context of Amazon Bedrock: A knowledge base is a structured repository of information—often created by ingesting private documents,
##  databases, or other content. Amazon Bedrock uses these knowledge bases to support Retrieval-Augmented Generation (RAG) workflows.
##  The knowledge_base_id is the unique ID assigned to a specific knowledge base within Amazon Bedrock. When issuing a query,
##  this ID tells Bedrock which knowledge base to search for relevant information.
##---------------------------------------------------------------------------------------------------------------------------------------------
##
model_id = "amazon.titan-text-express-v1"
def generate_rag_response(query,knowledge_base_id, model_id = model_id):
    """ Generate Response using RAG Approach
        Args :
         1] query (str) : The user's question
         2] knowledge_base_id (str) : ID of the knowledge base to query
    """
    try: 
        """ Step-1 : Retrieve information from the knowledge base
        # Note : In a full implemenation, we would call the Bedrock Knowledge Base API here
        # For this example, we will simulate the retrieval with a placeholder
        # bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name = region)
        # kb_response = bedrock_agent_runtime.retrieve(
           knowledgeBaseId=knowledge_base_id,
           retrivalQuery ={'text' : query},
           numberOfResults = 3)
       """
       ## For simplified example:
        retrieved_info =""" Employee Handbook Section 3.2 : Pait Time Off
         All Employees accrue PTO at a rate of 1.5 days per month (18 days per year).
         PTO requests must be submitted at least two weeks in advance through the HR portal
         Unused PTO can be carried over to the following year, up to a maximum of 5 days
         For further queries on PTO, please contact hr@example.com

         Remote work policy for full time employees only - Full time employees are eligible to work remotely 
         up to 2 days per week and max of 10 days per month under unavoidable circumstances only when manager 
         has approved additional remote work days. Under normal circumstances, full time employees are expected to work from office for 3 days a week.
         For consitency, co-ordination and efficiency purpose, all employees are expected to work from
         office from Monday to Wednesday.
                                          """
        
        # Step 2 - Construct the prompt for the model using retrieved information and query
        prompt = f"""
          You are an HR assistant for our company.Use only following information to answer the query.
          If you do not know the answer based on this information then say No - don't make up an answer

          RETRIEVED INFORMATION :
          {retrieved_info}

          USER QUESTION:
          {query}
            """
    
        # Step 3 - Construct the payload
        # Generate a response using the specified model
        if 'claude' in model_id.lower():
            payload = {
               "anthropic_version": "bedrock-2023-05-31", # Specify the version of the Anthropic model to use
               "max_tokens": 512, # How many words (max tokens) the AI can respond with (up to 512).
               "temperature" : 0.5, #How “creative” or random the answers should be (temperature 0.5 means moderately creative).
               "messages" :[
                    {
                         "role": "user",# The role of the user in the conversation
                         "content": prompt # The actual text of the user message, which includes the retrieved information and the user's question
                     
                  }
                  
               ]
            }
        else:
            # Generic format for other models.
            payload = {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 256,
                    "temperature": 0.9
                }
            }
        # Step 4 - Call the model   
        response = bedrock_runtime.invoke_model(
            modelId=model_id,
            body=json.dumps(payload), # Convert the payload to JSON format
            contentType="application/json", # Specify the content type as JSON
            accept="application/json" # Specify the response format as JSON
        )
        # Step 5 - Parse the response based on the model type.
        response_body = json.loads(response['body'].read().decode('utf-8')) # Read and decode the response body
        ###       
        if 'claude' in model_id.lower():
            generated_text = response_body["content"][0]["text"]
        else:
            generated_text = response_body["results"][0]["outputText"]
        
        return generated_text
    
    except Exception as e:
        return f"An error occurred while generating the response:{str (e)}"


#### Step 4:  Test RAG Application

In [5]:
# Define a sample knowledge base ID - This would be a real ID in a production scenario
sample_kb_id = "sample hr knowledge base id"  # Replace with your actual knowledge base ID
# Test query
# test_query = "How many PTO days do full-time employees get per year?"
#test_query = "Is current remote work policy is applicable for full time employees only? What about part time employees?"
test_query = " What is the role of my manager ?"
# test_query = "Why all employees are expected to work from office from Monday to Wednesday?"  # Uncomment to use this query
# # Call the function to generate a response
response = generate_rag_response(test_query, sample_kb_id)
print("\n User Query:", test_query)
print("\n Response from RAG Model:", response)
# --- End  ---




 User Query:  What is the role of my manager ?

 Response from RAG Model:  The manager is expected to approve additional remote work days if unavoidable circumstances permit


In [6]:
# Test with a question that should be answerable from our knowledge base
test_query_2 = "What's the process for requesting time off?"
response_2 = generate_rag_response(test_query_2, sample_kb_id)

# Test with a question that might not be in our knowledge base
test_query_3 = "What's the company policy on remote work?"
response_3 = generate_rag_response(test_query_3, sample_kb_id)

print("\nQuestion about time off process:")
print(response_2)

print("\nQuestion about remote work policy (not in our example knowledge base):")
print(response_3)



Question about time off process:

The process for requesting time off is by submitting a request through the HR portal at least two weeks in advance.

Question about remote work policy (not in our example knowledge base):
 Full-time employees are permitted to work remotely up to two days per week and a maximum of ten days per month under unavoidable circumstances only when approved by the manager.


In [7]:
# Set up the Knowledge Base Using Vector Database
# Step 1 - Create S3 bucket. Create markdown file containing HR Policies and store in AWS s3

import boto3

# Initialize boto3 S3 client
s3 = boto3.client('s3')

# Generate a unique bucket name using your account ID
account_id = boto3.client('sts').get_caller_identity()['Account']
bucket_name = f"hr-knowledge-base-{account_id}"

# Check if the bucket already exists
def bucket_exists(bucket_name):
    response = s3.list_buckets()
    buckets = [bucket['Name'] for bucket in response['Buckets']]
    return bucket_name in buckets

# Create the bucket if it doesn't exist
if not bucket_exists(bucket_name):
    try:
        s3.create_bucket(Bucket=bucket_name)
        print(f"Created bucket: {bucket_name}")
    except Exception as e:
        print(f"Error creating bucket: {str(e)}")
else:
    print(f"Bucket {bucket_name} already exists")

Bucket hr-knowledge-base-257269733378 already exists


In [8]:
# Create a sample HR policy markdown file
hr_policy = """ Below is a comprehensive 3-page sample HR policy addressing annual leave, maternity and paternity benefits, sick leave, casual leave, festival leaves, leave carry-forward/cash payout, and a professional certification voucher. This is structured as a policy section of an employee handbook in accordance with recent best practices, clearly outlining purpose, scope, entitlements, approval processes, and conditions.[1][2][3]

***

## 1. Leave and Time-Off Policy

### Purpose and Scope

This policy guides all regular full-time and part-time employees regarding eligibility, accrual, usage, and management of various types of leave. It aims to enable a healthy work-life balance, demonstrate organizational care, and meet compliance standards. All sections apply company-wide except where statutory or contractual provisions specify otherwise.

***

### 1.1. Annual Leave (Vacation Leave)

- **Entitlement:** Each employee is entitled to 24 days of paid annual leave per calendar year, accrued monthly.
- **Leave Carry Forward:** Up to 10 days of unused annual leave may be carried forward to the next calendar year. Surplus leave in excess of 10 days will lapse, except as described below.
- **Cash Payout:** At year-end, 50% of surplus leave days (beyond the 10 carry-forward cap) will be paid out as a one-time cash benefit, calculated at the employee’s basic daily wage rate, subject to statutory deductions.
- **Approval:** Employees must submit annual leave requests at least 2 weeks in advance. Approvals are subject to departmental work requirements.

***

### 1.2. Sick Leave

- **Entitlement:** Employees are eligible for up to 12 days of paid sick leave per year, non-cumulative.
- **Documentation:** Medical certificate is required for absences exceeding 2 consecutive days. Unused sick leave lapses at year-end and cannot be carried forward or cashed out.
- **Notification:** Employees must notify their manager and HR as early as possible on the first day of illness.

***

### 1.3. Casual Leave

- **Entitlement:** 8 days of paid casual leave annually, for personal, urgent, or unforeseen matters.
- **Usage:** No more than 2 consecutive days without managerial approval; unused casual leave lapses at year-end.
- **Application:** At least one day’s prior notice required unless in emergencies.

***

### 1.4. Festival and Public Holidays

- **Entitlement:** All declared national and company-specified festival/public holidays are paid days off.
- **Optional Holidays:** Employees may select up to 2 optional festival days annually from a company-approved list for personal observance, in addition to declared holidays.

***

### 1.5. Maternity and Paternity Benefits

- **Maternity Leave:** 26 weeks of paid leave for eligible employees, as per statutory guidelines. Up to 8 weeks may be availed prior to expected delivery date.
- **Paternity Leave:** 15 days of paid paternity leave within 3 months of childbirth.
- **Adoption:** Employees adopting a child under 12 months: 12 weeks (primary caregiver) or 7 days (secondary caregiver) paid leave.
- **Application:** Written application must be submitted to HR at least 4 weeks in advance, unless emergency circumstances apply. Medical/adoption documentation is required.

***

### 1.6. Leave Without Pay (LWP)

- **Eligibility:** Applied upon exhaustion of all paid leave entitlements, subject to managerial approval.
- **Termination or Resignation:** Accrued but unused annual leave will be compensated as per the leave carry forward/cash out policy upon separation from service.

***

## 2. Certification Voucher Policy

### 2.1. Professional Development Voucher

- **Purpose:** To support employee upskilling and industry certification, each permanent employee is eligible for a voucher of up to **USD $500** per calendar year to reimburse the successful completion of a recognized industry certification.
- **Eligibility and Approval:** The certification must be relevant to the employee’s job role and/or agreed career development plan. Prior written approval of the immediate manager and HR is required before enrollment.
- **Reimbursement:** Reimbursement is provided upon submission of completion certificate and original receipts. Employees must agree to serve the company for at least 12 months after completion, or the voucher may be clawed back on a prorated basis.
- **Limit:** One voucher per person per calendar year, subject to budget availability.

***

## 3. General Provisions & Enforcement

- **Compliance:** All employees are obliged to adhere to the procedures for applying and reporting for leave and certification reimbursement.
- **Non-Compliance:** Falsification of leave claims, persistent absenteeism, or non-approved absences may lead to disciplinary action as per the Company Code of Conduct.
- **Policy Review:** This policy is subject to annual review and may be modified per statutory changes or management decision. Employees will be notified of any changes.

***

**Definitions:**  
- *Paid leave:* Leave during which employee receives full salary/wages.  
- *Carry forward:* Transferring accrued but unused leave to the next year.  
- *Cash payout:* Direct monetary compensation for unused leave.

**References:**  
- All leave entitlements and provisions in this policy comply with applicable local labor laws and supersede prior policies where inconsistent.[2][3][4]

***

*This policy is accessible on the company intranet and will be provided to all new employees during induction. HR remains the custodian for policy implementation and clarification.*
"""

In [9]:
## Save the HR policy content to a markdown file in S3
with open('hr_policy.md', 'w') as file:
    file.write(hr_policy)
# Upload the markdown file to the S3 bucket
try:    
    s3.upload_file('hr_policy.md', bucket_name, 'hr_policy.md')
    print(f"HR policy file uploaded to S3 bucket {bucket_name} successfully.")  
except Exception as e:
    print(f"Error uploading HR policy file to S3: {str(e)}")    
    

HR policy file uploaded to S3 bucket hr-knowledge-base-257269733378 successfully.


In [10]:
# # Create a python function to query the knowledge base and generate a response using the RAG model
# import json
# import boto3
# #
# model_id = "amazon.titan-text-express-v1"
# def query_hr_knowledge_base(user_query, knowledge_base_id, model_id = model_id):
#     """ Query HR Knowledge Base and generate a response using RAG Model
#         Args :
#          1] user_query (str) : The user's question
#          2] knowledge_base_id (str) : ID of the knowledge base to query
#     """
#     #----
#     # Remember, bedrock_runtime is for direct model inference, while bedrock_agent_runtime is for agent interactions,
#     # multi-turn flows, and advanced orchestration within AWS Bedrock. Both clients are required when your
#     # application needs to both interact directly with models  and leverage Bedrock’s agent capabilities,
#     # as each exposes different methods and API endpoints.
#     #----
#     # Multi-turn flows enable an AI agent or system to have multiple rounds of conversation with a user,
#     # where each exchange depends on the previous context. Rather than responding to isolated, single-turn queries, 
#     # the agent maintains conversational memory—letting it ask follow-up questions,  clarify details, 
#     # and adapt its actions based on user responses
#     #----
#     # Advanced orchestration refers to the coordination and management of multiple AI components—models, tools, agents, 
#     # external APIs, data pipelines—so they work together seamlessly as a unified system
#     #----
#     # Initialize the Bedrock runtime client.
#     #----
#     bedrock_runtime = boto3.client("bedrock-runtime", region_name=region)
#     bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name=region) # Agent runtime 
#     try:
#         retrieve_response = bedrock_agent_runtime.retrieve(
#             knowledgeBaseId=knowledge_base_id,
#             retrievalQuery={'text': user_query
#                             },
#             retrievalConfiguration={
#                 'vectorSearchConfiguration': {
#                     numberOfResults: 5 # Retrieve top most 5 relevant results
#                     }
#                 }
            
#         )

#         # Extract the retrieved information from the response
#         retrived_passages = []
#         for result in retrieve_response.get('retrievedResults',[]):
#             content = result.get('content', {}).get('text', '')
#             source = "Unknown source"
#             location = result.get('location', {})
#             if location:
#                 if 's3location' in location:
#                     source = location.get('s3location', {}).get('uri', source)
#                 elif 'type' in location:
#                     source = location.get('type', source)

#             score  = result.get('score', 0)
#             # Append the retrieved passage to the list
#             retrived_passages.append({
#                 'content': content,
#                 'source': source,
#                 relevance_score: score
#             })
#         # Step 2 - Prepare context from retrieved passages for the model
#         context = "\n\n".join([f"Passage: {p['content']}" for p in retrived_passages])
#         # If no context was retrieved, use a default message to inform the user
#         if not context:
#             context = "No relevant information found in the knowledge base."
#         # Step 3 - Construct the prompt for the model using retrieved information and query
#         prompt = f"""
#           You are an HR assistant for our company.Use only following information to answer the query.
#           If you do not know the answer based on this information then say 
#           'No I do not have definitive answer for this as of now' - don't make up an answer

#           Context:
#           {context}

#           Questions: {user_query}

#           Answer:            
#         """
#         # Step 4 - Construct the payload - Remember, the payload structure may vary based on the model type
#         if "anthropic" in model_id.lower():
#             print("Inside Anthropic Model")
#             payload = {
#                 "anthropic_version": "bedrock-2023-05-31",  # Specify the version of the Anthropic model to use
#                 "max_tokens": 512,  # How many words (max tokens) the AI can respond with (up to 512).
#                 "temperature": 0.5,  # How “creative” or random the answers should be (temperature 0.5 means moderately creative).
#                 "messages": [
#                     {
#                         "role": "user",  # The role of the user in the conversation
#                         "content": prompt  # The actual text of the user message, which includes the retrieved information and the user's question
#                     }
#                 ]
#             }
#         elif "amazon.titan" in model_id.lower():
#             print("Inside Amazon Titan Model")
#             payload = {
#                 "inputText": prompt,
#                 "textGenerationConfig": {
#                     "maxTokenCount": 512,  # Maximum number of tokens in the response
#                     "temperature": 0.6, # Controls the randomness of the output
#                     "topP": 0.9  # Controls the diversity of the output by limiting the probability mass considered
#                 }
#             }
#         elif "meta.llama" in model_id.lower():
#             print("Inside Meta Llama Model")
#             payload = {
#                 "inputText": prompt,
#                 "textGenerationConfig": {
#                     "maxTokenCount": 512,  # Maximum number of tokens in the response
#                     "temperature": 0.7,  # Controls the randomness of the output
#                     "topP": 0.9  # Controls the diversity of the output by limiting the probability mass considered
#                 }
#             }
#         else:
#             raise ValueError(f"Unsupported model ID: {model_id}. Please use a supported model ID for RAG workflows.")
        
#         # Step 5 - Call the model
#         invoke_response = bedrock_runtime.invoke_model(
#             modelId=model_id,
#             body=json.dumps(payload),  # Convert the payload to JSON format
#             contentType="application/json",  # Specify the content type as JSON
#             accept="application/json"  # Specify the response format as JSON
#         )

#         # Step 6 - Parse the response based on the model type.
#         response_body = json.loads(invoke_response['body'].read().decode('utf-8'))  # Read and decode the response body
#         #
#         if "anthropic" in model_id.lower():
#             print("Model used here is :", model_id)
#             generated_answer = response_body.get('content',[{}])[0].get("text")
#         elif "amazon.titan" in model_id.lower():
#             print("Model used here is :", model_id)
#             generated_answer = response_body.get('results', [{}])[0].get("outputText",'')
#         elif "meta.llama" in model_id.lower():
#             print("Model used here is :", model_id)
#             generated_answer = response_body.get('generation', '')
#         else:
#             generated_answer = "Error : Could not parse response from the model. Please check the model ID and payload format."

#         # Return the generated answer
#         return {
#             "query" : user_query,
#             "retrieved_passages": retrived_passages,
#             "generated_answer": generated_answer,   
#             "model_used": model_id
#         }
#     except Exception as e:
#         {
#             "Error :"   : str(e),
#             "query :" : user_query,
#             "knowledge_base_id" : knowledge_base_id,
#             "retrieved_passages": [],
#             "model_id": model_id
#         }


In [11]:
import json
import boto3

model_id = "amazon.titan-text-express-v1"  # Default
region = "us-east-1"  # Change this to your AWS region

def query_hr_knowledge_base(user_query, knowledge_base_id, model_id=model_id, region=region):
    """
    Query HR Knowledge Base and generate a response using a RAG Model.
    Args:
        user_query (str): The user's question.
        knowledge_base_id (str): ID of the knowledge base to query.
        model_id (str): The model ID to use.
        region (str): AWS region where Bedrock is deployed.
    """
    bedrock_runtime = boto3.client("bedrock-runtime", region_name=region)
    bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name=region)

    try:
        retrieve_response = bedrock_agent_runtime.retrieve(
            knowledgeBaseId=knowledge_base_id,
            retrievalQuery={'text': user_query},
            retrievalConfiguration={
                'vectorSearchConfiguration': {
                    'numberOfResults': 5  # Top 5 relevant results
                }
            }
        )

        # Extract retrieved information
        retrieved_passages = []
        for result in retrieve_response.get('retrievedResults', []):
            content = result.get('content', {}).get('text', '')
            source = "Unknown source"
            location = result.get('location', {})

            if location:
                if 's3location' in location:
                    source = location.get('s3location', {}).get('uri', source)
                elif 'type' in location:
                    source = location.get('type', source)

            score = result.get('score', 0)
            retrieved_passages.append({
                'content': content,
                'source': source,
                'relevance_score': score
            })

        # Prepare context
        context = "\n\n".join([f"Passage: {p['content']}" for p in retrieved_passages])
        if not context:
            context = "No relevant information found in the knowledge base."

        # Construct prompt
        prompt = f"""
        You are an HR assistant for our company. Use only the following information to answer the query.
        If you do not know the answer based on this information then say:
        'No I do not have definitive answer for this as of now' — don't make up an answer.

        Context:
        {context}

        Question: {user_query}

        Answer:
        """

        # Select payload based on model type
        if "anthropic" in model_id.lower():
            print("Inside Anthropic Model")
            payload = {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 512,
                "temperature": 0.5,
                "messages": [{"role": "user", "content": prompt}]
            }
        elif "amazon.titan" in model_id.lower():
            print("Inside Amazon Titan Model")
            payload = {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 512,
                    "temperature": 0.6,
                    "topP": 0.9
                }
            }
        elif "meta.llama" in model_id.lower():
            print("Inside Meta Llama Model")
            payload = {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 512,
                    "temperature": 0.7,
                    "topP": 0.9
                }
            }
        else:
            raise ValueError(f"Unsupported model ID: {model_id}. Please use a supported model ID for RAG workflows.")

        # Call the model
        invoke_response = bedrock_runtime.invoke_model(
            modelId=model_id,
            body=json.dumps(payload),
            contentType="application/json",
            accept="application/json"
        )

        # Parse the response
        response_body = json.loads(invoke_response['body'].read().decode('utf-8'))

        if "anthropic" in model_id.lower():
            generated_answer = response_body.get('content', [{}])[0].get("text", "")
        elif "amazon.titan" in model_id.lower():
            generated_answer = response_body.get('results', [{}])[0].get("outputText", '')
        elif "meta.llama" in model_id.lower():
            generated_answer = response_body.get('generation', '')
        else:
            generated_answer = "Error: Could not parse response from the model."

        # Final return
        return {
            "query": user_query,
            "retrieved_passages": retrieved_passages,
            "generated_answer": generated_answer,
            "model_used": model_id
        }

    except Exception as e:
        return {
            "error": str(e),
            "query": user_query,
            "knowledge_base_id": knowledge_base_id,
            "retrieved_passages": [],
            "model_id": model_id
        }


In [12]:
##Retrievel Example
# Define a sample knowledge base ID - This would be a real ID in a production scenario
#sample_kb_id = "sample hr knowledge base id"  # Replace with your actual knowledge base ID
sample_kb_id = "ZHJZPXTKEV"
print(f"Model Id is : {model_id}")

# Test with a question that should be answerable from our knowledge base
query = "How many days I can work remotely in a month?"
# query = "Who can avail Remote working benefit?"
result = query_hr_knowledge_base(query,sample_kb_id,model_id)
if result is not None:
    print("\n" + "=="*30)
    print(f"Query: {result['query']}")
if result['retrieved_passages']:
    print(f"\nRetrieved Information:\n{result['retrieved_passages'][0]['content']}")
else:
    print("\nNo retrieved information found.")
print(f"\nGenerated Answer:\n {result['generated_answer']}")




Model Id is : amazon.titan-text-express-v1
Inside Amazon Titan Model

Query: How many days I can work remotely in a month?

No retrieved information found.

Generated Answer:
 No I do not have definitive answer for this as of now
