***Using GenAI Foundational Platform Endpoints for RAG***

The following sample shows how to build a RAG workflow using the GenAI Foundational Architecture endpoints.

Before you begin, create a .env file in the same folder as this notebook and define the following variables in it. You can get the values for these variables from your platform administrator.

 
 COGNITO_CLIENT_ID='<replace_me>'

 COGNITO_CLIENT_SECRET='<replace_me>'

 COGNITO_USER_POOL_ID='<replace_me>'

 COGNITO_REGION='<replace_me>'

 COGNITO_DOMAIN='<replace_me>'
 
 PLATFORM_API_URL='<replace_me>'




***Note: the .env file is only needed when running a notebook. In a real application deployed to EC2 or a container, you can just create environment variables (for example, using the export command).***

Install the requirements

In [None]:
pip install -r reqs.txt

Load the environment variables

In [None]:
import os
import dotenv
import pprint
import time
# Load the environment variables. This is only necessary if you are using a .env file to store your credentials.
dotenv.load_dotenv()

Initialize values from environment variables

In [None]:
import os

# Cognito app-client credentials (None when the variable is not set).
APP_CLIENT_ID = os.environ.get('COGNITO_CLIENT_ID')
APP_CLIENT_SECRET = os.environ.get('COGNITO_CLIENT_SECRET')

# Cognito user pool location.
APP_USER_POOL_ID = os.environ.get('COGNITO_USER_POOL_ID')
REGION = os.environ.get('COGNITO_REGION')
DOMAIN = os.environ.get('COGNITO_DOMAIN')

# Root URL that the get/post helpers prepend to every endpoint path.
BASE_URL = os.environ.get('PLATFORM_API_URL')

We create reusable get and post methods to make API calls to the platform

In [None]:
import requests
import json
import boto3

def get(proxy=None, token=None):
    """Send a GET request to the platform API.

    Args:
        proxy: Optional endpoint path appended to ``BASE_URL`` (for example
            ``'model/list_models'``). When omitted, ``BASE_URL`` itself is
            requested.
        token: Optional bearer token. When given, it is sent in the
            ``Authorization`` header; otherwise no auth header is sent.

    Returns:
        The ``requests.Response`` object for the call.

    Raises:
        requests.HTTPError: If the response has a 4xx/5xx status
            (via ``raise_for_status``).
    """
    url = BASE_URL
    # if url ends with /, remove it so the join below does not produce '//'
    if url.endswith('/'):
        url = url[:-1]
    if proxy:
        url = url + '/' + proxy

    # Start from an empty dict so the request still works when no token is
    # supplied (headers must always be bound before the call below).
    headers = {}
    if token:
        headers['Authorization'] = f'Bearer {token}'
    response = requests.get(url, headers=headers, timeout=60)
    response.raise_for_status()
    return response

def post(data, proxy=None, token=None):
    """Send a POST request with a JSON-encoded body to the platform API.

    Args:
        data: JSON-serializable payload; it is encoded with ``json.dumps``
            and sent as the request body.
        proxy: Optional endpoint path appended to ``BASE_URL`` (for example
            ``'model/invoke'``). When omitted, ``BASE_URL`` itself is used.
        token: Optional bearer token. When given, it is sent in the
            ``Authorization`` header; otherwise no auth header is sent.

    Returns:
        The ``requests.Response`` object for the call.

    Raises:
        requests.HTTPError: If the response has a 4xx/5xx status
            (via ``raise_for_status``).
    """
    url = BASE_URL
    # Drop one trailing slash so the join below does not produce '//'.
    if url.endswith('/'):
        url = url[:-1]
    if proxy:
        url = url + '/' + proxy

    # Start from an empty dict so the request still works when no token is
    # supplied (headers must always be bound before the call below).
    headers = {}
    if token:
        headers['Authorization'] = f'Bearer {token}'
    response = requests.post(url, headers=headers, data=json.dumps(data), timeout=60)
    response.raise_for_status()
    return response

Authenticate with Cognito and get the access token. We send this token in the Authorization header of every call to the platform.

In [None]:
from utils import CognitoTokenManager, get_cognito_public_keys
import pprint
# Build the token manager from the Cognito settings read from the environment.
cognito_token_manager = CognitoTokenManager(APP_CLIENT_ID, APP_CLIENT_SECRET, APP_USER_POOL_ID, REGION, DOMAIN)
# NOTE(review): this calls an underscore-prefixed (conventionally private) method.
# Presumably it exchanges the client id/secret for an access token -- confirm
# against utils, and prefer a public accessor if the class offers one.
token = cognito_token_manager._fetch_token_with_secret()

#### Listing Models

In [None]:
# List the models exposed by the platform: GET /model/list_models
list_model_endpoint = 'model/list_models'
response = get(token=token, proxy=list_model_endpoint)
pprint.pprint(response.json())

#### Invoke Model

Simple Text Prompt

In [None]:
# Invoke a model with a plain-text prompt: POST /model/invoke
invoke_model_endpoint = 'model/invoke'
data = dict(
    model_name="ANTHROPIC_CLAUDE_V2",
    prompt="Translate the following text to French: 'Hello, how are you?'",
    max_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    stop_sequences=["\\n"],
)
response = post(data, proxy=invoke_model_endpoint, token=token)
pprint.pprint(response.json())

Messages API

In [None]:
# Invoke a model with a list of role-tagged messages: POST /model/invoke
# (reuses invoke_model_endpoint defined in the text-prompt cell)
data = dict(
    model_name="ANTHROPIC_CLAUDE_V2",
    prompt=[
        dict(role="user", content=[{"text": "What is the weather like today?"}]),
        dict(role="assistant", content=[{"text": "The weather is sunny with a high of 25°C."}]),
    ],
    max_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    stop_sequences=["\\n"],
    system_prompts=[{"text": "You are a helful assistant."}],
)
response = post(data, proxy=invoke_model_endpoint, token=token)
pprint.pprint(response.json())

#### Embed 

In [None]:
# Request an embedding for a piece of text: POST /model/embed
embed_text_endpoint = 'model/embed'
data = dict(
    model_name="TITAN_TEXT_EMBED_V2",
    input_text="Hello, how are you?",
)
response = post(data, proxy=embed_text_endpoint, token=token)
pprint.pprint(response.json())

#### Document Extraction

Create Extraction Job

In [None]:
# Create an extraction job; the response is kept in `extraction_job` so that
# later cells can read its 'extraction_job_id'.
create_extraction_job_endpoint = 'document/extraction/create_job'
extraction_job = get(token=token, proxy=create_extraction_job_endpoint)
pprint.pprint(extraction_job.json())

Register Files to the Job

In [None]:
# Register a local file with the extraction job created earlier.
register_file_endpoint = 'document/extraction/register_file'
file_name = '<REPLACE_WITH_LOCAL_FILE_PATH>' # e.g. 'data/sample.pdf'
data = dict(
    extraction_job_id=extraction_job.json()['extraction_job_id'],
    file_name=file_name,
)
response = post(data, proxy=register_file_endpoint, token=token)
pprint.pprint(response.json())

Upload the files using presigned urls

In [None]:
## Upload the file to the S3 bucket
# The pre-signed URL comes from the register_file response of the previous cell.
pre_signed_url = response.json()['upload_url']
import requests
with open(file_name, 'rb') as f:
    # Use the same 60s timeout as the get/post helpers so a stalled
    # connection cannot hang the notebook indefinitely.
    response = requests.put(pre_signed_url, data=f, timeout=60)
print(response.status_code)
# Fail here on a rejected upload instead of starting the extraction job
# without its input file.
response.raise_for_status()

Start Extraction Job

In [None]:
# Start the extraction job once its files have been uploaded.
start_job_endpoint = 'document/extraction/start_job'
data = dict(extraction_job_id=extraction_job.json()['extraction_job_id'])
response = post(data, proxy=start_job_endpoint, token=token)
pprint.pprint(response.json())

Check Extraction Job Status

In [None]:
# GET /document/extraction/job_status/{extraction_job_id}
import time

job_status_endpoint = f'document/extraction/job_status/{extraction_job.json()["extraction_job_id"]}'
response = get(proxy=job_status_endpoint, token=token)
status = response.json()['status']
# Poll until the job reaches one of the statuses this loop treats as final.
while status not in ('COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS'):
    # Wait between polls instead of calling the API in a tight loop.
    time.sleep(5)
    response = get(proxy=job_status_endpoint, token=token)
    status = response.json()['status']
    print(status)
pprint.pprint(response.json())

Get Extracted Text

In [None]:
# POST /document/extraction/file_status
file_status_endpoint = 'document/extraction/file_status'
data = {
    "extraction_job_id": extraction_job.json()['extraction_job_id'],
    "file_name": file_name
}
response = post(proxy=file_status_endpoint, token=token, data=data)
pprint.pprint(response.json())
result_url = response.json()['result_url']

# Get the result
# Same 60s timeout as the get/post helpers, so a stalled download cannot hang.
response = requests.get(result_url, timeout=60)
print(response.status_code)
# Raise on an error status rather than trying to parse an error body as JSON.
response.raise_for_status()
pprint.pprint(response.json())

#### Chunking

Create a chunking job

In [None]:
# Create a chunking job for the extraction job: POST /document/chunking/create_job
create_chunking_job_endpoint = 'document/chunking/create_job'

# Chunking settings sent with the request.
chunking_strategy = 'fixed_size'
chunk_size = 400
chunk_overlap = 100

data = dict(
    extraction_job_id=extraction_job.json()['extraction_job_id'],
    chunking_strategy=chunking_strategy,
    chunking_params=dict(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
)
chunk_job = post(data, proxy=create_chunking_job_endpoint, token=token)
pprint.pprint(chunk_job.json())

Check Chunking Job Status

In [None]:
# GET /document/chunking/job_status/{job_id}
import time

job_status_endpoint = f'document/chunking/job_status/{chunk_job.json()["chunking_job_id"]}'
chunk_job_status = get(proxy=job_status_endpoint, token=token)
status = chunk_job_status.json()['status']
# Poll until the job reaches one of the statuses this loop treats as final.
while status not in ('COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS'):
    # Wait between polls instead of calling the API in a tight loop.
    time.sleep(5)
    chunk_job_status = get(proxy=job_status_endpoint, token=token)
    status = chunk_job_status.json()['status']
    print(status)
pprint.pprint(chunk_job_status.json())

Get Chunks

In [None]:
# POST /document/chunking/chunk_file_url
chunk_file_url_endpoint = 'document/chunking/chunk_file_url'
data = {
    "chunking_job_id": chunk_job.json()['chunking_job_id'],
    "file_name": file_name
}
chunk_file = post(proxy=chunk_file_url_endpoint, token=token, data=data)
pprint.pprint(chunk_file.text)
chunk_file_url = chunk_file.json()['chunk_file_url']

# Get the chunked file
# Same 60s timeout as the get/post helpers, so a stalled download cannot hang.
chunk_file_text = requests.get(chunk_file_url, timeout=60)
print(chunk_file_text.status_code)
# Raise on an error status rather than trying to parse an error body as JSON.
chunk_file_text.raise_for_status()
pprint.pprint(chunk_file_text.json())

#### Vectorization

Create Vector Store

In [None]:
# Create a vector store: POST /vector/store/create
# The response is kept in `vector_store`; later cells read its 'store_id'.
create_vector_store_endpoint = 'vector/store/create'
data = dict(
    store_name="SolarSystem",
    store_type="opensearchserverless",
    description="Collection for storing vectorized documents",
    tags=[dict(key="project", value="GenerativeAI")],
)
vector_store = post(data, proxy=create_vector_store_endpoint, token=token)
pprint.pprint(vector_store.json())


Check Vector Store Status

In [None]:
# POST /vector/store/status
vector_store_status_endpoint = 'vector/store/status'
data = {
    "store_id": vector_store.json()['store_id']
}
vector_store_status = post(proxy=vector_store_status_endpoint, token=token, data=data)
pprint.pprint(vector_store_status.json())
# NOTE(review): the loop only exits on 'ACTIVE'; if the API can report a
# failure status this would poll forever -- confirm the possible statuses.
while vector_store_status.json()['status'] != 'ACTIVE':
    # Sleep before re-polling, so there is a delay between every pair of
    # requests and no extra wait once the store is already ACTIVE.
    time.sleep(5)
    vector_store_status = post(proxy=vector_store_status_endpoint, token=token, data=data)
    pprint.pprint(vector_store_status.json())

Create Index

In [None]:
# Create an index inside the vector store: POST /vector/store/index/create
create_index_endpoint = 'vector/store/index/create'
data = dict(
    store_id=vector_store.json()['store_id'],
    index_name="my_index2",
)
vector_index = post(data, proxy=create_index_endpoint, token=token)
pprint.pprint(vector_index.json())


Check Index Status

In [None]:
# POST /vector/store/index/create
# NOTE(review): despite the "Check Index Status" heading, this cell sends a
# second index-create request (for "my_index3") and rebinds `vector_index`.
# Later cells that read vector_index.json()['index_id'] therefore use this
# response, not the one for "my_index2".
# TODO confirm whether an index-status endpoint was intended here.
create_index_endpoint = 'vector/store/index/create'
data = {
  "store_id": vector_store.json()['store_id'],
  "index_name": "my_index3"
}
vector_index = post(proxy=create_index_endpoint, token=token, data=data)
# Prints the raw response text rather than the parsed JSON.
pprint.pprint(vector_index.text)

Vectorize Chunks

In [None]:
# Vectorize the chunking job's output into the index: POST /vector/store/vectorize
vectorize_endpoint = 'vector/store/vectorize'
data = dict(
    chunking_job_id=chunk_job.json()['chunking_job_id'],
    index_id=vector_index.json()['index_id'],
)
vectorize = post(data, proxy=vectorize_endpoint, token=token)
pprint.pprint(vectorize.json())

Semantic Search (first wait for the vectorization job to finish, then query the index)

In [None]:
# GET /vector/job/status/{vectorize_job_id}
import time

job_status_endpoint = f'vector/job/status/{vectorize.json()["vectorize_job_id"]}'
vectorize_job_status = get(proxy=job_status_endpoint, token=token)
status = vectorize_job_status.json()['status']
# Poll until the job reaches one of the statuses this loop treats as final.
while status not in ('COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS'):
    # Wait between polls instead of calling the API in a tight loop.
    time.sleep(5)
    vectorize_job_status = get(proxy=job_status_endpoint, token=token)
    status = vectorize_job_status.json()['status']
    print(status)
pprint.pprint(vectorize_job_status.json())

In [None]:
# Query the index with a natural-language question: POST /vector/search
search_endpoint = 'vector/search'
data = dict(
    query="<REPLACE_WITH_QUERY>", # Question related to the document
    index_id=vector_index.json()['index_id'],
)
search = post(data, proxy=search_endpoint, token=token)
pprint.pprint(search.json())

#### Retrieval Augmented Generation

In [None]:
question = "<REPLACE_WITH_QUERY>" # Question related to the document

# Step 1: retrieve search hits for the question from the vector index.
search_endpoint = 'vector/search'
data = dict(query=question, index_id=vector_index.json()['index_id'])
search = post(data, proxy=search_endpoint, token=token)

# Step 2: build the prompt from the retrieved text.
prompt = """
           You are a helpful assistant. Given a context, answer the following question.
           Context: {context}
           Question: {question}
           Answer:
           """
# Each hit's text is followed by two spaces, as in the step-by-step append form.
context_text = "".join(hit['text'] + ' ' * 2 for hit in search.json())
final_prompt = prompt.format(context=context_text, question=question)
print(final_prompt)

# Step 3: send the assembled prompt to the model.
data = dict(
    model_name="ANTHROPIC_CLAUDE_V2",
    prompt=final_prompt,
    max_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    stop_sequences=["\\n"],
)
response = post(data, proxy=invoke_model_endpoint, token=token)
pprint.pprint(response.json())