In [2]:
#install necessary packages
!pip install --upgrade pip
!pip install psycopg2 
!pip install pgvector 

[0m

In [3]:
#import packages
import boto3
import json
import psycopg2

In [4]:
#setup AWS services
secretsManager = boto3.client(
    service_name = 'secretsmanager',
    region_name = 'us-east-1'
)

bedrock_runtime = boto3.client(
    service_name = 'bedrock-runtime', 
    region_name = 'us-east-1'
)

In [5]:
#get Quip token
secretId = 'quip'
secretsManagerResponse = secretsManager.get_secret_value(SecretId = secretId)['SecretString']
secretsManagerSecret = json.loads(secretsManagerResponse)

In [6]:
#set root folder for search
rootFolderId = 'QofAOVRrPwzJ'

In [7]:
#get database info from Secrets Manager
dbSecretId = 'quipsearcher'
dbSecretsManagerResponse = secretsManager.get_secret_value(SecretId = dbSecretId)['SecretString']
dbSecretsManagerSecret = json.loads(dbSecretsManagerResponse)

In [8]:
#connect to database
dbClient = psycopg2.connect(
    host = dbSecretsManagerSecret.get('host'),
    port = dbSecretsManagerSecret.get('port'),
    database = dbSecretsManagerSecret.get('database'),
    user = dbSecretsManagerSecret.get('username'),
    password = dbSecretsManagerSecret.get('password')
)
dbClient.set_session(autocommit = True)
dbCursor = dbClient.cursor()

In [9]:
#read in a phrase to search for and create an embedding for it
searchPhrase = "Is there a secret in the QuipSearcher test doc?"
searchJson = json.dumps({
    "inputText": searchPhrase,
})

searchPhraseExcaped = "\"" + searchPhrase.replace("'","").replace(" ", "\" or \"") + "\"" #tokenize search phrase to help PostgreSQL text search

searchEmbeddingResponse = bedrock_runtime.invoke_model(
    body = searchJson, 
    modelId = 'amazon.titan-embed-text-v1', 
    accept = 'application/json', 
    contentType = 'application/json'
)

searchEmbeddingResponseBody = json.loads(searchEmbeddingResponse['body'].read())
searchEmbedding = searchEmbeddingResponseBody.get('embedding')

In [10]:
#get the top results from the database
dbCursor.execute(f"""SELECT *
    FROM t_{rootFolderId}, websearch_to_tsquery('simple', '{searchPhraseExcaped}') query
    WHERE to_tsvector('simple', text) @@ query
    ORDER BY embeddings <-> '{searchEmbedding}' LIMIT 50;""")
dbResultsRaw = dbCursor.fetchall()

In [11]:
#close the database connection
dbCursor.close()
dbClient.close()

In [12]:
#get the top sections until we reach our length limit
results = []
resultLengthLimit = 12000;

for dbResultRaw in dbResultsRaw:
    if resultLengthLimit > dbResultRaw[2]:
        
        result = {
            "title": dbResultRaw[3],
            "link": dbResultRaw[4],
            "text": dbResultRaw[5]
        }
        
        results.append(result)

        resultLengthLimit -= dbResultRaw[2]

In [13]:
#create a prompt
notesXML = ""

for result in results: 
    
    noteXML = f"""
        <note>
            <title>{result.get('title')}</thread>
            <link>{result.get('link')}</link>
            <text>{result.get('text')}</text>
        </note>
    """
    notesXML += noteXML

instructionsXML = f"""
    <instruction>Use the notes to provide details on the topic.</instruction>
    <instruction>Create a summary section that summarizes the topic based on the notes. Stay focused solely on the topic!</instruction>
    <instruction>Create a links sections that includes the most relevant notes.</instruction>
    <instruction>Double check to make sure there is are not any links that share the same address in the output.</instruction>
    <instruction>Use basic text formatting. Do not use a markup language or markdown formatting.</instruction>
    <instruction>Take your time and be specific where possible. A meaningful answer is more important than a fast one.</instruction>
    <instruction>If you can't find details in the notes tell me. Do not try to find the information elsewhere.</instruction>
"""
    
prompt = f"""Human:
    <instructions>{instructionsXML}</instructions>
    <notes>{notesXML}</notes>
    <topic>{searchPhrase}</topic>

Assistant:"""

In [14]:
#create bedrock input JSON
bedrockInputJSON = json.dumps({
    "prompt": prompt, 
    "max_tokens_to_sample":4000,
    "temperature":0,
    "top_k":250,
    "top_p":1,
    "stop_sequences":[]
})

In [15]:
#get results from bedrock
bedrockClaudeResponse = bedrock_runtime.invoke_model(
    body = bedrockInputJSON, 
    modelId = 'anthropic.claude-v2', 
    accept = 'application/json', 
    contentType = 'application/json'
)

bedrockClaudeResponseBody = json.loads(bedrockClaudeResponse['body'].read())

In [16]:
print(bedrockClaudeResponseBody.get('completion'))

 Here is a summary and relevant links on the topic "Is there a secret in the QuipSearcher test doc?":

Summary:
The QuipSearcher Test Doc note indicates there is a secret in the document. Specifically, the text says "The secret is Bedrock!" This confirms there is a secret contained in the QuipSearcher Test Doc.

Links:
- QuipSearcher Test Doc: https://quip-masked.com/MUSKAXEmzS7G (masked manually for GitHub)

The QuipSearcher Test Doc note is the only one that seems relevant to this topic, as it is the only note that references QuipSearcher or contains any indication of a secret. The other notes appear unrelated to the topic. Please let me know if I should clarify or expand my response in any way!
