#### This notebook has been tested in SageMaker Studio notebooks with the Data Science 3.0 image and a Python 3 environment

In [101]:
!pip install -U langchain

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [102]:
import langchain
# Record the installed LangChain version in the notebook output for reproducibility.
print(langchain.__version__)

0.0.264


In [103]:
from langchain import PromptTemplate

# Claude-style Human/Assistant prompt with two placeholders:
#   {user_query}            - the question asked by the user
#   {retrieved_information} - context text fetched from the vector store
# The string is not raw, so the "\n\n" escapes become real newlines.
# The template ends exactly at "\n\nAssistant:" (no stray quote or trailing newline),
# so nothing unintended is sent after the Assistant turn marker.
# NOTE: a later cell rebinds the name `prompt_template` to a RetrievedPromptTemplate
# instance; `prompt` (built below) is the object that class formats with.
prompt_template = """
\n\nHuman:

Please help me to find the answer, based on the retrieved information and the user input question.

user input question:

{user_query}

retrieved information:

{retrieved_information}

Please ensure the answer are relevant to retrieved information, if you could not find the answer, please wrote #IDONTKNOW exact words without any extrat word. 
\n\nAssistant:"""
prompt = PromptTemplate.from_template(prompt_template)

In [104]:
import boto3

from langchain.prompts import StringPromptTemplate
import pandas as pd
from sklearn.preprocessing import normalize
import numpy as np 

import psycopg2
from pgvector.psycopg2 import register_vector
import boto3 
import json 


class RetrievedPromptTemplate(StringPromptTemplate):
    """Prompt template that augments the user's question with pgvector search results.

    ``format`` embeds ``user_query`` through a Bedrock embedding model, fetches the
    three closest rows of the ``pdfembedding`` table (ordered by pgvector's ``<=>``
    operator) and renders the module-level ``prompt`` with both the question and the
    retrieved text.
    """

    def format(self, **kwargs) -> str:
        """Render the final prompt for the given ``user_query``.

        Args:
            **kwargs: Must contain ``user_query``. Any other entries are forwarded
                unchanged to ``prompt.format``.

        Returns:
            The formatted prompt string.

        Raises:
            KeyError: If ``user_query`` is not supplied.
            ValueError: If ``user_query`` contains no words to embed.
        """
        bedrock_runtime = boto3.client('bedrock-runtime')
        embed_llm_id = "amazon.titan-embed-text-v1"

        user_query = kwargs.pop("user_query")
        query_embedding = self.fetch_user_query_embedding(user_query, bedrock_runtime, embed_llm_id)
        retrieved_information = self.fetch_retrieved_information_from_pgvector(query_embedding)

        kwargs["user_query"] = user_query
        kwargs["retrieved_information"] = retrieved_information
        # `prompt` is the module-level PromptTemplate defined in an earlier cell.
        return prompt.format(**kwargs)

    def fetch_retrieved_information_from_pgvector(self, query_vector):
        """Return the concatenated ``content`` of the 3 rows nearest to ``query_vector``.

        Database credentials are read from the Secrets Manager secret
        ``rdspg-vector-secret``.

        Args:
            query_vector: Array-like exposing ``tolist()`` (a NumPy array in this
                notebook) holding the query embedding.

        Returns:
            A string starting with a single space, followed by each row's content
            and five newlines.
        """
        client = boto3.client('secretsmanager')
        response = client.get_secret_value(SecretId='rdspg-vector-secret')
        database_secrets = json.loads(response['SecretString'])

        dbconn = psycopg2.connect(
            host=database_secrets['host'],
            user=database_secrets['username'],
            password=database_secrets['password'],
            port=database_secrets['port'],
            connect_timeout=10,
        )
        # try/finally guarantees the cursor and connection are released even when
        # the query fails; previously an exception leaked both.
        try:
            dbconn.set_session(autocommit=True)
            cur = dbconn.cursor()
            try:
                # The embedding is sent as its text form, e.g. "[0.1, 0.2, ...]".
                cur.execute("""
            SELECT id, content, pdf_file_name, page_number, pdf_file_path, content_embeddings
            FROM pdfembedding
            ORDER BY content_embeddings <=> %s limit 3;
            """, (str(query_vector.tolist()),)
                )
                results = cur.fetchall()
            finally:
                cur.close()
        finally:
            dbconn.close()

        # row[1] is the `content` column of the SELECT above.
        return " " + "".join(row[1] + "\n" * 5 for row in results)

    def get_embedding(self, sent, bedrock_runtime, embed_llm_id):
        """Embed one piece of text with the given Bedrock model.

        Args:
            sent: Text to embed.
            bedrock_runtime: boto3 ``bedrock-runtime`` client.
            embed_llm_id: Model id passed to ``invoke_model``.

        Returns:
            The embedding as a flat list of numbers.

        Raises:
            ValueError: If the model response has no ``embedding`` field.
        """
        body = json.dumps({"inputText": sent})
        response = bedrock_runtime.invoke_model(
            body=body,
            modelId=embed_llm_id,
            accept="application/json",
            contentType="application/json",
        )
        response_body = json.loads(response.get("body").read())
        embedding = response_body.get("embedding")
        if embedding is None:
            # Fail here with a clear message instead of propagating None downstream.
            raise ValueError(f"Model {embed_llm_id!r} returned no 'embedding' field")

        return np.asarray(embedding).ravel().tolist()

    def chunk_words(self, sequence, chunk_size):
        """Split ``sequence`` on whitespace into strings of at most ``chunk_size`` words."""
        words = sequence.split()
        return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    def query_endpoint(self, payload, bedrock_runtime, embed_llm_id, chunk_size=400):
        """Embed ``payload`` chunk by chunk.

        Args:
            payload: Text to embed.
            bedrock_runtime: boto3 ``bedrock-runtime`` client.
            embed_llm_id: Embedding model id.
            chunk_size: Maximum number of words per chunk (default 400).

        Returns:
            One embedding (list of numbers) per chunk; empty if ``payload`` has no words.
        """
        return [
            self.get_embedding(chunk, bedrock_runtime, embed_llm_id)
            for chunk in self.chunk_words(payload, chunk_size)
        ]

    def parse_response(self, query_response, bedrock_runtime, embed_llm_id):
        """Embed ``query_response`` and reduce the chunk embeddings to one unit vector.

        Chunk embeddings are combined by element-wise max pooling and then
        L2-normalized.

        Raises:
            ValueError: If ``query_response`` contains no words, so there is
                nothing to embed.
        """
        chunk_embeddings = self.query_endpoint(query_response, bedrock_runtime, embed_llm_id)
        if not chunk_embeddings:
            # np.max over an empty array would raise an opaque reduction error.
            raise ValueError("Cannot embed an empty query: the input text contains no words")

        # Element-wise max pooling across chunks (mean pooling was the alternative tried).
        pooled = np.max(np.array(chunk_embeddings), axis=0).reshape(1, -1)
        # normalization before inner product
        pooled = normalize(pooled, axis=1, norm='l2')
        return np.squeeze(pooled)

    def fetch_user_query_embedding(self, user_query, bedrock_runtime, embed_llm_id):
        """Return the pooled, normalized embedding of ``user_query``."""
        return self.parse_response(user_query, bedrock_runtime, embed_llm_id)

In [105]:
# Rebinds `prompt_template` (until now the raw template string) to the retrieval-aware
# template object; `user_query` is the only variable callers need to supply.
prompt_template = RetrievedPromptTemplate(input_variables=["user_query"])

In [106]:
# Sample question used to inspect the rendered prompt in the next cell.
Question = """
Who is the Chairman of the Board and Chief Executive Officer
"""

In [107]:
# Render the prompt directly (embedding + pgvector lookup, no text generation) so the
# retrieved context can be inspected.
print(prompt_template.format(user_query=Question))




Human:

Please help me to find the answer, based on the retrieved information and the user input question.

user input question:


Who is the Chairman of the Board and Chief Executive Officer


retrieved information:

 79
2022 ANNUAL REPORT
Non-executive Director:
Liu Qin
Independent Non-executive Directors:
Chen DongshengWong Shun TakTong Wai Cheung Timothy
The biographical information of the Directors is set out in the section headed “Biographical Details and Other 
Information of the Directors” in this annual report. In addition, an up-to-date list of our Directors and their roles and functions is maintained on the Company’s website and the Stock Exchange’s website. 
None of the members of the Board are related to one another.
Throughout the Reporting Period, the Board has met the requirements of the Listing Rules regarding the appointment 
of at least three independent non-executive directors (representing at least one-third of the Board), with at least 
one of whom possessing a

In [108]:
import boto3
from langchain.llms.bedrock import Bedrock
#from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Sampling settings forwarded to the Claude model as `model_kwargs`.
inference_modifier = dict(
    max_tokens_to_sample=500,
    temperature=0.5,
    top_k=250,
    top_p=1,
    stop_sequences=["\n\nHuman"],
)

# Bedrock runtime client handed to the LangChain wrapper below.
runtime_client = boto3.client('bedrock-runtime')

# To stream tokens to stdout instead, also pass streaming=True and
# callbacks=[StreamingStdOutCallbackHandler()].
bedrock_llm = Bedrock(
    client=runtime_client,
    model_id="anthropic.claude-v2",
    model_kwargs=inference_modifier,
)

In [112]:
# Same question as in the prompt-inspection cell, re-declared before running the full chain.
Question = """
Who is the Chairman of the Board and Chief Executive Officer
"""

In [113]:
from langchain.chains import LLMChain

# Verbose mode makes the chain echo the fully formatted prompt when it runs.
langchain.verbose = True

# Chain = retrieval-aware prompt template + Claude on Bedrock.
llmchain = LLMChain(prompt=prompt_template, llm=bedrock_llm)
rag_results = llmchain.run({'user_query': Question})




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m


Human:

Please help me to find the answer, based on the retrieved information and the user input question.

user input question:


Who is the Chairman of the Board and Chief Executive Officer


retrieved information:

 79
2022 ANNUAL REPORT
Non-executive Director:
Liu Qin
Independent Non-executive Directors:
Chen DongshengWong Shun TakTong Wai Cheung Timothy
The biographical information of the Directors is set out in the section headed “Biographical Details and Other 
Information of the Directors” in this annual report. In addition, an up-to-date list of our Directors and their roles and functions is maintained on the Company’s website and the Stock Exchange’s website. 
None of the members of the Board are related to one another.
Throughout the Reporting Period, the Board has met the requirements of the Listing Rules regarding the appointment 
of at least three independent non-executive directors (repre

In [114]:
# Show the model's answer returned by the chain run above.
print(rag_results)


Based on the retrieved information, the Chairman of the Board and Chief Executive Officer is Lei Jun.


In [99]:
# Second sample question, answered by the chain in the next cell.
Question = """
What is the main business of Xiaomi
"""

In [100]:
from langchain.chains import LLMChain

# Reuses the `llmchain` object built in the earlier chain cell.
# NOTE(review): this cell's execution count (100) is lower than that of the cell that
# builds `llmchain` (113); re-run the notebook top to bottom to confirm it works on a
# fresh kernel.
rag_results = llmchain.run({'user_query': Question})
print(rag_results)


Based on the retrieved information, the main business of Xiaomi is hardware products like smartphones and IoT and lifestyle products. Specifically, the information states:

"Our mission is to relentlessly build amazing products with honest prices to let everyone in the world enjoy a better life through innovative technology. To achieve this, as approved by our Board in May 2018, we pledged to our existing and potential users that starting from 2018, the Xiaomi Hardware Business (“HB”), including smartphones and IoT and lifestyle products, would have an overall net profit margin that would not exceed 5.0% per year."

And 

"With the support of an improved technological framework, Xiaomi is committed to integrating multiple technological capabilities, increasing research and development investment, and providing users with more convenient, affordable, and widely applicable products and technologies."

So in summary, Xiaomi's main business is developing and selling hardware products like