In [78]:
# Deployment settings referenced by the cells below.
region = "eu-west-1"  # AWS region passed to the LangChain endpoint wrapper and the Kendra retriever
kendra_index_id = "ebd13115-d4c2-4371-bfc2-***********"  # Kendra index ID (trailing part masked)
endpoint_name = "jumpstart-dft-hf-text2text-flan-t5-xxl"  # SageMaker endpoint that serves the model

In [88]:
# json is imported here because the invoke_endpoint cells further down call
# json.dumps before the later cell that imports it; without this a fresh
# kernel fails with NameError on "Run All".
import json

import boto3

# Create the SageMaker runtime client in the configured region so it targets
# the same region as the LangChain SagemakerEndpoint wrapper used later.
# `endpoint_name` and `region` are set in the configuration cell above; edit
# them there to point at a different endpoint.
runtime = boto3.client("sagemaker-runtime", region_name=region)

In [96]:
# Prompt containing only the conversation preamble and the question
# "What's SageMaker?" -- no document excerpts are included.
# The string is not raw, so each `\n` below becomes a newline in addition to
# the literal line breaks, and the double-quote characters on the first and
# last content lines are part of the text sent to the model.
# Note: the instruction still says "Based on the above documents" although
# this variant contains none.
prompt_template_without_context = """
"\nThe following is a friendly conversation between a human and an AI.\nThe AI is talkative and provides lots of specific details from its context.\n
If the AI does not know the answer to a question, it truthfully says it\ndoes not know.\n
\nInstruction: Based on the above documents, provide a detailed answer for, What's SageMaker? Solution:\n"
"""

# Same preamble and question as the no-context prompt, but with three
# "Document Title / Document Excerpt" pairs inlined as context and an extra
# instruction to answer "don't know" when the answer is not in the documents.
# The string is not raw: `\n` becomes a newline and `\"` a plain double quote.
prompt_template_context = """
"\nThe following is a friendly conversation between a human and an AI.\nThe AI is talkative and provides lots of specific details from its context.\n
If the AI does not know the answer to a question, it truthfully says it\ndoes not know.\n
Document Title: Getting started with AWS Inferentia development - Amazon Elastic Compute Cloud\n
Document Excerpt: \nGetting started There are a variety of ways that you can get started. Use Amazon SageMaker, a fully-managed service that is the easiest way to get started with machine learning models. For more information, see Compile and deploy a TensorFlow model on Inf1 instances on github. Launch an Inf1 instance using the Deep Learning AMI. For more information, see AWS Inferentia with DLAMI in the AWS Deep Learning AMI Developer Guide.\n\n\n
Document Title: Getting started with AWS Inferentia development - Amazon Elastic Compute Cloud\n
Document Excerpt: \nUse Amazon SageMaker, a fully-managed service that is the easiest way to get started with\n\n\n
Document Title: Best practices for EC2 Spot - Amazon Elastic Compute Cloud\n
Document Excerpt: \nworkloads: Amazon EMR, Amazon ECS, AWS Batch, Amazon EKS, Amazon SageMaker, AWS Elastic Beanstalk, and Amazon GameLift. To learn more about Spot best practices with these services\n
\nInstruction: Based on the above documents, provide a detailed answer for, What's SageMaker? Answer \"don't know\" if not present in the document. Solution:\n"
"""


# Generation settings merged into the request body next to the prompt text.
parameters = dict(
    max_length=500,
    num_return_sequences=1,
    num_beams=1,
    no_repeat_ngram_size=3,
    temperature=1e-6,
)


### The LLM answers the question incorrectly when given no input context

In [97]:
# Serialize the context-free prompt together with the generation settings,
# send it to the endpoint, and show the raw response bytes.
request_body = json.dumps({"text_inputs": prompt_template_without_context, **parameters})
response = runtime.invoke_endpoint(
    ContentType="application/json",
    EndpointName=endpoint_name,
    Body=request_body,
)

print(response["Body"].read())

b'{"generated_texts": ["SageMaker is a software tool for creating a sage file."]}'


### LLM answers the question correctly with input context

In [98]:
# Serialize the prompt that embeds the document excerpts together with the
# generation settings, send it to the endpoint, and show the raw response bytes.
request_body = json.dumps({"text_inputs": prompt_template_context, **parameters})
response = runtime.invoke_endpoint(
    ContentType="application/json",
    EndpointName=endpoint_name,
    Body=request_body,
)

print(response["Body"].read())

b'{"generated_texts": ["Amazon SageMaker, a fully-managed service that is the easiest way to get started with machine learning models."]}'


### Now we can fetch the relevant context from Amazon Kendra and pass it to the LLM as context information

In [105]:
from aws_langchain.kendra_index_retriever import KendraIndexRetriever
from langchain.chains import RetrievalQA
from langchain import OpenAI
from langchain.prompts import PromptTemplate
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
import json

class ContentHandler(ContentHandlerBase):
    """Convert between LangChain prompts and the endpoint's JSON payloads.

    Requests are JSON objects with the prompt under ``text_inputs`` and the
    generation settings as sibling top-level keys. Responses are JSON objects
    whose ``generated_texts`` list holds the generated strings.
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Build the UTF-8 encoded JSON request body for ``prompt``.

        Args:
            prompt: Text placed under the ``text_inputs`` key.
            model_kwargs: Generation settings supplied by the caller. They
                override the defaults below key by key; ``None`` or an empty
                dict leaves the defaults untouched.

        Returns:
            The JSON document encoded as UTF-8 bytes.
        """
        # Defaults used when the caller does not override a setting.
        defaults = {
            "max_length": 500,
            "num_return_sequences": 1,
            "num_beams": 1,
            "no_repeat_ngram_size": 3,
            "temperature": 0.000001,
        }
        # Previously model_kwargs was ignored, so settings passed when the LLM
        # wrapper was constructed never reached the endpoint.
        payload = {"text_inputs": prompt, **defaults, **(model_kwargs or {})}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output) -> str:
        """Extract the first generated text from the endpoint response.

        Args:
            output: Either a readable stream exposing ``read()`` or the raw
                response bytes.

        Returns:
            The first entry of the response's ``generated_texts`` list.
        """
        # The original code always called ``.read()``; plain bytes are also
        # accepted so the method matches its former ``bytes`` annotation.
        raw = output.read() if hasattr(output, "read") else output
        response_json = json.loads(raw.decode("utf-8"))
        # Echo the full parsed response so it appears in the cell output.
        print(response_json)
        return response_json['generated_texts'][0]

# Wrap the SageMaker endpoint as a LangChain LLM that uses the JSON content handler.
content_handler = ContentHandler()
llm = SagemakerEndpoint(
    content_handler=content_handler,
    model_kwargs=dict(temperature=1e-10, max_length=500),
    region_name=region,
    endpoint_name=endpoint_name,
)

# Retriever backed by the configured Kendra index.
retriever = KendraIndexRetriever(
    return_source_documents=True,
    awsregion=region,
    kendraindex=kendra_index_id,
)

# Prompt skeleton with two placeholders, {context} and {question}; both are
# declared as input variables of the PromptTemplate below.
# Presumably the RetrievalQA chain fills {context} with the retrieved Kendra
# excerpts and {question} with the user's query -- confirm against the chain.
prompt_template = """
The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it
does not know.
{context}
Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution:
"""
# Template object handed to the chain through chain_type_kwargs.
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
# Assemble the RetrievalQA chain (chain_type="stuff") from the LLM, the Kendra
# retriever and the custom prompt, then ask the sample question.
chain_type_kwargs = dict(prompt=PROMPT)
qa = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

result = qa("What's SageMaker?")
print(result)

{'generated_texts': ['Amazon SageMaker is a fully-managed service that is the easiest way to get started with machine learning models.']}
{'query': "What's SageMaker?", 'result': 'Amazon SageMaker is a fully-managed service that is the easiest way to get started with machine learning models.', 'source_documents': [Document(page_content='Document Title: Getting started with AWS Inferentia development - Amazon Elastic Compute Cloud\nDocument Excerpt: \nGetting started There are a variety of ways that you can get started. Use Amazon SageMaker, a fully-managed service that is the easiest way to get started with machine learning models. For more information, see Compile and deploy a TensorFlow model on Inf1 instances on github. Launch an Inf1 instance using the Deep Learning AMI. For more information, see AWS Inferentia with DLAMI in the AWS Deep Learning AMI Developer Guide.\n', metadata={'source': 'https://s3.eu-west-1.amazonaws.com/amazon-kendra-sample-docs-eu-west-1/documents/AWSEC2/lat