# Building a simple RAG chatbot with LangChain, Hugging Face, FAISS, Amazon SageMaker and Amazon Textract

In [1]:
%%sh
pip install sagemaker langchain amazon-textract-caller amazon-textract-textractor sentence-transformers pypdf pip install faiss-cpu -qU

In [2]:
import boto3, json, sagemaker,pypdf
from typing import Dict
from langchain import LLMChain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## Deploy LLM on SageMaker

In [3]:
#mistral
##takes lot of time to run
role = sagemaker.get_execution_role()

hub = {
	'HF_MODEL_ID':'mistralai/Mistral-7B-Instruct-v0.1',
	'SM_NUM_GPUS': '1'
}

huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="1.1.0"),
	env=hub,
	role=role 
)

predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g4dn.2xlarge",
	container_startup_health_check_timeout=600,
  )

--------------------------*

UnexpectedStatusException: Error hosting endpoint huggingface-pytorch-tgi-inference-2024-01-03-11-56-47-069: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html

In [5]:
#t5 small
# Hub Model configuration. https://huggingface.co/models
role = sagemaker.get_execution_role()

hub = {
	'HF_MODEL_ID':'google/flan-t5-small',
	'SM_NUM_GPUS': json.dumps(1)
}



# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="1.1.0"),
	env=hub,
	role=role, 
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g4dn.12xlarge",
	container_startup_health_check_timeout=300,
  )
  
# send request
predictor.predict({
	"inputs": "Translate to German:  My name is Arthur",
})

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
---------!

[{'generated_text': 'Meine Namen ist Arthur.'}]

In [3]:
#zephyr
role = sagemaker.get_execution_role()

# Hub Model configuration. https://huggingface.co/models
hub = {
	'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
	'SM_NUM_GPUS': json.dumps(1)
}



# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="1.1.0"),
	env=hub,
	role=role, 
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g4dn.12xlarge",
	container_startup_health_check_timeout=600,
  )
  
# send request
predictor.predict({
	"inputs": "<|system|>\nYou are a pirate chatbot who always responds with Arr!</s>\n<|user|>\nThere's a llama on my lawn, how can I get rid of him?</s>\n<|assistant|>\n",
})

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
---------------------------*

UnexpectedStatusException: Error hosting endpoint huggingface-pytorch-tgi-inference-2023-11-26-03-35-39-689: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint..

In [6]:
endpoint_name = predictor.endpoint_name
endpoint_name

'huggingface-pytorch-tgi-inference-2023-11-26-04-06-40-557'

## Configure LLM in LangChain

In [7]:
model_kwargs = {"max_new_tokens": 512, "top_p": 0.8, "temperature": 0.8}

In [18]:
class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        input_str = json.dumps(
            # Mistral prompt, see https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
            {"inputs": f"<s>[INST] {prompt} [/INST]", "parameters": {**model_kwargs}}
        )
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        splits = response_json[0]["generated_text"].split("[/INST] ")
        if len(splits) > 1:
            return splits[1]
        else:
            return "Expected value not found in generated_text"


content_handler = ContentHandler()

In [19]:
sm_client = boto3.client("sagemaker-runtime") # needed for AWS credentials

llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    model_kwargs=model_kwargs,
    content_handler=content_handler,
    client=sm_client,
)

## Zero-shot example

In [20]:
system_prompt = """
As Paul Graham's friend, please answer the question, focusing on his life.
Don't invent facts. If you can't provide a factual answer, say you don't know what the answer is.
"""

prompt = PromptTemplate.from_template(system_prompt + "{content}")

In [21]:
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [22]:
question = "What were the paul graham's knowledge areas?"

query = f"question: {question}"

In [23]:
answer = llm_chain.run({query})
print(answer)

Expected value not found in generated_text


## RAG example with PDF files

In [51]:
from langchain.document_loaders import AmazonTextractPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader

### Upload local PDF files to S3

Sources:
* https://www.iea.org/reports/world-energy-investment-2023
* https://www.iea.org/reports/coal-2022
* https://www.iea.org/reports/world-energy-outlook-2023

Feel free to use your own files, the code below should work without any change.

In [25]:
# Define S3 bucket and prefix for PDF storage

bucket = sagemaker.Session().default_bucket()
prefix = "langchain-rag-demo"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [45]:
%%sh -s $bucket $prefix
aws s3 cp --recursive pdfs s3://$1/$2/


The user-provided path pdfs does not exist.


CalledProcessError: Command 'b'aws s3 cp --recursive pdfs s3://$1/$2/\n'' returned non-zero exit status 255.

In [33]:
import boto3

# Create an S3 client
s3 = boto3.client('s3')

# Get the current AWS account ID
account_id = boto3.client('sts').get_caller_identity().get('Account')

# Get the default S3 bucket associated with the current AWS session
default_bucket = f'sagemaker-{account_id}'

print(f"Default S3 Bucket: {default_bucket}")


Default S3 Bucket: sagemaker-254455524940


In [29]:
import boto3

# Replace 'your-s3-bucket-name' with the actual name of your S3 bucket
bucket_name = 'sagemaker-us-east-1-254455524940'

# Create an S3 client
s3 = boto3.client('s3')

# Validate that the specified bucket exists
try:
    s3.head_bucket(Bucket=bucket_name)
except s3.exceptions.ClientError as e:
    if e.response['Error']['Code'] == '404':
        print(f"The specified bucket '{bucket_name}' does not exist.")
    else:
        print(f"Error accessing the bucket '{bucket_name}': {e}")
    # You might want to handle this error appropriately based on your use case.

# Assign the bucket variable
bucket = bucket_name

# Now you can use the 'bucket' variable in your code
print(f"The S3 bucket is: {bucket}")


The S3 bucket is: sagemaker-us-east-1-254455524940


In [31]:
# Build list of S3 URIs

s3 = boto3.client("s3")
objs = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
objs = objs['Contents']
uris = [f's3://{bucket}/{obj["Key"]}' for obj in objs]
uris    

['s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/',
 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/Coal2022.pdf',
 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/WorldEnergyInvestment2023.pdf',
 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/WorldEnergyOutlook2023.pdf']

In [38]:
print(uris)

['s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/', 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/Coal2022.pdf', 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/WorldEnergyInvestment2023.pdf', 's3://sagemaker-us-east-1-254455524940/langchain-rag-demo/WorldEnergyOutlook2023.pdf']


### Analyze documents with Amazon Textract and split them in chunks

In [73]:
%%time

textract_client = boto3.client('textract')
splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=0)

all_chunks = []

for uri in uris:
    loader = AmazonTextractPDFLoader(uri, client=textract_client)
    document = loader.load()
    chunks = splitter.split_documents(document)
    all_chunks += chunks
    print(f"Loaded {uri}, {len(document)} pages, {len(chunks)} chunks")



EndpointConnectionError: Could not connect to the endpoint URL: "https://textract.eu-north-1.amazonaws.com/"

In [54]:
loader = PyPDFDirectoryLoader("s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/")
docs = loader.load()
len(docs)

0

In [76]:
print(uri)

s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/Coal2022.pdf


In [77]:
loader = PyPDFDirectoryLoader(uri)
docs = loader.load()

# Print the result for debugging
print(docs)

# Check the length of docs
if docs is not None:
    print(len(docs))
else:
    print("Loading documents failed.")


[]
0


In [81]:
import boto3
from langchain.blob import Blob
from langchain.document_loaders.pdf import PyPDFLoader

# Specify the S3 path to the PDF file
s3_uri = "s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/Coal2022.pdf"

# Extract the bucket name and object key from the S3 URI
bucket_name, object_key = s3_uri.replace("s3://", "").split("/", 1)

# Download the PDF file locally
local_file_path = "/path/to/local/file.pdf"  # Choose a local path
s3_client = boto3.client("s3")
s3_client.download_file(bucket_name, object_key, local_file_path)

# Create a Blob from the local file
blob = Blob.from_path(local_file_path)

# Create a PyPDFLoader instance and load the document
loader = PyPDFLoader(blob)
docs = loader.load()

# Print the result for debugging
print(docs)

# Check the length of docs
if docs is not None:
    print(len(docs))
else:
    print("Loading documents failed.")



ModuleNotFoundError: No module named 'langchain.blob'

In [71]:
!pip install PyPDF2
import s3fs
from PyPDF2 import PdfReader

# Specify the S3 path to the PDF file
pdf_file_path = "s3://sagemaker-us-east-1-254455524940/langchain-rag-demo/Coal2022.pdf"

# Use s3fs to open the file from S3
fs = s3fs.S3FileSystem()
with fs.open(pdf_file_path, "rb") as file:
    # Use PdfReader instead of PdfFileReader
    pdf_reader = PdfReader(file)
    
    # Get the number of pages using len(reader.pages)
    num_pages = len(pdf_reader.pages)
    
    # Do further processing as needed
    print(f"Number of pages: {num_pages}")



Number of pages: 137


In [None]:
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)
 
 
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(docs)
len(texts)

### Embed document chunks and store them in FAISS
https://github.com/facebookresearch/faiss 

In [72]:
# Define embedding model
# See https://huggingface.co/spaces/mteb/leaderboard

embedding_model_id = "BAAI/bge-small-en-v1.5"

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
)

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
%%time
# Embed chunks
embeddings_db = FAISS.from_documents(all_chunks, embeddings)

In [None]:
# Save database
embeddings_db.save_local("faiss_index")

### Shortcut : load existing embedding database

In [None]:
embeddings_db = FAISS.load_local("faiss_index", embeddings)

********

### Configure RAG chain

In [None]:
retriever = embeddings_db.as_retriever(search_kwargs={"k": 10})

In [None]:
# Define prompt template
prompt_template = """
As a helpful energy specialist, please answer the question below, focusing on numerical data and using only the context below.
Don't invent facts. If you can't provide a factual answer, say you don't know what the answer is.

question: {question}

context: {context}
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [None]:
chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff",
    retriever=retriever, 
    chain_type_kwargs = {"prompt": prompt})

### Ask our question again

In [None]:
question = "What is the latest trend for solar investments in China?"
answer = chain.run({"query": question})
print(answer)

In [None]:
question = "What does STEPS mean?"
answer = chain.run({"query": question})
print(answer)

## Delete endpoint and model

In [82]:
predictor.delete_model()
predictor.delete_endpoint()