## Setup

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import boto3

session = boto3.Session(profile_name='bach-dev', region_name='us-east-1')
boto3_bedrock = session.client(service_name='bedrock-runtime')

In [22]:
from io import StringIO
import sys
import textwrap

def print_ww(*args, width: int = 100, **kwargs):
    """Like print(), but wraps output to `width` characters (default 100)"""
    buffer = StringIO()
    try:
        _stdout = sys.stdout
        sys.stdout = buffer
        print(*args, **kwargs)
        output = buffer.getvalue()
    finally:
        sys.stdout = _stdout
    for line in output.splitlines():
        print("\n".join(textwrap.wrap(line, width=width)))

## Configure langchain

In [4]:
# We will be using the Titan Embeddings Model to generate our Embeddings.
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock

# - create the Anthropic Model
llm = Bedrock(model_id="anthropic.claude-v2", client=boto3_bedrock, model_kwargs={'max_tokens_to_sample':200})
bedrock_embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=boto3_bedrock)

## Data Preparation

In [6]:
import numpy as np
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("data/guides/imis_guide.pdf")
pages = loader.load()

# - in our testing Character split works better with this PDF data set
text_splitter = RecursiveCharacterTextSplitter(  
    chunk_size = 1000,
    chunk_overlap  = 100,
)
docs = text_splitter.split_documents(pages)

print(f"Split {len(pages)} pages into {len(docs)} chunks.")

Split 70 pages into 136 chunks.


In [9]:
avg_doc_length = lambda documents: sum([len(doc.page_content) for doc in documents])//len(documents)
avg_char_count_pre = avg_doc_length(pages)
avg_char_count_post = avg_doc_length(docs)
print(f'Average length among {len(pages)} pages loaded is {avg_char_count_pre} characters.')
print(f'After the split we have {len(docs)} chunks more than the original {len(pages)}.')
print(f'Average length among {len(docs)} chunks (after split) is {avg_char_count_post} characters.')

Average length among 70 pages loaded is 1451 characters.
After the split we have 136 chunks more than the original 70.
Average length among 136 chunks (after split) is 762 characters.


In [10]:
try:
    sample_embedding = np.array(bedrock_embeddings.embed_query(docs[0].page_content))
    print("Sample embedding of a document chunk: ", sample_embedding)
    print("Size of the embedding: ", sample_embedding.shape)

except ValueError as error:
    if  "AccessDeniedException" in str(error):
        print(f"\x1b[41m{error}\
        \nTo troubeshoot this issue please refer to the following resources.\
         \nhttps://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_access-denied.html\
         \nhttps://docs.aws.amazon.com/bedrock/latest/userguide/security-iam.html\x1b[0m\n")      
        class StopExecution(ValueError):
            def _render_traceback_(self):
                pass
        raise StopExecution        
    else:
        raise error

Sample embedding of a document chunk:  [ 0.17480469 -0.15527344 -0.44921875 ...  0.13867188 -0.46679688
 -0.484375  ]
Size of the embedding:  (1536,)


## Save to FAISS

In [11]:
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

vectorstore_faiss = FAISS.from_documents(
    docs,
    bedrock_embeddings,
)

wrapper_store_faiss = VectorStoreIndexWrapper(vectorstore=vectorstore_faiss)

## Question Answering

In [12]:
query = """When are comments required?"""
query_embedding = vectorstore_faiss.embedding_function.embed_query(query)
np.array(query_embedding)

In [19]:
relevant_documents = vectorstore_faiss.similarity_search_by_vector(query_embedding)
print(f'{len(relevant_documents)} documents are fetched which are relevant to the query.')
print('----')
for i, rel_doc in enumerate(relevant_documents):
    print(f'## Document {i+1}: {rel_doc.page_content}.......')
    print('---')

4 documents are fetched which are relevant to the query.
----
## Document 1: that end date is reached, the comment will still be accessible to read in History.  
Comments converted from Axis, however, will not be displayed in History until they are edited or removed.   Only 
Membership support can edit or remove comments converted from Axis........
---
## Document 2: 19 | P a g e  
  
When are Comments Required?  
 
Areas that require comments are noted throughout this docum ent, but here is a full listing:  
 
Moves  
 Swap  Primary &  Associate but not cancelling the former Primary  
 ATP move without changing the address or adding a wrong address flag to either HH  
Transfer In  
 Manual Transfer In (not using the Transfer In Wizard) b/c status will be F instead of T  
Roadside  
 Pre-existing co ndition (new membership or  upgrading)  
Payments  
 Leaving a HH in Partial Paid/Collect status  (taking a partial payment only)  
 Leaving a HH in Prospect or Unpaid status (not tak

### Quick way

In [20]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
prompt_template = """

Human: Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context

Question: {question}

Assistant:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [23]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore_faiss.as_retriever(
        search_type="similarity", search_kwargs={"k": 3}
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)
answer = qa({"query": query})
print_ww(answer)

{'query': 'When are comments required?', 'result': ' Based on the context provided, comments are
required in the following situations:\n\n- Moves:\n  - Swapping Primary & Associate without
cancelling the former Primary\n  - ATP move without changing the address or adding a wrong address
flag\n- Transfer In: \n  - Manual Transfer In (not using the Transfer In Wizard)\n- Roadside:\n  -
Pre-existing condition (new membership or upgrading)\n- Payments:\n  - Leaving a HH in Partial
Paid/Collect status (taking a partial payment only)\n  - Leaving a HH in Prospect or Unpaid status
(not taking payment) \n  - Mailing a postdated cheque to PCC to hold for processing date\n- Creation
of a 2nd iMIS ID (intentional):\n  - Transfer In that was previously an AMA Member\n  - Converting a
Child to an Associate before they are 16 years old\n  - ASSUME - where the person taking over has a
suspended or cancelled membership\n\nIn summary', 'source_documents': [Document(page_content='that
end date is reache

Let's ask a different question:

In [29]:
query_2 = "Can a member have multiple membership numbers?"
answer_2 = answer = qa({"query": query_2})
print_ww(answer_2)

{'query': 'Can a member have multiple membership numbers?', 'result': ' Based on the provided
context, the answer is:\nNo, generally a member should only have one active membership number.
However, there are some exceptions where a member may end up with two membership numbers:\n\n- If a
non-member donor later becomes a full member, they will get a new membership number separate from
their previous non-member ID. \n\n- If someone with a suspended/cancelled membership rejoins, they
can either reinstate their old membership or get a brand new membership number. \n\n- Someone who
was previously an AMA member and is transferring in may end up with two numbers.\n\nSo while in
general members have just one active number, there are some specific cases where they may end up
with two due to their history.', 'source_documents': [Document(page_content='ensure is it still up
to date.  \n \n2. Assign New  (Membership #)  \nWhen do I use it?  \nThis is only used by Member
request, generally when the

### Customisable option
In the above scenario you explored the quick and easy way to get a context-aware answer to your question. Now let's have a look at a more customizable option with the helpf of [RetrievalQA](https://python.langchain.com/en/latest/modules/chains/index_examples/vector_db_qa.html) where you can customize how the documents fetched should be added to prompt using `chain_type` parameter. Also, if you want to control how many relevant documents should be retrieved then change the `k` parameter in the cell below to see different outputs. In many scenarios you might want to know which were the source documents that the LLM used to generate the answer, you can get those documents in the output using `return_source_documents` which returns the documents that are added to the context of the LLM prompt. `RetrievalQA` also allows you to provide a custom [prompt template](https://python.langchain.com/en/latest/modules/prompts/prompt_templates/getting_started.html) which can be specific to the model.

Note: In this example we are using Anthropic Claude as the LLM under Amazon Bedrock. This particular model [performs best](https://docs.anthropic.com/claude/docs/human-and-assistant-formatting) if the inputs are provided under `Human:` and the model is requested to generate an output after `Assistant:`. In the cell below you see an example of how to control the prompt such that the LLM stays grounded and doesn't answer outside the context.

In [30]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

prompt_template = """

Human: Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context

Question: {question}

Assistant:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore_faiss.as_retriever(
        search_type="similarity", search_kwargs={"k": 3}
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)
query = "What is the difference between ebill and email?"
result = qa({"query": query})
print(result['result'])
result['source_documents']

 Based on the context provided, the key difference between eBill and email is:

- eBill refers specifically to the option for AMA members to receive their annual renewal bill electronically instead of via physical mail. Choosing eBill replaces the paper bill that is normally sent out about 1 month before their membership expiry date.

- Email refers generically to a member's email address on file. The context indicates you should verify whether a member wants updates made to both their eBill preference and their email address on file if changes are requested.

So in summary, eBill is a billing preference for how to receive the annual renewal notice, while email is just the member's general email contact information.


[Document(page_content='15 | P a g e  \n  \n‘eBill’ is different from ‘Email.’ If you are making updates to eBill or an email address, ensure that you’re asking the \nMember if both should be updated.   \n                           \n \nCards & Bills  \nYou may order ad hoc C ards and Bills for members if required. You may either order Cards for the entire HH by \nselecting Household Card , or for an individual (s) by selecting Individual Card.  \n                                              \n \n \n       Household Card E xpiry  \n \nCards have a 3 year expiry and we print this replacement  date on the card.  If you order cards for any reason the system \nwill automatically check to see if the HH card expiry date is within 6 months from today.  If it is, it will not only order t he \ncard(s) you are requesting, it will advance the card expiry dat e by 3 years and order new cards for the entire HH.  \n \nIf this HH has a donor and requested  ‘Cards to:  Donor’, all 3 year replacement 