## SageMaker Endpoints

In [3]:
import json 
from typing import Dict, List

import boto3
from langchain import SagemakerEndpoint
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
from langchain.llms.sagemaker_endpoint import LLMContentHandler 

In [4]:
aws_region = boto3.Session().region_name
print(aws_region)

us-west-2


### Falcon-7b-instruct 

In [2]:
class ContentHandlerLLM(LLMContentHandler):
    """Translate between prompts and the JSON payloads exchanged with the LLM endpoint."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Build the UTF-8 encoded JSON request body.

        Args:
            prompt: Text sent under the "inputs" key.
            model_kwargs: Mapping sent under the "parameters" key.

        Returns:
            The JSON document encoded as UTF-8 bytes.
        """
        payload = {"inputs": prompt, "parameters": model_kwargs}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Response body. ``.read()`` is called on it, so it has to be
                a readable stream object even though it is annotated ``bytes``.

        Returns:
            The "generated_text" value of the first element of the decoded
            JSON array.
        """
        raw_body = output.read()
        generations = json.loads(raw_body.decode("utf-8"))
        return generations[0]["generated_text"]

In [3]:
endpoint_name = "falcon-7b-instruct"

In [4]:
# Generation settings handed to the endpoint wrapper as model_kwargs.
# ContentHandlerLLM.transform_input places model_kwargs under the request's
# "parameters" key.
# NOTE(review): the exact meaning of each key is defined by the deployed model
# server, not by this notebook - confirm against its documentation.
parameters = {
    "max_new_tokens": 300,
    "return_full_text": False,
    "do_sample": True,
    "top_p": 0.3,
}

# Handler that turns prompts into JSON request bytes and pulls the generated
# text out of the JSON response.
content_handler = ContentHandlerLLM()

# LLM wrapper bound to the endpoint named in `endpoint_name`, in the region
# resolved from the default boto3 session (`aws_region`).
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=aws_region,
    model_kwargs=parameters,
    content_handler=content_handler,
)

In [5]:
llm("What is the largest mammal")

' on Earth?\nThe largest mammal on Earth is the blue whale.'

### Embedding 

`SagemakerEndpointEmbeddingsJumpStart` [Source](https://medium.com/@ryanntk/deploying-hugging-face-embedding-models-on-aws-sagemaker-a-comprehensive-tutorial-with-langchain-af8e0b405b51)

In [5]:
import time

# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function
class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings):
    """SageMaker embeddings client that embeds documents in small batches.

    Only ``embed_documents`` is overridden: it batches the inputs, calls the
    inherited ``_embedding_func`` once per batch, and prints how long the whole
    run took.
    """

    def embed_documents(
        self, texts: List[str], chunk_size: int = 5
    ) -> List[List[float]]:
        """Compute doc embeddings using a SageMaker Inference Endpoint.

        Args:
            texts: The list of texts to embed.
            chunk_size: Maximum number of input texts grouped together into a
                single request. Must be at least 1.

        Returns:
            The per-batch results of ``_embedding_func`` concatenated in input
            order (expected to be one embedding per text). An empty list when
            ``texts`` is empty.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        # A zero step makes range() raise an opaque ValueError and a negative
        # step silently yields no batches, so reject both up front.
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
        # Without this guard the effective chunk size would be len(texts) == 0
        # and range(0, 0, 0) would raise; there is nothing to send anyway.
        if not texts:
            return []

        results = []
        # Never request a batch larger than the number of texts available.
        _chunk_size = min(chunk_size, len(texts))
        st = time.time()
        for i in range(0, len(texts), _chunk_size):
            response = self._embedding_func(texts[i : i + _chunk_size])
            results.extend(response)
        time_taken = time.time() - st
        print(
            f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}"
        )
        return results

In [6]:
class ContentHandler(EmbeddingsContentHandler):
    """Serialise embedding requests to JSON and parse the JSON response."""

    content_type = "application/json"
    accepts = "application/json"

    # `List[str]` (typing) is used instead of the builtin generic `list[str]`
    # to match the rest of the file and to keep the class definition valid on
    # Python versions older than 3.9.
    def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes:
        """
        Transforms the input into bytes that can be consumed by SageMaker endpoint.
        Args:
            inputs: List of input strings.
            model_kwargs: Additional keyword arguments to be passed to the endpoint.
        Returns:
            The transformed bytes input.
        """
        # The payload nests model_kwargs under a "parameters" key. If the
        # endpoint's inference.py expects them as top-level keys instead, use:
        # input_str = json.dumps({"inputs": inputs, **model_kwargs})
        input_str = json.dumps({"inputs": inputs, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> List[List[float]]:
        """
        Transforms the output from the endpoint into a list of embeddings.
        Args:
            output: The response body from SageMaker endpoint. ``.read()`` is
                called on it, so it must be a readable stream object even
                though it is annotated ``bytes``.
        Returns:
            The transformed output - list of embeddings
        Note:
            The length of the outer list is the number of input strings.
            The length of the inner lists is the embedding dimension.
        """
        # Example: inference.py returns a JSON string with the list of
        # embeddings in a "vectors" key:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json["vectors"]

In [7]:
# NOTE: this rebinds `content_handler`, which earlier held the
# ContentHandlerLLM instance passed to `llm`.
content_handler = ContentHandler()

# Passed as model_kwargs; ContentHandler.transform_input sends them under the
# request's "parameters" key.
# NOTE(review): presumably selects the inference device on the endpoint -
# confirm against the deployed inference script.
emb_parameters = {"device": "cuda"}

# Embeddings client for the sentence-transformers endpoint, in the region
# resolved from the default boto3 session (`aws_region`).
embeddings = SagemakerEndpointEmbeddingsJumpStart(
    # credentials_profile_name="credentials-profile-name",
    endpoint_name="sentence-transformers-all-mpnet-base-v2",
    region_name=aws_region,
    content_handler=content_handler,
    model_kwargs=emb_parameters,
)

In [6]:
query_result = embeddings.embed_query("foo bar dog")
len(query_result) 

768

In [11]:
query_result = embeddings.embed_documents(["foo bar dog", "Veronica is a Pythonista"])
len(query_result) 

got results for 2 in 0.7462847232818604s, length of embeddings list is 2


2

In [8]:
len(query_result[1])

768

In [12]:
print(query_result)

[[0.023010829463601112, 0.06674211472272873, 0.015658535063266754, -0.059901751577854156, 0.00899288710206747, 0.04786750301718712, 0.005673463921993971, 0.023586349561810493, -0.06605415046215057, 0.02126981131732464, -0.0002889889001380652, 0.03975634276866913, -0.03429273143410683, 0.05184069275856018, 0.02275446429848671, -0.01708190329372883, 0.017952624708414078, 0.03652235493063927, 0.07056411355733871, -0.040407828986644745, -0.00187441217713058, 0.02352127991616726, 0.004665896762162447, 0.018560048192739487, -0.047574300318956375, 0.002585913985967636, -0.018474262207746506, -0.003118740627542138, -0.06551744788885117, -0.013685131445527077, -0.04172193631529808, 0.01329813152551651, -0.017570368945598602, 0.003636210458353162, 2.1068485693831462e-06, -0.01581939496099949, -0.01604585535824299, 0.005425119306892157, 0.0680365040898323, -0.05921432375907898, -0.08718516677618027, -0.020225796848535538, -0.02191966399550438, 0.006423304323107004, -0.029958896338939667, -0.06446

## Steps

![steps](image.png)

### Load Document

In [11]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

In [25]:
# loader = DirectoryLoader(
#     "docs/cs229_lectures/", glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader
# )
# pages = loader.load()
# len(pages)

100%|██████████| 3/3 [00:00<00:00,  6.00it/s]


56

In [12]:
loader = PyPDFLoader(
    "docs/cs229_lectures/MachineLearning-Lecture01.pdf",
)
pages = loader.load() 
len(pages)

22

### Split Document

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

In [14]:
splits = text_splitter.split_documents(pages)
len(splits)

57

### Vector store and Embedding

In [7]:
from langchain.vectorstores import Chroma


persist_directory = "docs/chromahf/"

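Save documents — run once to embed the splits and persist them to `persist_directory`; commented out here because the store is loaded from disk in the next cell.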

In [None]:
# vectordb = Chroma.from_documents(
#     documents=splits, embedding=embeddings, persist_directory=persist_directory
# )

Load the persisted vector store

In [8]:
vectordb = Chroma(embedding_function=embeddings, persist_directory=persist_directory)

print(vectordb._collection.count())

57


In [9]:
vectordb.similarity_search("what did they say about matlab?")

[Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is a type of machine \nlearning algorithm for modeling time series and a few other things, so  extensions to the \nmaterials that I'll be covering in the main  lectures. And attend ance at the discussion \nsections is optional, okay?  \nSo that was all I had from l ogistics. Before we move on to start talking a bit about \nmachine learning, let me check what questions you have. Yeah?  \nStudent : [Inaudible] R or something like that?  \nInstructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're \nwelcome to use R, but I would strongly advi se against it, mainly because in the last \nproblem set, we actually supply some code th at will run in Octave  but that would be \nsomewhat painful for you to translate into R yourself. So for your other assignments, if \nyou wanna submit

### Retrieval

In [15]:
from langchain.docstore.document import Document


def pretty_print_docs(docs: List[Document]) -> None:
    """Print every document's text under a numbered heading.

    Consecutive documents are separated by a line of 100 dashes. Nothing is
    returned; the formatted text goes to standard output.

    Args:
        docs: Documents whose ``page_content`` should be displayed.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))


def get_metadata(docs: List[Document]) -> None:
    """Print the ``metadata`` of each document on its own line.

    Despite the ``get_`` prefix this function returns ``None``; it only writes
    to standard output.

    Args:
        docs: Documents whose metadata should be displayed.
    """
    for metadata in (doc.metadata for doc in docs):
        print(metadata)

#### MMR

In [20]:
question = "what did they say about matlab?"

docs = vectordb.max_marginal_relevance_search(question, k=2, fetch_k=3)
print(docs) 

[Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is a type of machine \nlearning algorithm for modeling time series and a few other things, so  extensions to the \nmaterials that I'll be covering in the main  lectures. And attend ance at the discussion \nsections is optional, okay?  \nSo that was all I had from l ogistics. Before we move on to start talking a bit about \nmachine learning, let me check what questions you have. Yeah?  \nStudent : [Inaudible] R or something like that?  \nInstructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're \nwelcome to use R, but I would strongly advi se against it, mainly because in the last \nproblem set, we actually supply some code th at will run in Octave  but that would be \nsomewhat painful for you to translate into R yourself. So for your other assignments, if \nyou wanna submit

In [21]:

        
get_metadata(docs)

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Working with Metadata

In [23]:
question = "what did they say about regression on page 3?"

docs = vectordb.similarity_search(
    question,
    k=2,
    filter={"page": 3},
)

pretty_print_docs(docs) 

Document 1:

So, see, most of us use learning algorithms half a dozen, a dozen, maybe dozens of times 
without even knowing it.  
And of course, learning algorithms are also  doing things like giving us a growing 
understanding of the human genome. So if so meday we ever find a cure for cancer, I bet 
learning algorithms will have had a large role in that. That's sort of the thing that Tom 
works on, yes?  
So in teaching this class, I sort of have thre e goals. One of them is just to I hope convey 
some of my own excitement a bout machine learning to you.  
The second goal is by the end of this class, I hope all of you will be ab le to apply state-of-
the-art machine learning algorithms to whatev er problems you're interested in. And if you 
ever need to build a system for reading zi p codes, you'll know how to do that by the end 
of this class.  
And lastly, by the end of this class, I reali ze that only a subset of  you are interested in 
doing research in machine learning, but by t

In [29]:
get_metadata(docs)

{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Self-Query 

In [32]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [33]:
metadata_field_info = [
    # AttributeInfo(
    #     name="source",
    #     description="The lecture the chunk is from, should be one of `docs/cs229_lectures/MachineLearning-Lecture01.pdf`, `docs/cs229_lectures/MachineLearning-Lecture02.pdf`, or `docs/cs229_lectures/MachineLearning-Lecture03.pdf`",
    #     type="string",
    # ),
    AttributeInfo(
        name="page",
        description="The page from the lecture",
        type="integer",
    ),
]

In [34]:
document_content_description = "Lecture notes"
base_llm = OpenAI(temperature=0)

retriever = SelfQueryRetriever.from_llm(
    base_llm, vectordb, document_content_description, metadata_field_info, verbose=True
) 

In [35]:
question = "what did they say about regression on page 3?"

docs = retriever.get_relevant_documents(question) 



query='regression' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='page', value=3) limit=None


In [36]:
pretty_print_docs(docs)

Document 1:

So, see, most of us use learning algorithms half a dozen, a dozen, maybe dozens of times 
without even knowing it.  
And of course, learning algorithms are also  doing things like giving us a growing 
understanding of the human genome. So if so meday we ever find a cure for cancer, I bet 
learning algorithms will have had a large role in that. That's sort of the thing that Tom 
works on, yes?  
So in teaching this class, I sort of have thre e goals. One of them is just to I hope convey 
some of my own excitement a bout machine learning to you.  
The second goal is by the end of this class, I hope all of you will be ab le to apply state-of-
the-art machine learning algorithms to whatev er problems you're interested in. And if you 
ever need to build a system for reading zi p codes, you'll know how to do that by the end 
of this class.  
And lastly, by the end of this class, I reali ze that only a subset of  you are interested in 
doing research in machine learning, but by t

In [37]:
get_metadata(docs)

{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


In [38]:
question = (
    "what did they say about regression between pages one and ten?"
)

docs = retriever.get_relevant_documents(question)

get_metadata(docs)



query='regression' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='page', value=1), Comparison(comparator=<Comparator.LTE: 'lte'>, attribute='page', value=10)]) limit=None
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 6, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 10, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Compression

In [39]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [40]:
base_llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(base_llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vectordb.as_retriever()
)

In [41]:
question = "what did they say about matlab?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

Document 1:

"I personally end up using MATLAB quite a bit more often for various reasons."
----------------------------------------------------------------------------------------------------
Document 2:

"And the student said, "Oh, it was the MATLAB." So for those of you that don't know MATLAB yet, I hope you do learn it. It's not hard, and we'll actually have a short MATLAB tutorial in one of the discussion sections for those of you that don't know it."
----------------------------------------------------------------------------------------------------
Document 3:

"MATLAB is I guess part of the programming language that makes it very easy to write codes using matrices, to write code for numerical routines, to move data around, to plot data. And it's sort of an extremely easy to learn tool to use for implementing a lot of learning algorithms."
----------------------------------------------------------------------------------------------------
Document 4:

"So one more organizational

In [42]:
docs = vectordb.similarity_search(question, k=4)
pretty_print_docs(docs)

Document 1:

So later this quarter, we'll use the discussion sections to talk about things like convex 
optimization, to talk a little bit about hidde n Markov models, which is a type of machine 
learning algorithm for modeling time series and a few other things, so  extensions to the 
materials that I'll be covering in the main  lectures. And attend ance at the discussion 
sections is optional, okay?  
So that was all I had from l ogistics. Before we move on to start talking a bit about 
machine learning, let me check what questions you have. Yeah?  
Student : [Inaudible] R or something like that?  
Instructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're 
welcome to use R, but I would strongly advi se against it, mainly because in the last 
problem set, we actually supply some code th at will run in Octave  but that would be 
somewhat painful for you to translate into R yourself. So for your other assignments, if 
you wanna submit a solution in R, that'

In [43]:
# get_metadata prints each metadata dict itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line after
# each listing.
get_metadata(compressed_docs)
get_metadata(docs)

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
None
{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
None


#### Combine threshold score and Compression

In [48]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(
        search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.9}
    ),
)

In [49]:
compression_retriever.get_relevant_documents(question)



[]

#### Combine MMR and Compression

In [50]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr"),
)

docs = compression_retriever.get_relevant_documents(question)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

In [51]:
pretty_print_docs(docs)

Document 1:

"I personally end up using MATLAB quite a bit more often for various reasons."


### Question Answering

#### RetrievalQA Chain

In [15]:
from langchain.chains import RetrievalQA

##### Default Chain Type (Stuff)

In [16]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    return_source_documents=True,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
)

In [68]:
question = "what did they say about matlab?"
result = qa_chain({"query": question}) 

result 

{'query': 'what did they say about matlab?',
 'result': '\n\nMatlab is a programming language and environment that is commonly used in data analysis, visualization, and numerical computing. It is widely used in scientific and engineering fields, including physics, engineering, and finance. It is often used to create interactive visualizations and perform complex numerical computations. It is also used to develop and test algorithms, which are mathematical procedures that can be used to solve complex problems. It is a powerful tool for data analysis and can be used to analyze large datasets, perform statistical analysis, and create complex visualizations. It is also used to develop and test algorithms, which are mathematical procedures that can be used to solve complex problems.',
 'source_documents': [Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is

In [70]:
pretty_print_docs(result['source_documents'])

Document 1:

So later this quarter, we'll use the discussion sections to talk about things like convex 
optimization, to talk a little bit about hidde n Markov models, which is a type of machine 
learning algorithm for modeling time series and a few other things, so  extensions to the 
materials that I'll be covering in the main  lectures. And attend ance at the discussion 
sections is optional, okay?  
So that was all I had from l ogistics. Before we move on to start talking a bit about 
machine learning, let me check what questions you have. Yeah?  
Student : [Inaudible] R or something like that?  
Instructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're 
welcome to use R, but I would strongly advi se against it, mainly because in the last 
problem set, we actually supply some code th at will run in Octave  but that would be 
somewhat painful for you to translate into R yourself. So for your other assignments, if 
you wanna submit a solution in R, that'

In [69]:
get_metadata(result['source_documents'])

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 2, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


In [66]:
# str.strip treats its argument as a set of characters, not a substring, so a
# single "\n" removes every leading and trailing newline.
result['result'].strip('\n')

'The students were told that MATLAB was a good tool to use for their project, but that it was not necessary to use it for the term project. The instructor also stated that he personally used MATLAB more often than R.'

##### Setting Prompt format

In [10]:
from langchain.chains.retrieval_qa.base import BaseRetrievalQA


def get_qa_result(question: str, qa_chain: BaseRetrievalQA) -> str:
    """Ask ``qa_chain`` a question and return only the answer text.

    Args:
        question: The question, sent to the chain under the "query" key.
        qa_chain: The retrieval-QA chain to call.

    Returns:
        The value stored under the "result" key of the chain's output.
    """
    result = qa_chain({"query": question})

    return result["result"]


def qa_result(question, qa_chain):
    """Ask ``qa_chain`` a question and return the chain's complete output.

    Unlike ``get_qa_result`` the output is returned unmodified, so any extra
    keys the chain produces remain available to the caller.

    Args:
        question: The question, sent to the chain under the "query" key.
        qa_chain: The retrieval-QA chain to call.

    Returns:
        Whatever ``qa_chain`` returns for the query.
    """
    # Written as a def rather than a lambda bound to a name so the function
    # has a proper __name__ in tracebacks and can carry a docstring.
    return qa_chain({"query": question})

In [12]:
from langchain.prompts import PromptTemplate

# Build prompt
template = """
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Remove Helpful for Falcon 7b 

In [13]:
from langchain.chat_models import ChatOpenAI

chat_llm = ChatOpenAI(temperature=0)

In [64]:
qa_chain = RetrievalQA.from_chain_type(
    chat_llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [65]:
get_qa_result("what did they say about matlab?", qa_chain) 

'The instructor advised against using R for the problem set because the code provided would be difficult to translate. They mentioned that MATLAB is worth learning and that they personally use it more often. Thanks for asking!'

In [66]:
get_qa_result("What is the current yam price in Mile 12 market?", qa_chain)

"I don't know. Thanks for asking!"

##### Map Reduce Chain Type

In [67]:
qa_chain_mr = RetrievalQA.from_chain_type(
    chat_llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type="map_reduce",
)

In [68]:
get_qa_result("what did they say about matlab?", qa_chain_mr)

'There is no mention of MATLAB in the provided text.'

##### Refine Chain Type

In [16]:
qa_chain_refine = RetrievalQA.from_chain_type(
    chat_llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type="refine",
)

In [74]:
get_qa_result("what did they say about matlab?", qa_chain_refine)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


'I apologize for the confusion earlier. "refine_template" is not a recognized term or function in MATLAB. It is possible that it is a custom function or variable specific to a particular code or project. Without further information or context, it is difficult to provide a specific explanation. If you can provide more details or clarify your question, I would be happy to help.'

In [18]:
# get_qa_result("What are major topics for this class?", qa_chain_refine) 

#### Limitation

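`RetrievalQA` does not store the conversation history: every question is answered on its own, so a follow-up that refers back to a previous answer (like the second question below) loses its context.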

In [19]:
from langchain.chains.retrieval_qa.base import BaseRetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate


def get_qa_result(question: str, qa_chain: BaseRetrievalQA) -> str:
    """Run ``qa_chain`` on ``question`` and return the answer text only.

    This re-declares the helper of the same name defined earlier in the
    notebook.

    Args:
        question: The question, sent to the chain under the "query" key.
        qa_chain: The retrieval-QA chain to call.

    Returns:
        The value stored under the "result" key of the chain's output.
    """
    chain_output = qa_chain({"query": question})
    answer = chain_output["result"]
    return answer


chat_llm = ChatOpenAI(temperature=0)

# Build prompt
template = """
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

qa_chain = RetrievalQA.from_chain_type(
    chat_llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [22]:
get_qa_result("what are the topics covered?", qa_chain) 

'The topics covered include statistics, algebra, extensions in machine learning, and the late homework policy. Thanks for asking!'

In [24]:
get_qa_result("Tell me how often each of these topics were treated?", qa_chain) 

'The frequency of each topic being discussed is not mentioned in the given context. \nThanks for asking!'

#### Memory

In [70]:
from langchain.memory import ConversationBufferMemory
from pprint import pprint

memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='chat_history'
)

In [71]:
pprint(memory)

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[]), output_key=None, input_key=None, return_messages=True, human_prefix='Human', ai_prefix='AI', memory_key='chat_history')


In [72]:
from langchain.chains import ConversationalRetrievalChain

qa_chain_conv = ConversationalRetrievalChain.from_llm(
    chat_llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 3}),
    # return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
    memory=memory
)

In [73]:
result = qa_chain_conv({"question": "what are the topics covered?"})
result 

{'question': 'what are the topics covered?',
 'chat_history': [HumanMessage(content='what are the topics covered?', additional_kwargs={}, example=False),
  AIMessage(content='The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!', additional_kwargs={}, example=False)],
 'answer': 'The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!'}

In [74]:
result['answer']

'The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!'

In [75]:
pprint(memory)

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='what are the topics covered?', additional_kwargs={}, example=False), AIMessage(content='The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!', additional_kwargs={}, example=False)]), output_key=None, input_key=None, return_messages=True, human_prefix='Human', ai_prefix='AI', memory_key='chat_history')


In [76]:
memory.chat_memory.messages

[HumanMessage(content='what are the topics covered?', additional_kwargs={}, example=False),
 AIMessage(content='The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!', additional_kwargs={}, example=False)]

In [77]:
# result = qa_chain_conv({"question": "Tell me how often each of these topics were treated?"})
# result 

In [78]:
# memory.chat_memory.messages

In [79]:
result = qa_chain_conv({"question": "Tell me more about these topics?"})
result 

{'question': 'Tell me more about these topics?',
 'chat_history': [HumanMessage(content='what are the topics covered?', additional_kwargs={}, example=False),
  AIMessage(content='The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!', additional_kwargs={}, example=False),
  HumanMessage(content='Tell me more about these topics?', additional_kwargs={}, example=False),
  AIMessage(content='The topics covered in this class include statistics, algebra, machine learning, supervised learning, and housing price prediction. The class also discusses extensions of the material taught in the main lectures and emphasizes the importance of independent work on homework problems without referring to notes or solutions from previous years. The instructor also mentions the late homework policy. Thanks for asking!', additional_kwargs={}, example=False)],
 'answer': 'The topics covered in this class include statist

In [80]:
result['answer']

'The topics covered in this class include statistics, algebra, machine learning, supervised learning, and housing price prediction. The class also discusses extensions of the material taught in the main lectures and emphasizes the importance of independent work on homework problems without referring to notes or solutions from previous years. The instructor also mentions the late homework policy. Thanks for asking!'

In [81]:
memory.chat_memory.messages

[HumanMessage(content='what are the topics covered?', additional_kwargs={}, example=False),
 AIMessage(content='The topics covered include statistics, algebra, machine learning, and extensions of the material taught in the main lectures. Thanks for asking!', additional_kwargs={}, example=False),
 HumanMessage(content='Tell me more about these topics?', additional_kwargs={}, example=False),
 AIMessage(content='The topics covered in this class include statistics, algebra, machine learning, supervised learning, and housing price prediction. The class also discusses extensions of the material taught in the main lectures and emphasizes the importance of independent work on homework problems without referring to notes or solutions from previous years. The instructor also mentions the late homework policy. Thanks for asking!', additional_kwargs={}, example=False)]

## Chatbot

In [8]:
from scripts.sentence_transformer import (
    ContentHandler,
    SagemakerEndpointEmbeddingsJumpStart,
)
import boto3

# Region of the default boto3 session; passed to the embeddings client below.
aws_region = boto3.Session().region_name

# Passed to the embeddings client as `model_kwargs`.
emb_parameters = {"device": "cuda"}

# Content handler imported from scripts.sentence_transformer.
content_handler = ContentHandler()

# NOTE: `aws_region`, `content_handler` and `emb_parameters` are also read as
# globals by `load_db`, so their names must stay as they are.
embeddings = SagemakerEndpointEmbeddingsJumpStart(
    # credentials_profile_name="credentials-profile-name",
    region_name=aws_region,
    endpoint_name="sentence-transformers-all-mpnet-base-v2",
    model_kwargs=emb_parameters,
    content_handler=content_handler,
)

In [10]:
# Embed a single sentence through the endpoint and print the returned vector.
print(embeddings.embed_query("Veronica is a Pythonista"))

[0.06125985085964203, -0.012730025686323643, 0.0026824057567864656, -0.011465264484286308, 0.05068375542759895, 0.031877126544713974, -0.054364461451768875, 0.01286295522004366, -0.0719950795173645, 0.0407085120677948, -0.051409702748060226, 0.07480666786432266, -0.027688778936862946, 0.04893019422888756, -0.04801536351442337, -0.031737539917230606, 0.0098807904869318, 0.00830261129885912, 0.08461165428161621, -0.015256528742611408, -0.027286618947982788, 0.043101273477077484, 0.016588855534791946, -0.0009410082129761577, 0.06394019722938538, -0.023366713896393776, 0.009754469618201256, 0.02499263547360897, -0.009198229759931564, 0.028949543833732605, 0.029997657984495163, -0.036026209592819214, 0.0054996199905872345, 0.027064844965934753, 1.3621404377772706e-06, 0.023159606382250786, 0.0032908061984926462, 0.0027586771175265312, -0.03451158478856087, 0.0014546978054568172, -0.018534166738390923, -0.04215547814965248, 0.051722750067710876, 0.0038519613444805145, -0.021167853847146034, 

In [17]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.base import BaseConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

from typing import Literal

In [18]:
llm_name = "gpt-3.5-turbo"


def load_db(file: str, chain_type: Literal["stuff", "map_reduce"], k: int) -> BaseConversationalRetrievalChain:
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded, split into overlapping chunks, embedded with
    ``SagemakerEndpointEmbeddingsJumpStart`` and indexed in a
    ``DocArrayInMemorySearch`` store. No memory object is attached to the
    chain: callers supply ``chat_history`` themselves on every call.

    Relies on the notebook globals ``aws_region``, ``content_handler``,
    ``emb_parameters`` and ``llm_name``.

    Args:
        file: Path of the PDF to index.
        chain_type: Passed through to ``ConversationalRetrievalChain.from_llm``.
        k: Passed to the retriever as the ``k`` search argument.

    Returns:
        The chain, configured to return the source documents and the
        generated question alongside the answer.
    """
    # Load the PDF and cut it into overlapping chunks.
    pages = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(pages)

    # Embedding client backed by the SageMaker endpoint.
    embedder = SagemakerEndpointEmbeddingsJumpStart(
        endpoint_name="sentence-transformers-all-mpnet-base-v2",
        region_name=aws_region,
        content_handler=content_handler,
        model_kwargs=emb_parameters,
    )

    # Index the chunks in memory and expose them through a similarity retriever.
    vector_store = DocArrayInMemorySearch.from_documents(chunks, embedder)
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # Chat model plus retriever; memory is managed externally by the caller.
    chat_model = ChatOpenAI(model_name=llm_name, temperature=0)
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )

In [24]:
# Build the retrieval chain over the first lecture PDF ("stuff" chain, k=2).
qa = load_db(
    file="docs/cs229_lectures/MachineLearning-Lecture01.pdf",
    chain_type="stuff",
    k=2,
)

# First turn of the conversation, so the chat history passed in is empty.
result = qa(
    {
        "question": "What is regression",
        "chat_history": [],
    }
)

result

got results for 77 in 95.46276664733887s, length of embeddings list is 77


{'question': 'What is regression',
 'chat_history': [],
 'answer': 'Regression is a type of supervised learning problem in machine learning. It involves predicting a continuous value or variable based on input features. In regression, the goal is to find the relationship between the input variables and the output variable in order to make accurate predictions. The term "regression" specifically refers to the fact that the variable being predicted is continuous.',
 'source_documents': [Document(page_content="regression problem. And the term regression sort of refers to the fact that the variable \nyou're trying to predict is a continuous value and price.  \nThere's another class of supervised learning problems which we'll talk about, which are \nclassification problems. And so, in a classifi cation problem, the variab le you're trying to \npredict is discreet rather than continuous . So as one specific example — so actually a \nstandard data set you can download online [i naudible] that

In [25]:
# Display only the answer text from the result dict.
result['answer']

'Regression is a type of supervised learning problem in machine learning. It involves predicting a continuous value or variable based on input features. In regression, the goal is to find the relationship between the input variables and the output variable in order to make accurate predictions. The term "regression" specifically refers to the fact that the variable being predicted is continuous.'

In [26]:
# Display the Document objects the chain returned as sources for the answer.
result['source_documents']

[Document(page_content="regression problem. And the term regression sort of refers to the fact that the variable \nyou're trying to predict is a continuous value and price.  \nThere's another class of supervised learning problems which we'll talk about, which are \nclassification problems. And so, in a classifi cation problem, the variab le you're trying to \npredict is discreet rather than continuous . So as one specific example — so actually a \nstandard data set you can download online [i naudible] that lots of machine learning \npeople have played with. Let's say you collect  a data set on breast cancer tumors, and you \nwant to learn the algorithm to predict wh ether or not a certai n tumor is malignant. \nMalignant is the opposite of benign, right, so ma lignancy is a sort of harmful, bad tumor. \nSo we collect some number of features, some  number of properties of these tumors, and \nfor the sake of sort of having a simple [inaudi ble] explanation, let's just say that we're", me

In [27]:
import panel as pn
import param


class cbfs(param.Parameterized):
    """State holder and Panel view callbacks for the chatbot dashboard.

    An instance owns the retrieval chain (``self.qa``), the list of rendered
    conversation rows (``self.panels``) and the parameters below. Several
    methods read the notebook-level widgets ``file_input``, ``button_load``
    and ``inp`` as globals, so those must exist before the callbacks fire.
    """

    # (question, answer) tuples of earlier turns; passed to the chain on every call.
    chat_history = param.List([])
    # Answer text of the most recent turn.
    answer = param.String("")
    # result["generated_question"] of the most recent turn.
    db_query = param.String("")
    # result["source_documents"] of the most recent turn.
    db_response = param.List([])

    def __init__(self, **params):
        """Create the chatbot and build the chain for the default lecture PDF."""
        super().__init__(**params)
        self.panels = []
        self.loaded_file = "docs/cs229_lectures/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Rebuild the chain from the uploaded PDF and report the loaded file.

        Args:
            count: Click count of the "Load DB" button; 0 means the initial render.

        Returns:
            A Markdown pane naming the file the chain is currently built on.
        """
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        uploaded_name = file_input.filename
        button_load.button_style = "outline"
        try:
            self.qa = load_db("temp.pdf", "stuff", 4)
        finally:
            # Restore the button even when indexing fails, so it is not left
            # in its "busy" look.
            button_load.button_style = "solid"
        # Record the new file only after the chain was rebuilt successfully, so
        # the label never names a file the chain is not actually using.
        self.loaded_file = uploaded_name
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Run one chat turn and return the rendered conversation so far.

        Args:
            query: Text entered by the user; an empty value renders an empty box.

        Returns:
            A scrollable WidgetBox holding every "User:" / "ChatBot:" row.
        """
        if not query:
            return pn.WidgetBox(
                pn.Row("User:", pn.pane.Markdown("", width=600)), scroll=True
            )
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result["answer"]
        self.panels.extend(
            [
                pn.Row("User:", pn.pane.Markdown(query, width=600)),
                pn.Row(
                    "ChatBot:",
                    pn.pane.Markdown(
                        # `styles` (not `style`) to match every other pane in
                        # this class; `style` is the deprecated spelling.
                        self.answer, width=600, styles={"background-color": "#F6F6F6"}
                    ),
                ),
            ]
        )
        inp.value = ""  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # The dependency must be spelled exactly like the parameter name; the
    # previous spec carried a trailing space and so did not name `db_query`.
    @param.depends(
        "db_query",
    )
    def get_lquest(self):
        """Render the question most recently stored in ``db_query``."""
        if not self.db_query:
            return pn.Column(
                pn.Row(
                    pn.pane.Markdown(
                        "Last question to DB:", styles={"background-color": "#F6F6F6"}
                    )
                ),
                pn.Row(pn.pane.Str("no DB accesses so far")),
            )
        return pn.Column(
            pn.Row(
                pn.pane.Markdown("DB query:", styles={"background-color": "#F6F6F6"})
            ),
            pn.pane.Str(self.db_query),
        )

    @param.depends(
        "db_response",
    )
    def get_sources(self):
        """Render the source documents of the last turn, or nothing if there are none."""
        if not self.db_response:
            return
        rlist = [
            pn.Row(
                pn.pane.Markdown(
                    "Result of DB lookup:", styles={"background-color": "#F6F6F6"}
                )
            )
        ]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    # NOTE(review): this depends on method names rather than on the
    # `chat_history` parameter - confirm it refreshes as intended.
    @param.depends("convchain", "clr_history")
    def get_chats(self):
        """Render the current contents of ``chat_history``, one row per exchange."""
        if not self.chat_history:
            return pn.WidgetBox(
                pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True
            )
        rlist = [
            pn.Row(
                pn.pane.Markdown(
                    "Current Chat History variable",
                    styles={"background-color": "#F6F6F6"},
                )
            )
        ]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history`` to an empty list.

        Args:
            count: Unused; lets the method serve as a button ``on_click``
                callback, which is called with one positional argument.
        """
        self.chat_history = []

In [39]:
# Instantiate the chatbot. The constructor builds the chain for the default
# lecture PDF, which embeds the whole document (see the timing line in the output).
cb = cbfs()

# Widgets. `file_input`, `button_load` and `inp` are read as globals inside
# cbfs methods, so these names must not change.
file_input = pn.widgets.FileInput(accept=".pdf")
button_load = pn.widgets.Button(name="Load DB", button_type="primary")
button_clearhistory = pn.widgets.Button(name="Clear History", button_type="warning")
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput(placeholder="Enter text here…")

# Bind the callbacks to the widgets: the load callback receives the button's
# click count, the conversation callback receives the text input.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

# jpg_pane = pn.pane.Image("./img/convchain.jpg")

# Tab 1: question input and the running conversation.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)
# Tab 2: last generated DB query and the source documents it returned.
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)
# Tab 3: current contents of the chat history.
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
# Tab 4: upload a different PDF and/or clear the chat history.
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(
        button_clearhistory,
        pn.pane.Markdown("Clears chat history. Can use to start a new topic"),
    ),
    pn.layout.Divider(),
    # pn.Row(jpg_pane.clone(width=400)),
)
# Assemble the title and the four tabs; the bare last expression displays the dashboard.
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown("# ChatWithYourData_Bot")),
    pn.Tabs(
        ("Conversation", tab1),
        ("Database", tab2),
        ("Chat History", tab3),
        ("Configure", tab4),
    ),
)
dashboard

got results for 77 in 96.36150693893433s, length of embeddings list is 77




Column
    [0] Row
        [0] Markdown(str)
    [1] Tabs
        [0] Column
            [0] Row
                [0] TextInput(placeholder='Enter text here…')
            [1] Divider()
            [2] ParamFunction(function, _pane=WidgetBox, defer_load=False, height=300, loading_indicator=True)
            [3] Divider()
        [1] Column
            [0] ParamMethod(method, _pane=Column, defer_load=False)
            [1] Divider()
            [2] ParamMethod(method, _pane=Str, defer_load=False)
        [2] Column
            [0] ParamMethod(method, _pane=WidgetBox, defer_load=False)
            [1] Divider()
        [3] Column
            [0] Row
                [0] FileInput(accept='.pdf')
                [1] Button(button_type='primary', name='Load DB')
                [2] ParamFunction(function, _pane=Markdown, defer_load=False)
            [1] Row
                [1] Markdown(str)
            [2] Divider()