### Falcon-7b-instruct SageMaker Endpoint

In [1]:
import json 

import boto3
from langchain import PromptTemplate, SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler

In [3]:
class ContentHandlerLLM(LLMContentHandler):
    """JSON request/response adapter for the text-generation endpoint.

    Requests are sent as ``{"inputs": <prompt>, "parameters": <model_kwargs>}``
    and the reply is read as a JSON list whose first item carries the
    ``"generated_text"`` field.
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Serialize the prompt and its generation parameters to UTF-8 JSON bytes."""
        payload = {'inputs': prompt, 'parameters': model_kwargs}
        return json.dumps(payload).encode('utf-8')

    def transform_output(self, output: bytes) -> str:
        """Return the generated text from the first item of the JSON response.

        Although annotated as ``bytes``, ``output`` is consumed through
        ``.read()``, so it has to be a readable, file-like object.
        """
        raw_text = output.read().decode("utf-8")
        first_candidate = json.loads(raw_text)[0]
        return first_candidate["generated_text"]

In [2]:
# Name of the SageMaker endpoint passed to the LLM wrapper below.
endpoint_name = "falcon-7b-instruct"
# Region comes from the default boto3 session and is reused for every endpoint client.
# NOTE(review): presumably None when no default region is configured — confirm.
aws_region = boto3.Session().region_name

In [52]:
# Generation settings handed to the endpoint wrapper as `model_kwargs`.
# NOTE(review): presumably these reach ContentHandlerLLM.transform_input and are
# sent under the "parameters" key of the request body — confirm.
parameters = {
    "max_new_tokens": 300,
    "return_full_text": False,
    "do_sample": True,
    "top_p": 0.3,
}

# Adapter that builds the JSON request and extracts the generated text.
content_handler = ContentHandlerLLM()

# LLM wrapper bound to the endpoint name and region configured above.
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=aws_region,
    model_kwargs=parameters,
    content_handler=content_handler,
)

In [53]:
llm("What is the largest mammal")

' on Earth?\nThe largest mammal on Earth is the blue whale.'

### Embedding SageMaker Endpoint

In [4]:
from typing import Dict, List
import json

from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

The `SagemakerEndpointEmbeddingsJumpStart` class below comes from this tutorial: [Source](https://medium.com/@ryanntk/deploying-hugging-face-embedding-models-on-aws-sagemaker-a-comprehensive-tutorial-with-langchain-af8e0b405b51)

In [7]:
import time

# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function
class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings):
    """Embeddings client that sends documents to the endpoint in small batches."""

    def embed_documents(
        self, texts: List[str], chunk_size: int = 5
    ) -> List[List[float]]:
        """Compute doc embeddings using a SageMaker Inference Endpoint.

        Args:
            texts: The list of texts to embed.
            chunk_size: How many input texts are grouped together into one
                request. Must be a positive integer; a value larger than
                ``len(texts)`` sends everything in a single request.

        Returns:
            List of embeddings, one for each text. An empty input yields an
            empty list without calling the endpoint.

        Raises:
            ValueError: If ``chunk_size`` is zero or negative.
        """
        # A zero step would make range() fail with an unhelpful message, and a
        # negative step would silently skip every text; reject both up front.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

        # Nothing to embed: avoid range(0, 0, 0), which raises ValueError.
        if not texts:
            return []

        results = []
        _chunk_size = min(chunk_size, len(texts))
        st = time.time()
        for i in range(0, len(texts), _chunk_size):
            response = self._embedding_func(texts[i : i + _chunk_size])
            results.extend(response)
        time_taken = time.time() - st
        print(
            f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}"
        )
        return results

In [5]:
class ContentHandler(EmbeddingsContentHandler):
    """JSON request/response adapter for the embedding endpoint."""

    content_type = "application/json"
    accepts = "application/json"

    # `List[str]` (not `list[str]`) keeps the annotations consistent with the rest
    # of the notebook and lets the class be defined on Python versions before 3.9.
    def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes:
        """
        Transforms the input into bytes that can be consumed by SageMaker endpoint.
        Args:
            inputs: List of input strings.
            model_kwargs: Additional keyword arguments to be passed to the endpoint.
        Returns:
            The transformed bytes input: UTF-8 encoded JSON with the texts under
            "inputs" and the keyword arguments under "parameters".
        """
        # Example: inference.py expects a JSON string with a "inputs" key:
        # input_str = json.dumps({"inputs": inputs, **model_kwargs})
        input_str = json.dumps({"inputs": inputs, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> List[List[float]]:
        """
        Transforms the output from the endpoint into a list of embeddings.
        Args:
            output: The response body from SageMaker endpoint. Although annotated
                as bytes, it is consumed through ``.read()``, so it has to be a
                readable, file-like object.
        Returns:
            The transformed output - list of embeddings
        Note:
            The length of the outer list is the number of input strings.
            The length of the inner lists is the embedding dimension.
        """
        # Example: inference.py returns a JSON string with the list of
        # embeddings in a "vectors" key:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json["vectors"]

In [8]:
# Dedicated name so this embeddings adapter does not rebind `content_handler`,
# which the LLM setup cell uses for a different kind of handler.
embedding_content_handler = ContentHandler()

# Extra keyword arguments forwarded to the embedding endpoint with every request.
emb_parameters = {"device": "cuda"}

embeddings = SagemakerEndpointEmbeddingsJumpStart(
    # credentials_profile_name="credentials-profile-name",
    endpoint_name="sentence-transformers-all-mpnet-base-v2",
    region_name=aws_region,
    content_handler=embedding_content_handler,
    model_kwargs=emb_parameters,
)

In [9]:
query_result = embeddings.embed_query("foo bar dog")
len(query_result) 

768

In [10]:
query_result = embeddings.embed_documents(["foo bar dog", "Veronica is a Pythonista"])
len(query_result) 

got results for 2 in 0.8022239208221436s, length of embeddings list is 2


2

In [8]:
len(query_result[1])

768

In [31]:
print(query_result)

[0.023010829463601112, 0.06674211472272873, 0.015658535063266754, -0.059901751577854156, 0.00899288710206747, 0.04786750301718712, 0.005673463921993971, 0.023586349561810493, -0.06605415046215057, 0.02126981131732464, -0.0002889889001380652, 0.03975634276866913, -0.03429273143410683, 0.05184069275856018, 0.02275446429848671, -0.01708190329372883, 0.017952624708414078, 0.03652235493063927, 0.07056411355733871, -0.040407828986644745, -0.00187441217713058, 0.02352127991616726, 0.004665896762162447, 0.018560048192739487, -0.047574300318956375, 0.002585913985967636, -0.018474262207746506, -0.003118740627542138, -0.06551744788885117, -0.013685131445527077, -0.04172193631529808, 0.01329813152551651, -0.017570368945598602, 0.003636210458353162, 2.1068485693831462e-06, -0.01581939496099949, -0.01604585535824299, 0.005425119306892157, 0.0680365040898323, -0.05921432375907898, -0.08718516677618027, -0.020225796848535538, -0.02191966399550438, 0.006423304323107004, -0.029958896338939667, -0.064466

In [13]:
# dir(embeddings)

![steps](image.png)

### Load Document

In [11]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

In [25]:
# loader = DirectoryLoader(
#     "docs/cs229_lectures/", glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader
# )
# pages = loader.load()
# len(pages)

100%|██████████| 3/3 [00:00<00:00,  6.00it/s]


56

In [12]:
loader = PyPDFLoader(
    "docs/cs229_lectures/MachineLearning-Lecture01.pdf",
)
pages = loader.load() 
len(pages)

22

### Split Document

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used for the loaded PDF pages: chunk size 1500 with an overlap of 150
# between neighbouring chunks.
# NOTE(review): sizes are presumably measured in characters — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

In [14]:
splits = text_splitter.split_documents(pages)
len(splits)

57

### Vector store and Embedding

In [16]:
from langchain.vectorstores import Chroma


persist_directory = "docs/chromahf/"

In [17]:
# Open the collection stored under `persist_directory`. If it is still empty
# (for example on a fresh checkout), index `splits` once so the notebook runs
# top to bottom without depending on a previously populated directory.
vectordb = Chroma(embedding_function=embeddings, persist_directory=persist_directory)

if vectordb._collection.count() == 0:
    # NOTE(review): older Chroma versions may need an explicit persist step to
    # write the new index to disk — confirm for the installed version.
    vectordb.add_documents(splits)

print(vectordb._collection.count())

57


In [18]:
vectordb.similarity_search("what did they say about matlab?")

[Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is a type of machine \nlearning algorithm for modeling time series and a few other things, so  extensions to the \nmaterials that I'll be covering in the main  lectures. And attend ance at the discussion \nsections is optional, okay?  \nSo that was all I had from l ogistics. Before we move on to start talking a bit about \nmachine learning, let me check what questions you have. Yeah?  \nStudent : [Inaudible] R or something like that?  \nInstructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're \nwelcome to use R, but I would strongly advi se against it, mainly because in the last \nproblem set, we actually supply some code th at will run in Octave  but that would be \nsomewhat painful for you to translate into R yourself. So for your other assignments, if \nyou wanna submit

### Retrieval

In [28]:
from langchain.docstore.document import Document


def pretty_print_docs(docs: List[Document]) -> None:
    """Print each document's text under a numbered heading.

    Documents are numbered from 1 and separated by a line of 100 dashes.
    An empty list prints a single blank line.
    """
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    divider = f"\n{'-' * 100}\n"
    print(divider.join(sections))


def get_metadata(docs: List[Document]) -> None:
    """Print the ``metadata`` of every document, one per line.

    Nothing is returned; the records are only written to standard output.
    """
    metadata_records = (entry.metadata for entry in docs)
    for record in metadata_records:
        print(record)

#### MMR

In [20]:
question = "what did they say about matlab?"

docs = vectordb.max_marginal_relevance_search(question, k=2, fetch_k=3)
print(docs) 

[Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is a type of machine \nlearning algorithm for modeling time series and a few other things, so  extensions to the \nmaterials that I'll be covering in the main  lectures. And attend ance at the discussion \nsections is optional, okay?  \nSo that was all I had from l ogistics. Before we move on to start talking a bit about \nmachine learning, let me check what questions you have. Yeah?  \nStudent : [Inaudible] R or something like that?  \nInstructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're \nwelcome to use R, but I would strongly advi se against it, mainly because in the last \nproblem set, we actually supply some code th at will run in Octave  but that would be \nsomewhat painful for you to translate into R yourself. So for your other assignments, if \nyou wanna submit

In [21]:

        
get_metadata(docs)

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Working with Metadata

In [23]:
question = "what did they say about regression on page 3?"

docs = vectordb.similarity_search(
    question,
    k=2,
    filter={"page": 3},
)

pretty_print_docs(docs) 

Document 1:

So, see, most of us use learning algorithms half a dozen, a dozen, maybe dozens of times 
without even knowing it.  
And of course, learning algorithms are also  doing things like giving us a growing 
understanding of the human genome. So if so meday we ever find a cure for cancer, I bet 
learning algorithms will have had a large role in that. That's sort of the thing that Tom 
works on, yes?  
So in teaching this class, I sort of have thre e goals. One of them is just to I hope convey 
some of my own excitement a bout machine learning to you.  
The second goal is by the end of this class, I hope all of you will be ab le to apply state-of-
the-art machine learning algorithms to whatev er problems you're interested in. And if you 
ever need to build a system for reading zi p codes, you'll know how to do that by the end 
of this class.  
And lastly, by the end of this class, I reali ze that only a subset of  you are interested in 
doing research in machine learning, but by t

In [29]:
get_metadata(docs)

{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Self-Query 

In [32]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [33]:
metadata_field_info = [
    # AttributeInfo(
    #     name="source",
    #     description="The lecture the chunk is from, should be one of `docs/cs229_lectures/MachineLearning-Lecture01.pdf`, `docs/cs229_lectures/MachineLearning-Lecture02.pdf`, or `docs/cs229_lectures/MachineLearning-Lecture03.pdf`",
    #     type="string",
    # ),
    AttributeInfo(
        name="page",
        description="The page from the lecture",
        type="integer",
    ),
]

In [34]:
# Short description of what the indexed documents contain, passed to the retriever.
document_content_description = "Lecture notes"
# OpenAI completion model used by the self-query retriever.
# NOTE(review): presumably reads the API key from the environment — confirm.
base_llm = OpenAI(temperature=0)

# Self-query retriever over `vectordb`, given the content description and the
# metadata fields declared in `metadata_field_info`.
# NOTE(review): verbose=True presumably produces the structured-query line shown
# in the recorded outputs — confirm.
retriever = SelfQueryRetriever.from_llm(
    base_llm, vectordb, document_content_description, metadata_field_info, verbose=True
) 

In [35]:
question = "what did they say about regression on page 3?"

docs = retriever.get_relevant_documents(question) 



query='regression' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='page', value=3) limit=None


In [36]:
pretty_print_docs(docs)

Document 1:

So, see, most of us use learning algorithms half a dozen, a dozen, maybe dozens of times 
without even knowing it.  
And of course, learning algorithms are also  doing things like giving us a growing 
understanding of the human genome. So if so meday we ever find a cure for cancer, I bet 
learning algorithms will have had a large role in that. That's sort of the thing that Tom 
works on, yes?  
So in teaching this class, I sort of have thre e goals. One of them is just to I hope convey 
some of my own excitement a bout machine learning to you.  
The second goal is by the end of this class, I hope all of you will be ab le to apply state-of-
the-art machine learning algorithms to whatev er problems you're interested in. And if you 
ever need to build a system for reading zi p codes, you'll know how to do that by the end 
of this class.  
And lastly, by the end of this class, I reali ze that only a subset of  you are interested in 
doing research in machine learning, but by t

In [37]:
get_metadata(docs)

{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 3, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


In [38]:
question = (
    "what did they say about regression between pages one and ten?"
)

docs = retriever.get_relevant_documents(question)

get_metadata(docs)



query='regression' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='page', value=1), Comparison(comparator=<Comparator.LTE: 'lte'>, attribute='page', value=10)]) limit=None
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 6, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 10, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


#### Compression

In [39]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [40]:
base_llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(base_llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vectordb.as_retriever()
)

In [41]:
question = "what did they say about matlab?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

Document 1:

"I personally end up using MATLAB quite a bit more often for various reasons."
----------------------------------------------------------------------------------------------------
Document 2:

"And the student said, "Oh, it was the MATLAB." So for those of you that don't know MATLAB yet, I hope you do learn it. It's not hard, and we'll actually have a short MATLAB tutorial in one of the discussion sections for those of you that don't know it."
----------------------------------------------------------------------------------------------------
Document 3:

"MATLAB is I guess part of the programming language that makes it very easy to write codes using matrices, to write code for numerical routines, to move data around, to plot data. And it's sort of an extremely easy to learn tool to use for implementing a lot of learning algorithms."
----------------------------------------------------------------------------------------------------
Document 4:

"So one more organizational

In [42]:
docs = vectordb.similarity_search(question, k=4)
pretty_print_docs(docs)

Document 1:

So later this quarter, we'll use the discussion sections to talk about things like convex 
optimization, to talk a little bit about hidde n Markov models, which is a type of machine 
learning algorithm for modeling time series and a few other things, so  extensions to the 
materials that I'll be covering in the main  lectures. And attend ance at the discussion 
sections is optional, okay?  
So that was all I had from l ogistics. Before we move on to start talking a bit about 
machine learning, let me check what questions you have. Yeah?  
Student : [Inaudible] R or something like that?  
Instructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're 
welcome to use R, but I would strongly advi se against it, mainly because in the last 
problem set, we actually supply some code th at will run in Octave  but that would be 
somewhat painful for you to translate into R yourself. So for your other assignments, if 
you wanna submit a solution in R, that'

In [43]:
# get_metadata prints every record itself and returns None, so call it directly;
# wrapping it in print() adds a stray "None" line after each listing.
get_metadata(compressed_docs)
get_metadata(docs)

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
None
{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 8, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 7, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
None


#### Combine threshold score and Compression

In [48]:
# Rebinds `compression_retriever`: same compressor, but the base retriever now
# uses "similarity_score_threshold" search with a score threshold of 0.9.
# In the recorded run this returned an empty list for `question`, so 0.9 looks
# too strict for this collection — consider lowering it.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(
        search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.9}
    ),
)

In [49]:
compression_retriever.get_relevant_documents(question)



[]

#### Combine MMR and Compression

In [50]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr"),
)

docs = compression_retriever.get_relevant_documents(question)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-OKEaxoW73Lo9pocIXi1wDApB on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

In [51]:
pretty_print_docs(docs)

Document 1:

"I personally end up using MATLAB quite a bit more often for various reasons."


### Question Answering

#### RetrievalQA Chain

In [67]:
from langchain.chains import RetrievalQA

# Question-answering chain that answers with `llm` (the SageMaker endpoint
# wrapper defined earlier) over documents fetched from `vectordb` using MMR
# search with k=2.
# return_source_documents=True makes the result carry a 'source_documents'
# entry alongside 'result', as the recorded output shows.
# NOTE(review): chain_type="stuff" presumably places all retrieved documents
# into a single prompt — confirm.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    return_source_documents=True,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2}),
)

# qa_chain

In [68]:
question = "what did they say about matlab?"
result = qa_chain({"query": question}) 

result 

{'query': 'what did they say about matlab?',
 'result': '\n\nMatlab is a programming language and environment that is commonly used in data analysis, visualization, and numerical computing. It is widely used in scientific and engineering fields, including physics, engineering, and finance. It is often used to create interactive visualizations and perform complex numerical computations. It is also used to develop and test algorithms, which are mathematical procedures that can be used to solve complex problems. It is a powerful tool for data analysis and can be used to analyze large datasets, perform statistical analysis, and create complex visualizations. It is also used to develop and test algorithms, which are mathematical procedures that can be used to solve complex problems.',
 'source_documents': [Document(page_content="So later this quarter, we'll use the discussion sections to talk about things like convex \noptimization, to talk a little bit about hidde n Markov models, which is

In [70]:
pretty_print_docs(result['source_documents'])

Document 1:

So later this quarter, we'll use the discussion sections to talk about things like convex 
optimization, to talk a little bit about hidde n Markov models, which is a type of machine 
learning algorithm for modeling time series and a few other things, so  extensions to the 
materials that I'll be covering in the main  lectures. And attend ance at the discussion 
sections is optional, okay?  
So that was all I had from l ogistics. Before we move on to start talking a bit about 
machine learning, let me check what questions you have. Yeah?  
Student : [Inaudible] R or something like that?  
Instructor (Andrew Ng) : Oh, yeah, let's see, right. So our policy has been that you're 
welcome to use R, but I would strongly advi se against it, mainly because in the last 
problem set, we actually supply some code th at will run in Octave  but that would be 
somewhat painful for you to translate into R yourself. So for your other assignments, if 
you wanna submit a solution in R, that'

In [69]:
get_metadata(result['source_documents'])

{'page': 9, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}
{'page': 2, 'source': 'docs/cs229_lectures/MachineLearning-Lecture01.pdf'}


In [66]:
# str.strip treats its argument as a set of characters, so '\n' removes exactly
# what '\n\n' did: every leading and trailing newline.
result['result'].strip('\n')

'The students were told that MATLAB was a good tool to use for their project, but that it was not necessary to use it for the term project. The instructor also stated that he personally used MATLAB more often than R.'