In [1]:
# URL that later cells POST inference requests to (see the requests.post calls).
# Left empty here: fill in the endpoint URL before running the cells below.
model_api_gateway = ""

In [2]:
import requests

# Request payload: "inputs" holds one conversation (a list of role/content
# messages) and "parameters" holds the generation settings sent with it.
json_body = {
    "inputs" : [
       [{"role" : "system", "content" : "You are an expert in Python."},
       {"role" : "user", "content" : "How would I write a function to determine the Nth Fibonacci number?"}]
    ],
    "parameters" : {
        "max_new_tokens" : 256,
        "top_p" : 0.9,
        "temperature" : 0.1
    }
}

# Bound the wait so a stalled endpoint cannot hang the notebook forever.
response = requests.post(model_api_gateway, json=json_body, timeout=60)
# Fail loudly on 4xx/5xx instead of decoding and printing an error payload
# as if it were a generation.
response.raise_for_status()

print(response.json())

[{'generation': {'role': 'assistant', 'content': " Great! I'd be happy to help you with that.\n\nTo determine the Nth Fibonacci number using Python, you can use the following function:\n```\ndef fibonacci(n):\n    if n <= 1:\n        return n\n    else:\n        return fibonacci(n-1) + fibonacci(n-2)\n```\nThis function uses the recursive formula for the Fibonacci sequence, where each number is the sum of the two preceding numbers (starting with 0 and 1). The function will call itself twice, once with `n-1` and once with `n-2`, and then add the results to get the final answer.\n\nHere's an example of how you could use this function:\n```\n>>> fibonacci(5)\n21\n```\nThis would return the 5th Fibonacci number, which is 21.\n\nIf you want to find the Nth Fibonacci number for some arbitrary value of N, you can simply call the function with `n` as the argument:\n```\n>>> fibonacci(20)\n894\n"}}]


In [3]:
!pip install -U -q langchain

In [4]:
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM

class Llama2SageMaker(LLM):
    """LangChain LLM wrapper that sends prompts to the model endpoint over HTTP.

    Each call POSTs a single-conversation payload containing one ``user``
    message to the notebook-level ``model_api_gateway`` URL and returns the
    text of the first generation in the JSON response.
    """

    # Generation settings forwarded verbatim in the request's "parameters".
    max_new_tokens: int = 256
    top_p: float = 0.9
    temperature: float = 0.1
    # Seconds to wait for the endpoint before requests raises a timeout error.
    request_timeout: float = 60.0

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        return "Llama2SageMaker"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the endpoint and return the generated text.

        Args:
            prompt: Text sent as the content of a single ``user`` message.
            stop: Must be ``None``; stop sequences are not supported.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Extra call-time arguments forwarded by LangChain;
                accepted so such calls do not fail, otherwise ignored.

        Returns:
            The ``content`` of the first generation in the JSON response.

        Raises:
            ValueError: If ``stop`` is provided.
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        """
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        json_body = {
            "inputs" : [
              [{"role" : "user", "content" : prompt}]
            ],
            "parameters" : {
                "max_new_tokens" : self.max_new_tokens,
                "top_p" : self.top_p,
                "temperature" : self.temperature
            }
        }

        # A timeout keeps a stalled endpoint from blocking the chain forever.
        response = requests.post(
            model_api_gateway, json=json_body, timeout=self.request_timeout
        )
        # Surface HTTP failures directly rather than as a KeyError/TypeError
        # raised while indexing into an error payload below.
        response.raise_for_status()

        return response.json()[0]["generation"]["content"]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "max_new_tokens" : self.max_new_tokens,
            "top_p" : self.top_p,
            "temperature" : self.temperature
        }

In [5]:
# Instantiate the wrapper with its default generation settings
# (max_new_tokens=256, top_p=0.9, temperature=0.1).
llm = Llama2SageMaker()

In [6]:
# Smoke test: send one prompt through the wrapper; the returned completion
# string is shown as the cell output.
llm("How much wood could a woodchuck chuck if a wood chuck could chuck wood?")

" The answer to this tongue twister is a bit of a trick question! Woodchucks, also known as groundhogs, are not actually able to chuck wood. They are burrowing animals that live in the ground and are known for their ability to move soil and dig tunnels, but they do not have the ability to chuck wood.\n\nSo, if a woodchuck could chuck wood, it would be able to move a very small amount of wood, perhaps a few small branches or twigs at a time. However, it's important to remember that woodchucks are not capable of chucking wood, so this is purely a hypothetical scenario!"

In [7]:
!pip install -q -U pymupdf

In [8]:
from langchain.document_loaders import PyMuPDFLoader

# PDF loader pointed at arXiv paper 2005.11401v4; it is handed to the index
# creator in a later cell and is the document the questions are asked about.
loader = PyMuPDFLoader("https://arxiv.org/pdf/2005.11401v4.pdf")

In [9]:
!pip install -q -U openai chromadb tiktoken

In [10]:
import getpass
import os

# Prompt for the key interactively (getpass does not echo the input) so it is
# never written into the notebook, then publish it to this process through the
# OPENAI_API_KEY environment variable.
# NOTE(review): presumably read by the OpenAI-backed components used in later
# cells (index creation, ragas evaluation) — confirm.
openai_api_key = getpass.getpass("Please provide your OpenAI API Key: ")
os.environ["OPENAI_API_KEY"] = openai_api_key

Please provide your OpenAI API Key: ··········


In [11]:
!pip install chromadb



In [None]:
from langchain.indexes import VectorstoreIndexCreator

# Build a vector-store index from the PDF loader using the creator's defaults.
# NOTE(review): the defaults presumably use OpenAI embeddings and a Chroma
# store (both set up in earlier cells) — confirm against the installed version.
index = VectorstoreIndexCreator().from_loaders([loader])

In [13]:
from langchain.chains import RetrievalQA

# Retrieval QA chain: `llm` answers each query using documents fetched by the
# index's retriever. return_source_documents=True keeps the retrieved documents
# in the result under "source_documents", which later cells read as contexts.
qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=index.vectorstore.as_retriever(), return_source_documents=True
)

In [14]:
# Ask one question through the chain and display only the generated answer
# text (the "result" field of the returned dict).
question = "What is Retrieval Augmented Generation?"
result = qa_chain({"query": question})
result["result"]

' Based on the provided context, Retrieval Augmented Generation (RAG) is a technique that combines the strengths of retrieval-based models and generation models to improve the performance of knowledge-intensive NLP tasks. RAG models use a retrieval component to access and manipulate knowledge in a non-parametric manner, and a generation component to generate text that is factual and specific. The goal of RAG is to unify previous successes in incorporating retrieval into individual NLP tasks and show that a single retrieval-based architecture can achieve strong performance across several tasks.'

In [15]:
# Show the complete result dict (query, answer and source documents) for the
# question asked in the previous cell. Reusing `result` avoids a second,
# identical retrieval + LLM round trip made only to display more fields.
result

{'query': 'What is Retrieval Augmented Generation?',
 'result': ' Based on the provided context, Retrieval Augmented Generation (RAG) is a technique that combines the strengths of retrieval-based models and generation models to improve the performance of knowledge-intensive NLP tasks. RAG models use a retrieval component to access and manipulate knowledge in a non-parametric manner, and a generation component to generate text that is factual and specific. The goal of RAG is to unify previous successes in incorporating retrieval into individual NLP tasks and show that a single retrieval-based architecture can achieve strong performance across several tasks.',
 'source_documents': [Document(page_content='including less of emphasis on lightly editing a retrieved item, but on aggregating content from several\npieces of retrieved content, as well as learning latent retrieval, and retrieving evidence documents\nrather than related training pairs. This said, RAG techniques may work well in th

In [16]:
# Evaluation questions sent through the QA chain in the next cell.
# Contents and order are kept exactly as recorded, including repeated
# questions, so the resulting dataset keeps the same rows.
question_list = [
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What is the title of the paper on Retrieval Augmented Generation mentioned in the context?',
    'What is the title of the paper on Retrieval Augmented Generation mentioned in the context information?',
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What is the task of the Retrieval Augmented Generation (RAG) model according to the context information?',
    'What advantages does RAG have over the DPR QA system in terms of generating answers?',
    'What are some potential downsides of using RAG, a language model based on Wikipedia, in various scenarios?',
    'What is the file name of the paper on Retrieval Augmented Generation?',
    'What is the accuracy of RAG models in generating correct answers even when the correct answer is not in any retrieved document?',
    'Question: What is an example of how parametric and non-parametric memories work together in BART?',
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What is the purpose of using multiple answer annotations in open-domain QA?',
    'Question: Which novel by Ernest Hemingway is based on his wartime experiences?',
    'What advantage do non-parametric memory models like RAG have over parametric-only models like T5 or BART?',
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What training setup details are mentioned in the paper on Retrieval Augmented Generation?',
    'What is the title of the paper on Retrieval Augmented Generation mentioned in the context information?',
    'What is the title of the paper mentioned in the context information?',
    'What are the three sections into which the 14th century work "The Divine Comedy" is divided?',
    'What are the two components of RAG models described in the context?',
    'What is the title of the paper on Retrieval Augmented Generation?',
    'What is the benchmark dataset used for question answering research mentioned in the provided context?',
    'What are the two models proposed in the paper on Retrieval Augmented Generation?',
    'What is the approach used to train the retriever and generator components in the paper on Retrieval Augmented Generation?',
    'What is the best performing "closed-book" open-domain QA model mentioned in the context?',
    'What is the ratio of distinct to total tri-grams for the generation tasks in the Jeopardy Question Generation Task?',
    'What is the main finding of the paper on Retrieval Augmented Generation?',
    'What is the main objective of the work presented in the paper on Retrieval Augmented Generation?',
    'What are the limitations of large pre-trained language models in accessing and manipulating knowledge in knowledge-intensive tasks?',
    'What is the main objective of RAG in the experiments conducted in the paper on Retrieval Augmented Generation?',
    'What is the purpose of the MSMARCO NLG task v2.1?',
]

In [17]:
# Run every question through the QA chain and collect one record per question:
# the question itself, the text of each retrieved source document, and the
# generated answer. Each record is printed as soon as it is produced.
question_context_answer = []

for question in question_list:
    result = qa_chain({"query": question})
    answer_package = dict(
        question=question,
        contexts=[page.page_content for page in result["source_documents"]],
        answer=result["result"],
    )
    question_context_answer += [answer_package]
    print(answer_package)

{'question': 'What is the title of the paper on Retrieval Augmented Generation?', 'contexts': ['[20] Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Ming-Wei Chang. REALM:\nRetrieval-augmented language model pre-training. ArXiv, abs/2002.08909, 2020. URL https:\n//arxiv.org/abs/2002.08909.\n[21] Tatsunori B Hashimoto,\nKelvin Guu,\nYonatan Oren,\nand Percy S Liang.\nA\nretrieve-and-edit\nframework\nfor\npredicting\nstructured\noutputs.\nIn\nS.\nBengio,\nH. Wallach,\nH. Larochelle,\nK. Grauman,\nN. Cesa-Bianchi,\nand R. Garnett,\ned-\nitors,\nAdvances\nin\nNeural\nInformation\nProcessing\nSystems\n31,\npages\n10052–\n10062.\nCurran\nAssociates,\nInc.,\n2018.\nURL\nhttp://papers.nips.cc/paper/\n8209-a-retrieve-and-edit-framework-for-predicting-structured-outputs.\npdf.\n[22] Nabil Hossain, Marjan Ghazvininejad, and Luke Zettlemoyer. Simple and effective retrieve-\nedit-rerank text generation. In Proceedings of the 58th Annual Meeting of the Association for\nComputational Linguis

In [18]:
!pip install -q -U datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [19]:
import datasets
import pandas as pd

# Convert the collected records into a `datasets.Dataset` by way of a pandas
# DataFrame; the columns come from the record keys ("question", "contexts",
# "answer").
dataset = datasets.Dataset.from_pandas(pd.DataFrame(data=question_context_answer))

In [20]:
!pip install -q -U ragas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-api-core 2.11.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2

In [21]:
from ragas import evaluate

# Score the dataset with ragas using its default metrics; the run log below
# shows answer_relevancy, context_ relevancy and faithfulness being evaluated.
results = evaluate(dataset)

Downloading (…)lve/main/config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/57.4M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/517 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

evaluating with [answer_relevancy]


100%|██████████| 3/3 [00:55<00:00, 18.45s/it]


evaluating with [context_ relevancy]


100%|██████████| 3/3 [01:55<00:00, 38.63s/it]


evaluating with [faithfulness]


100%|██████████| 3/3 [03:01<00:00, 60.47s/it]


In [22]:
# Display the scores returned by evaluate().
results

{'ragas_score': 0.2857, 'answer_relevancy': 0.9630, 'context_ relevancy': 0.1209, 'faithfulness': 0.8432}