**Resource:** https://github.com/aws-samples/Meta-Llama-on-AWS/blob/main/RAG-recipes/llama3-rag-langchain-smjs.ipynb

* Embedding Model Used: *HuggingFace BGE Large EN Embedding model*
* LLM Used: *HuggingFace Llama 3 8b Instruct LLM model*

In [6]:
%%writefile requirements.txt
langchain==0.1.14
pypdf==4.1.0
faiss-cpu==1.8.0
boto3==1.34.58
sqlalchemy==2.0.29

Overwriting requirements.txt


In [7]:
import sqlalchemy
print(sqlalchemy.__version__)

2.0.29


In [8]:
pip install nvidia-ml-py3==7.352.0

Note: you may need to restart the kernel to use updated packages.


In [9]:
pip install sqlparse==0.5.0

Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install scikit-learn==1.3.0

Note: you may need to restart the kernel to use updated packages.


In [11]:
pip install omegaconf==2.2.3

Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install gluonts==0.15.1

Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install langchain==0.1.14

Note: you may need to restart the kernel to use updated packages.


In [14]:
pip install boto3==1.34.58

Note: you may need to restart the kernel to use updated packages.


In [15]:
!pip install -U -r requirements.txt

Collecting sqlalchemy==2.0.29 (from -r requirements.txt (line 5))
  Using cached SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Using cached SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
Installing collected packages: sqlalchemy
  Attempting uninstall: sqlalchemy
    Found existing installation: SQLAlchemy 2.0.32
    Uninstalling SQLAlchemy-2.0.32:
      Successfully uninstalled SQLAlchemy-2.0.32
Successfully installed sqlalchemy-2.0.29


In [16]:
import langchain
print(langchain.__version__)

0.1.14


In [17]:
try:
    import sagemaker
except ImportError:
    !pip install sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [18]:
# Import the JumpStartModel class from the SageMaker JumpStart library
from sagemaker.jumpstart.model import JumpStartModel

In [19]:
# Specify the model ID for the HuggingFace Llama 3 8b Instruct LLM model
model_id = "meta-textgeneration-llama-3-8b-instruct"
accept_eula = True
model = JumpStartModel(model_id=model_id, model_version="2.7.0", instance_type= "ml.g5.2xlarge")

In [15]:
predictor = model.deploy(accept_eula=accept_eula, instance_type="ml.g5.2xlarge")

----------------!

In [20]:
# Specify the model ID for the HuggingFace BGE Large EN Embedding model
model_id = "huggingface-sentencesimilarity-bge-large-en-v1-5"
text_embedding_model = JumpStartModel(model_id=model_id,model_version="1.1.1")

In [17]:
embedding_predictor = text_embedding_model.deploy(instance_type="ml.g5.2xlarge")

---------!

In [21]:
import json
import sagemaker

from langchain_core.prompts import PromptTemplate
from langchain_community.llms import SagemakerEndpoint
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

In [22]:
sess = sagemaker.session.Session()
region = sess._region_name

In [23]:
llm_endpoint_name = "meta-textgeneration-llama-3-8b-instruct-2024-10-20-23-30-32-262"
embedding_endpoint_name = "hf-sentencesimilarity-bge-large-en-v1-5-2024-10-20-23-39-05-487"

In [24]:
# testing out my endpoint
import boto3
runtime_client = boto3.client('sagemaker-runtime', region_name=region)

input_prompt = {
    "inputs": "Where is the capital of China?"
}

response = runtime_client.invoke_endpoint(
    EndpointName=llm_endpoint_name,
    ContentType='application/json',
    Body=json.dumps(input_prompt)
)

In [25]:
# read and print the output
output = json.loads(response['Body'].read().decode())
print("LLM Response:", output)

LLM Response: {'generated_text': ' The capital of China is Beijing. Beijing is located in the northern part of the country and is the second most populous city in China, with a population'}


In [26]:
from typing import Dict

class Llama38BContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        payload = {
            "inputs": prompt,
            # default parameters
            "parameters": {
                # Controls the maximum number of tokens the model can generate
                "max_new_tokens": 1000,
                # lower value makes the model more deterministic / higher value allows for more diverse responses
                "top_p": 0.9,
                # randomness
                "temperature": 0.6,
                "stop": ["<|eot_id|>"],
            },
        }
        input_str = json.dumps(
            payload,
        )
        print(input_str)
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        print(response_json)
        content = response_json["generated_text"].strip()
        return content

In [27]:
# Instantiate the content handler for Llama3-8B
llama_content_handler = Llama38BContentHandler()

# Setup for using the Llama3-8B model with SageMaker Endpoint
llm = SagemakerEndpoint(
     endpoint_name=llm_endpoint_name,
     region_name=region,
     model_kwargs={"max_new_tokens": 1024, "top_p": 0.9, "temperature": 0.7},
     content_handler=llama_content_handler
 )

In [28]:
from typing import List

class BGEContentHandlerV15(EmbeddingsContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, text_inputs: List[str], model_kwargs: dict) -> bytes:
        """
        Transforms the input into bytes that can be consumed by SageMaker endpoint.
        Args:
            text_inputs (list[str]): A list of input text strings to be processed.
            model_kwargs (Dict): Additional keyword arguments to be passed to the endpoint.
               Possible keys and their descriptions:
               - mode (str): Inference method. Valid modes are 'embedding', 'nn_corpus', and 'nn_train_data'.
               - corpus (str): Corpus for Nearest Neighbor. Required when mode is 'nn_corpus'.
               - top_k (int): Top K for Nearest Neighbor. Required when mode is 'nn_corpus'.
               - queries (list[str]): Queries for Nearest Neighbor. Required when mode is 'nn_corpus' or 'nn_train_data'.
        Returns:
            The transformed bytes input.
        """
        input_str = json.dumps(
            {
                "text_inputs": text_inputs,
                **model_kwargs
            }
        )
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> List[List[float]]:
        """
        Transforms the bytes output from the endpoint into a list of embeddings.
        Args:
            output: The bytes output from SageMaker endpoint.
        Returns:
            The transformed output - list of embeddings
        Note:
            The length of the outer list is the number of input strings.
            The length of the inner lists is the embedding dimension.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json["embedding"]

In [29]:
bge_content_handler = BGEContentHandlerV15()
sagemaker_embeddings = SagemakerEndpointEmbeddings(
    endpoint_name=embedding_endpoint_name,
    region_name=region,
    model_kwargs={"mode": "embedding"},
    content_handler=bge_content_handler,
)

In [30]:
import os
contents = os.listdir()
pdf_files = [item for item in contents if item.endswith('.pdf')]

print("Contents of the current directory:")
for item in pdf_files:
    print(item)

Contents of the current directory:
recipe-data-sagemaker.pdf


In [31]:
import numpy as np
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Initialize an empty list to hold the documents
documents = []
metadata = [{} for _ in pdf_files]  # Initialize metadata for each PDF

# Load and process each PDF file
for idx, file in enumerate(pdf_files):
    loader = PyPDFLoader(file)
    document = loader.load()
    for document_fragment in document:
        document_fragment.metadata = metadata[idx]

    documents += document

# Set a chunk size for splitting documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)


docs = text_splitter.split_documents(documents)
print(docs[50])

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


page_content='salad. \nVariation:\ninstead\nof\nthe\ncheese\nand \nherbs,\nstuff\neach\nchicken\nbreast\nwith \nabout\n2\nto\n4\ntablespoons\nof\nanother \nmixture\nwhich\nmight\ninclude\none\nor \nseveral\ningredients\nsuch\nas\nslivered \nroasted\npeppers,\npitted\ngood\nblack \nolives,\nsun-dried\ntomatoes,\ncapers, \nanchovies,\nhot\nchilis,\nor\nother\ncheeses \nsuch\nas\nMozzarella\nor\nFontina.'


In [32]:
avg_doc_length = lambda documents: sum([len(doc.page_content) for doc in documents])//len(documents)

print(f'Average length among {len(documents)} documents loaded is {avg_doc_length(documents)} characters.')
print(f'After the split we have {len(docs)} documents as opposed to the original {len(documents)}.')
print(f'Average length among {len(docs)} documents (after split) is {avg_doc_length(docs)} characters.')

Average length among 16 documents loaded is 2477 characters.
After the split we have 51 documents as opposed to the original 16.
Average length among 51 documents (after split) is 843 characters.


In [33]:
sample_embedding = np.array(sagemaker_embeddings.embed_query(docs[0].page_content))
print("Sample embedding of a document chunk: ", sample_embedding)
print("Size of the embedding: ", sample_embedding.shape)

Sample embedding of a document chunk:  [-0.02978267  0.02248217  0.0348905  ... -0.0368495   0.0366135
  0.00579986]
Size of the embedding:  (1024,)


In [34]:
# using FAISS for building a vector store
from langchain_community.vectorstores import FAISS
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

vectorstore_faiss = FAISS.from_documents(
    docs,
    sagemaker_embeddings,
)
wrapper_store_faiss = VectorStoreIndexWrapper(vectorstore=vectorstore_faiss)

In [35]:
prompt_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant.
<|eot_id|><|start_header_id|>user<|end_header_id|>
{query}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["query"]
)

In [36]:
# querying the FAISS vector store for documents relevant to the user's question.
query = "How to cook a dish with pork tenderloin?"

In [37]:
answer = wrapper_store_faiss.query(question=PROMPT.format(query=query), llm=llm)
print(answer)

{"inputs": "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\ntitle\ningredients\ninstructions\nApricot\nSpritzer\n1\ncup\napricot\nnectar,\n1\nbottle\nsparkling \nwine,\nwell\nchilled\nDivide\nthe\napricot\nnectar\namong\n4 \nchilled\nchampagne\nflutes\nor\ncoupe \nglasses.\nTop\nwith\nthe\nsparkling\nwine \nand\nserve\nimmediately.\nPork\nTenderloin\nwith\nPeach\nand\nPecan \nSauce\n1\ntablespoon\nolive\noil,\n1\npork \ntenderloin\n(about\n1\npound),\ncut\ninto \n3/4-inch-thick\nslices,\n2\ncloves\ngarlic, \nminced,\n2\ngreen\nonions,\nsliced\n(about \n1/4\ncup),\n1\ncan\n(10\n3/4\nounces) \nCampbell's\u00ae\nCondensed\nGolden \nMushroom\nSoup,\n1\ncan\n(about\n15 \nounces)\nsliced\npeaches\nin\njuice, \ndrained,\nreserving\njuice,\n3\ntablespoons \nlow-sodium\nsoy\nsauce,\n2\ntablespoons \nhoney,\n1/4\ncup\npecan\nhalves,\ntoasted \nand\nbroken\ninto\nlarge\npieces,\nH

In [38]:
query_2 = "I have brussel sprouts and carrots, can you give me a healthy dairy-free recipe to make."

In [39]:
answer = wrapper_store_faiss.query(question=PROMPT.format(query=query_2), llm=llm)
print(answer)

{"inputs": "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nbutter,\n1\nteaspoon\nsalt,\n2\ncups\nwhite \nwine,\n10\nounces\ncanned\nbeef \nconsomme,\n10\nounces\nchicken\nbroth, \n10\nounces\napple\ncider\n(unfiltered\nis \nbest),\nBouquet\ngarni;\nthyme\nsprigs, \nbay\nleaf\nand\nparsley\ntied\ntogether\nwith \nkitchen\nstring,\n1\nloaf\ncountry\nstyle \nbread,\nKosher\nsalt,\nGround\nblack \npepper,\nSplash\nof\nCognac\n(optional),\n1 \ncup\nFontina\nor\nGruyere\ncheese,\ngrated\nfinely\nslice\ninto\nhalf-moon\nshapes.\nSet \nelectric\nskillet\nto\n300\ndegrees\nand\nadd \nbutter.\nOnce\nbutter\nhas\nmelted\nadd\na \nlayer\nof\nonions\nand\nsprinkle\nwith\na \nlittle\nsalt.\nRepeat\nlayering\nonions\nand \nsalt\nuntil\nall\nonions\nare\nin\nthe\nskillet. \nDo\nnot\ntry\nstirring\nuntil\nonions\nhave \nsweated\ndown\nfor\n15\nto\n20\nminutes. \nAfter\nthat,\nstir\nocc

In [40]:
from langchain.chains import RetrievalQA

prompt_template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

This is a conversation between an AI assistant and a Human.

<|eot_id|><|start_header_id|>user<|end_header_id|>

Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
#### Context ####
{context}
#### End of Context ####

Question: {question}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore_faiss.as_retriever(
        # how many relevant documents should be retrieved
        search_type="similarity", search_kwargs={"k": 3}
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

In [41]:
query = "I have a dairy allergy. Can you provide a recipe that's dairy-free?"
result = qa.invoke({"query": query})
print(result['result'][0])

# Print the source documents
print(f"\n{result['source_documents']}")

{"inputs": "\n<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nThis is a conversation between an AI assistant and a Human.\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nUse the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n#### Context ####\nbutter,\n1\nteaspoon\nsalt,\n2\ncups\nwhite \nwine,\n10\nounces\ncanned\nbeef \nconsomme,\n10\nounces\nchicken\nbroth, \n10\nounces\napple\ncider\n(unfiltered\nis \nbest),\nBouquet\ngarni;\nthyme\nsprigs, \nbay\nleaf\nand\nparsley\ntied\ntogether\nwith \nkitchen\nstring,\n1\nloaf\ncountry\nstyle \nbread,\nKosher\nsalt,\nGround\nblack \npepper,\nSplash\nof\nCognac\n(optional),\n1 \ncup\nFontina\nor\nGruyere\ncheese,\ngrated\nfinely\nslice\ninto\nhalf-moon\nshapes.\nSet \nelectric\nskillet\nto\n300\ndegrees\nand\nadd \nbutter.\nOnce\nbutter\nhas\nmelted\nadd\na \nlayer\nof\nonions\nand\nsprinkle\n

In [42]:
query = "I have some blueberries at home. Can you provide me a recipe?"
result = qa.invoke({"query": query})

# Print the source documents
# print(result['source_documents'])

{"inputs": "\n<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nThis is a conversation between an AI assistant and a Human.\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nUse the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n#### Context ####\nlime\nor \nlemon\nwedges. \nPhotograph\nby\nLevi\nBrown\nBlueberry-Poppy\nSeed\nLoaves\n1\n1/3\ncups\nvegetable\noil,\nplus\nmore\nfor \nbrushing,\n3\ncups\nall-purpose\nflour,\n1 \n1/2\nteaspoons\nbaking\npowder,\n2\n1/2 \ncups\nsugar,\nPinch\nof\nsalt,\n1\n1/2\ncups \nwhole\nmilk,\n3\nlarge\neggs,\n1\n1/2 \ntablespoons\npoppy\nseeds,\n1\n1/2 \nteaspoons\nalmond\nextract,\n1\n1/2 \nteaspoons\nvanilla\nextract,\n1\ncup \nblueberries\nPreheat\nthe\noven\nto\n350\ndegrees\nF. \nBrush\n4\nmini\nloaf\npans\n(about\n5\nby\n3 \ninches\neach)\nwith\nvegetable\noil. \nWhisk\nthe\nflour,\nbaking\npowder,

In [46]:
print(result['source_documents'])

[Document(page_content='lime\nor \nlemon\nwedges. \nPhotograph\nby\nLevi\nBrown\nBlueberry-Poppy\nSeed\nLoaves\n1\n1/3\ncups\nvegetable\noil,\nplus\nmore\nfor \nbrushing,\n3\ncups\nall-purpose\nflour,\n1 \n1/2\nteaspoons\nbaking\npowder,\n2\n1/2 \ncups\nsugar,\nPinch\nof\nsalt,\n1\n1/2\ncups \nwhole\nmilk,\n3\nlarge\neggs,\n1\n1/2 \ntablespoons\npoppy\nseeds,\n1\n1/2 \nteaspoons\nalmond\nextract,\n1\n1/2 \nteaspoons\nvanilla\nextract,\n1\ncup \nblueberries\nPreheat\nthe\noven\nto\n350\ndegrees\nF. \nBrush\n4\nmini\nloaf\npans\n(about\n5\nby\n3 \ninches\neach)\nwith\nvegetable\noil. \nWhisk\nthe\nflour,\nbaking\npowder,\nsugar \nand\nsalt\nin\na\nlarge\nbowl.\nBeat\nthe\nmilk, \n1\n1/3\ncups\nvegetable\noil,\nthe\neggs, \npoppy\nseeds,\nand\nalmond\nand\nvanilla \nextracts\nin\na\nstand\nmixer\nfitted\nwith \nthe\npaddle\nattachment\non\nmedium \nspeed\nuntil\ncombined.\nGradually\nbeat \nin\nthe\nflour\nmixture\nuntil\nwell \ncombined.\nFold\nthe\nblueberries\ninto \nthe\nbatter\nwith\

In [54]:
print(result['result'])

Yes, I can help you with that! Based on the context you provided, I found a recipe for Blueberry-Poppy Seed Loaves. Here it is:

Ingredients:

* 1 1/3 cups vegetable oil, plus more for brushing
* 3 cups all-purpose flour
* 1 1/2 teaspoons baking powder
* 2 1/2 cups sugar
* Pinch of salt
* 1 1/2 cups whole milk
* 3 large eggs
* 1 1/2 tablespoons poppy seeds
* 1 1/2 teaspoons almond extract
* 1 1/2 teaspoons vanilla extract
* 1 cup blueberries

Instructions:

1. Preheat the oven to 350°F. Brush 4 mini loaf pans with vegetable oil.
2. Whisk together flour, baking powder, sugar, and salt in a large bowl.
3. Beat the milk, oil, eggs, poppy seeds, almond extract, and vanilla extract in a stand mixer until combined.
4. Gradually beat in the flour mixture until well combined.
5. Fold in the blueberries.
6. Divide the batter among the prepared pans and bake until golden on top and a toothpick inserted into the center comes out clean, about 1 hour to 1 hour 15 minutes.

I hope you enjoy this rec

### Ragas Evaluation

In [None]:
pip install pydantic==2.8

In [1]:
pip install ragas

Note: you may need to restart the kernel to use updated packages.


In [43]:
import ragas
from datasets import Dataset
# os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision
)

In [57]:
questions = [query] #Ragas only takes lists
# contexts = result['source_documents']
answer = [result['result']]

import ast
# contexts = [ast.literal_eval(item) for item in contexts] # to make it a list of lists for Ragas

data = {
    "question": questions,
    "answer": answer,
    # "contexts": contexts,
}

dataset = Dataset.from_dict(data)
dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 1
})

In [84]:
#Still figuring out the steps to store an environment variable that isn't in plaintext
# import os

In [89]:
# api_key = os.environ.get('OPENAI_API_KEY')

In [95]:
%env OPENAI_API_KEY=#Insert open AI API key here

env: OPENAI_API_KEY=#Insert open AI API key here


In [94]:
score = ragas.evaluate(dataset,metrics=[answer_relevancy], raise_exceptions = False)
score

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

{'answer_relevancy': 0.8959}

In [None]:
# TODO: Other Ragas metrics as well