* Meta Llama 3.1 70B Instruct Neuron
*  Mistral 7b instruct

In [1]:
%pip install --quiet --upgrade sagemaker jmespath

Note: you may need to restart the kernel to use updated packages.


In [2]:
from ipywidgets import Dropdown
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models


dropdown = Dropdown(
    options=list_jumpstart_models("search_keywords includes Text Generation"),
    value="huggingface-llm-mistral-7b-instruct",
    description="Select a JumpStart text generation model:",
    style={"description_width": "initial"},
    layout={"width": "max-content"},
)
display(dropdown)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


Dropdown(description='Select a JumpStart text generation model:', index=48, layout=Layout(width='max-content')…

In [3]:
model_id = dropdown.value
model_version = "3.9.0"

In [4]:
from sagemaker.jumpstart.model import JumpStartModel

model = JumpStartModel(model_id=model_id, model_version=model_version, instance_type= "ml.g5.2xlarge")

In [5]:
# Generate predictor - do not run predictor later in the code
predictor = model.deploy()

----------------!

In [6]:
example_payloads = model.retrieve_all_examples()

In [7]:
import jmespath


for payload in example_payloads:
    response = predictor.predict(payload.body)
    generated_text = jmespath.search(payload.raw_payload["output_keys"]["generated_text"], response)
    print("Input:\n", payload.body[payload.prompt_key])
    print("Output:\n", generated_text.strip())
    print("\n===============\n")

Input:
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>

what is the recipe of mayonnaise?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


Output:
 Mayonnaise is a thick, creamy sauce that is made by emulsifying oil into an egg yolk mixture. Here's a simple recipe for homemade mayonnaise:

Ingredients:
- 1 egg yolk
- 1 teaspoon Dijon mustard
- 1 cup vegetable oil
- 1-2 tablespoons white wine vinegar or lemon juice
- Salt to taste

Instructions:
1. In a large bowl, whisk together the egg yolk and mustard until they are well combined.
2. Start adding the oil very slowly, drop by drop, while continuously whisking the mixture. This is to ensure that the oil is being emulsified properly and the mayonnaise is not breaking.
3. Once you've added about 1/4 cup of oil and the mayonnaise has thickened, you can start adding the oil in a thin, steady stream, still whisking constantly.
4. Once all the oil has been added, whisk in the vinegar or lemon juice and salt to taste.
5. If

## Recipes

In [1]:
import json
import sagemaker

from langchain_core.prompts import PromptTemplate
from langchain_community.llms import SagemakerEndpoint
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [26]:
import json
from typing import List, Dict
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import SagemakerEndpoint
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os

# Use the existing endpoint we created
#endpoint_name = predictor.endpoint_name  # Get the endpoint name from your existing predictor

In [47]:
# Deploy the Mistral model (which you already have)
#model_id = "huggingface-llm-mistral-7b-instruct"
#accept_eula = True
#model = JumpStartModel(model_id=model_id, model_version="3.9.0")
#predictor = model.deploy(accept_eula=accept_eula, instance_type="ml.g5.2xlarge")

# Deploy the embedding model
model_id_embedding = "huggingface-sentencesimilarity-bge-large-en-v1-5"
text_embedding_model = JumpStartModel(model_id=model_id_embedding, model_version="1.1.1")
embedding_predictor = text_embedding_model.deploy(accept_eula=True, instance_type="ml.g5.2xlarge")

# Print both endpoint names
print("LLM Endpoint:", predictor.endpoint_name)
print("Embedding Endpoint:", embedding_predictor.endpoint_name)

In [27]:
# Set up constants for endpoints
LLM_ENDPOINT = "hf-llm-mistral-7b-instruct-2024-10-24-04-16-05-903"
EMBEDDING_ENDPOINT = "hf-sentencesimilarity-bge-large-en-v1-5-2024-10-24-04-26-55-749"

In [28]:
class MistralContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"
    
    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        payload = {
            "inputs": prompt,
            # default parameters
            "parameters": {
                # Controls the maximum number of tokens the model can generate
                "max_new_tokens": 700, #started at 1000
                # lower value makes the model more deterministic / higher value allows for more diverse responses
                "top_p": 0.9,
                # randomness
                "temperature": 0.4, #started at 0.6
                "stop": ["<|eot_id|>"],
            },
        }
        input_str = json.dumps(
            payload,
        )
        print(input_str)
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        print(response_json)
        content = response_json["generated_text"].strip()
        return content

In [40]:
from typing import List
import json

# Define the embedding
class BGEContentHandlerV15(EmbeddingsContentHandler):
    content_type = "application/json"
    accepts = "application/json"
    
    def transform_input(self, text_inputs: List[str], model_kwargs: dict) -> bytes:
        input_str = json.dumps({
            "text_inputs": text_inputs,
            "mode": "embedding"
        })
        return input_str.encode("utf-8")
    
    def transform_output(self, output: bytes) -> List[List[float]]:
        response_json = json.loads(output.read().decode("utf-8"))
        if isinstance(response_json, list):
            return response_json
        if "embedding" in response_json:
            return response_json["embedding"]
        if "embeddings" in response_json:
            return response_json["embeddings"]
        return response_json[0]

In [41]:
# Initialize SageMaker and Endpoints
sess = sagemaker.Session()
region = sess.boto_session.region_name

llm = SagemakerEndpoint(
    endpoint_name="hf-llm-mistral-7b-instruct-2024-10-24-04-16-05-903",
    region_name=region,
    content_handler=MistralContentHandler()
)

sagemaker_embeddings = SagemakerEndpointEmbeddings(
    endpoint_name="hf-sentencesimilarity-bge-large-en-v1-5-2024-10-24-04-26-55-749",
    region_name=region,
    content_handler=BGEContentHandlerV15()
)

In [81]:
# Document Processing Class
class DocumentProcessor:
    def __init__(self, chunk_size=1000, chunk_overlap=100):
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\nRecipe:", "\n\n", "\n", ". "],
            length_function=len,
            keep_separator=True
        )
    
    def clean_text(self, text: str) -> str:
        """Clean up text by removing unnecessary whitespace and newlines."""
        # Replace multiple spaces with single space
        text = re.sub(r'\s+', ' ', text)
        # Remove newlines between words
        text = re.sub(r'\n+', ' ', text)
        return text.strip()
    
    def load_pdf_documents(self, pdf_directory: str = "."):
        documents = []
        pdf_files = [f for f in os.listdir(pdf_directory) if f.endswith('.pdf')]
        
        for pdf_file in pdf_files:
            try:
                loader = PyPDFLoader(os.path.join(pdf_directory, pdf_file))
                raw_docs = loader.load()
                # Clean each document's content
                for doc in raw_docs:
                    doc.page_content = self.clean_text(doc.page_content)
                documents.extend(raw_docs)
                print(f"Processed: {pdf_file}")
            except Exception as e:
                print(f"Error processing {pdf_file}: {e}")
        
        return documents
    
    def split_documents(self, documents):
        """Split documents while preserving Document structure."""
        if not documents:
            return []
        
        split_docs = []
        for doc in documents:
            # Split the text
            splits = self.text_splitter.split_text(doc.page_content)
            # Create new Document objects for each split
            for i, split in enumerate(splits):
                metadata = doc.metadata.copy()
                metadata["chunk_index"] = i
                from langchain_core.documents import Document
                split_docs.append(Document(page_content=split, metadata=metadata))
        
        return split_docs

# Now use the corrected processor
from langchain_core.documents import Document

# System Initialization with RAG parameters
def initialize_recipe_system(pdf_directory: str):
    processor = DocumentProcessor(chunk_size=1000, chunk_overlap=100)
    documents = processor.load_pdf_documents(pdf_directory)
    chunks = processor.split_documents(documents)
    print(f"Created {len(chunks)} chunks from the documents")
    
    # Create FAISS vector store with metadata
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=sagemaker_embeddings
    )
    return vectorstore

import re

In [82]:
import re
from typing import List, Dict

# Extract allergens
def extract_allergens(query: str, allergens: Dict[str, List[str]]) -> List[str]:
    """Extract mentioned allergens from the query."""
    mentioned_allergens = []
    query_lower = query.lower()
    
    for category, allergen_list in allergens.items():
        for allergen in allergen_list:
            if allergen.lower() in query_lower:
                mentioned_allergens.append(allergen)
    
    return mentioned_allergens

def query_recipes(query: str, vectorstore, allergens: Dict[str, List[str]]):
    mentioned_allergens = extract_allergens(query, allergens)
    print(f"Allergens found in query: {mentioned_allergens}")
    
    docs = vectorstore.similarity_search(query, k=20)
    
    # More strict filtering
    if mentioned_allergens:
        filtered_docs = []
        for doc in docs:
            content_lower = doc.page_content.lower()
            if not any(allergen.lower() in content_lower for allergen in mentioned_allergens):
                filtered_docs.append(doc)
        docs = filtered_docs[:10]
        print(f"Filtered to {len(docs)} documents after removing allergen-containing content")
    
    # Simplified prompt for clarity
    prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Generate a {recipe_type} recipe that is safe for people with {allergens} allergies. The recipe should be concise but complete.
<|eot_id|><|start_header_id|>user<|end_header_id|>
{user_request}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    print("\nGenerating recipe...")
    try:
        response = llm.invoke(
            prompt,
            model_kwargs={
                "temperature": 0.4,
                "max_new_tokens": 700,
                "top_p": 0.5,
                "repetition_penalty": 1.05,
                "do_sample": False,
                "stop": ["<|eot_id|>"]
            }
        )
        if len(response) < 100:
            print("Response is too short, considering further adjustment...")
            return retry_with_simpler_prompt(llm)
        return response
    except Exception as e:
        print(f"Error during invocation: {e}")
        return "Error generating recipe. Please try again."

In [90]:
import re
from typing import List, Dict

# Extract allergens
def extract_allergens(query: str, allergens: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Extract mentioned allergens from the query."""
    mentioned_allergens = {}
    query_lower = query.lower()
    
    for category, allergen_list in allergens.items():
        found_allergens = [allergen for allergen in allergen_list if allergen.lower() in query_lower]
        if found_allergens:
            mentioned_allergens[category] = found_allergens
    
    return mentioned_allergens

def query_recipes(query: str, vectorstore, allergens: Dict[str, List[str]]):
    mentioned_allergens = extract_allergens(query, allergens)
    print(f"Allergens found in query: {mentioned_allergens}")
    
    docs = vectorstore.similarity_search(query, k=20)
    
    # More strict filtering
    if mentioned_allergens:
        filtered_docs = []
        for doc in docs:
            content_lower = doc.page_content.lower()
            if not any(any(allergen.lower() in content_lower for allergen in allergen_list) for allergen_list in mentioned_allergens.values()):
                filtered_docs.append(doc)
        docs = filtered_docs[:10]
        print(f"Filtered to {len(docs)} documents after removing allergen-containing content")
    
    # Format the prompt with the relevant information
    allergen_list = [", ".join(allergen_list) for allergen_list in mentioned_allergens.values()]
    allergens_str = ", ".join(allergen_list) if allergen_list else "none specified"
    
    prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Generate a recipe that is safe for people with {allergens} allergies. The recipe should be concise but complete.
<|eot_id|><|start_header_id|>user<|end_header_id|>
{user_request}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>""".format(
        allergens=allergens_str,
        user_request=query
    )
    
    print("\nGenerating recipe...")
    try:
        response = llm.invoke(
            prompt,
            model_kwargs={
                "temperature": 0.4,
                "max_new_tokens": 700,
                "top_p": 0.5,
                "repetition_penalty": 1.05,
                "do_sample": False,
                "stop": ["<|eot_id|>"]
            }
        )
        if len(response) < 100:
            print("Response is too short, considering further adjustment...")
            return retry_with_simpler_prompt(llm)
        return response
    except Exception as e:
        print(f"Error during invocation: {e}")
        return "Error generating recipe. Please try again."

In [84]:
# Main Execution
pdf_directory = "."
vectorstore = initialize_recipe_system(pdf_directory)

# Load allergens
with open("allergens.json", "r") as f:
    allergens = json.load(f)

Processed: recipe-data-sagemaker.pdf
Processed: recipe-data-sagemaker2.pdf
Created 97 chunks from the documents


In [85]:
# allergy = Nut & Treenut / cousine = Thai
# Example Usage
query = "I am allergic to nuts and tree nuts. Can you suggest a Thai curry recipe that is completely nut-free?"
response = query_recipes(query, vectorstore, allergens)
print(response)

Allergens found in query: []

Generating recipe...
{"inputs": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nGenerate a recipe that is safe for people with none specified allergies. The recipe should be concise but complete.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\nI am allergic to nuts and tree nuts. Can you suggest a Thai curry recipe that is completely nut-free?\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>", "parameters": {"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.4, "stop": ["<|eot_id|>"]}}
{'generated_text': "\nAbsolutely, I'd be happy to help you with that! Here's a simple and delicious Thai Green Curry recipe that is nut-free.\n\nIngredients:\n- 1 pound boneless, skinless chicken breasts or thighs, cut into bite-sized pieces\n- 2 tablespoons vegetable oil\n- 1 can (14 ounces) coconut milk\n- 1-2 tablespoons Thai green curry paste (make sure it's nut-free)\n- 1 can (14 ounces) bamboo shoots, drained and rinsed\n- 1 can (14 ounces

In [74]:
# allergy = Gluten (Wheat) / cousine = Italian
# Example Usage
query = "I have celiac disease. Please suggest an Italian polenta-based recipe that's naturally gluten-free."
response = query_recipes(query, vectorstore, allergens)
print(response)

Allergens found in query: []

Generating recipe...
{"inputs": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nGenerate a recipe that is safe for people with none specified allergies. The recipe should be concise but complete.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\nI have celiac disease. Please suggest an Italian polenta-based recipe that's naturally gluten-free.\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>", "parameters": {"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.4, "stop": ["<|eot_id|>"]}}
{'generated_text': "\nTitle: Gluten-Free Italian Polenta with Tomato Sauce and Grilled Vegetables\n\nIngredients:\n- 1 cup (250 ml) corn polenta (ensure it's certified gluten-free)\n- 4 cups (1 L) water\n- 1 tbsp (15 ml) olive oil\n- Salt, to taste\n- 1 lb (500 g) assorted vegetables (bell peppers, zucchini, eggplant, etc.)\n- 2 tbsp (30 ml) balsamic vinegar\n- 1 tbsp (15 ml) olive oil\n- 1 onion, finely chopped\n- 2 cloves garlic, minced\n- 1 can

In [86]:
# allergy = Milk & Egg / cousine = Dessert
# Example Usage
query = "Looking for an ice cream alternative - I'm allergic to milk and eggs. Can you suggest a frozen dessert recipe?"
response = query_recipes(query, vectorstore, allergens)
print(response)

Allergens found in query: []

Generating recipe...
{"inputs": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nGenerate a recipe that is safe for people with none specified allergies. The recipe should be concise but complete.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\nLooking for an ice cream alternative - I'm allergic to milk and eggs. Can you suggest a frozen dessert recipe?\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>", "parameters": {"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.4, "stop": ["<|eot_id|>"]}}
{'generated_text': "\nAbsolutely! Here's a simple and delicious recipe for a vegan sorbet made with fruits. This recipe is free from common allergens like milk, eggs, peanuts, tree nuts, soy, and gluten.\n\n**Fruit Sorbet**\n\n**Ingredients:**\n1. 5 cups (1.2 kg) of your favorite fresh fruits (berries, mango, pineapple, etc.)\n2. 1/2 cup (100 g) of granulated sugar (or to taste)\n3. 1 cup (250 ml) of water\n\n**Instructions:**\n1. Wash 

In [92]:
# allergy = Soy / cousine = Vegan
# Example Usage
query = "I am allergic to soy and eat vegan. Can you share a recipe for a protein-rich main dish using legumes instead of soy products?"
response = query_recipes(query, vectorstore, allergens)
print(response)

Allergens found in query: {}

Generating recipe...
{"inputs": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nGenerate a recipe that is safe for people with none specified allergies. The recipe should be concise but complete.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\nI am allergic to soy and eat vegan. Can you share a recipe for a protein-rich main dish using legumes instead of soy products?\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>", "parameters": {"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.4, "stop": ["<|eot_id|>"]}}
{'generated_text': "\nAbsolutely! Here's a simple yet delicious recipe for a Chickpea Curry that is soy-free and vegan.\n\n**Ingredients:**\n1. 2 cans (15 oz each) chickpeas, drained and rinsed\n2. 1 onion, chopped\n3. 2 cloves garlic, minced\n4. 1 inch ginger, grated\n5. 1 red bell pepper, chopped\n6. 1 green bell pepper, chopped\n7. 1 jalapeno pepper, seeded and minced (optional)\n8. 1 can (14 oz) crushed tomatoes\n9. 

In [91]:
# allergy = Fish & Crustacean / cousine = Japanese
# Example Usage
query = "I have seafood allergies (fish and shellfish). Can you provide a recipe for vegetable tempura with a dipping sauce that doesn't contain dashi or fish sauce?"
response = query_recipes(query, vectorstore, allergens)
print(response)

Allergens found in query: {'meat_items': ['fish'], 'seafood_items': ['fish']}
Filtered to 10 documents after removing allergen-containing content

Generating recipe...
{"inputs": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nGenerate a recipe that is safe for people with fish, fish allergies. The recipe should be concise but complete.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\nI have seafood allergies (fish and shellfish). Can you provide a recipe for vegetable tempura with a dipping sauce that doesn't contain dashi or fish sauce?\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>", "parameters": {"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.4, "stop": ["<|eot_id|>"]}}
{'generated_text': "\nAbsolutely! Here's a simple and delicious recipe for Vegetable Tempura with a Soy-Ginger Dipping Sauce that is safe for people with fish allergies.\n\n**Vegetable Tempura**\n\nIngredients:\n- 1 cup (240 ml) cold water\n- 1 1/2 cups (200 g) all-purpose flour\n