In [9]:
import os
import hashlib
from langchain_community.vectorstores import Qdrant
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
from langchain.llms import HuggingFacePipeline
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
base_embeddings = HuggingFaceEmbeddings(model_name="multi-qa-mpnet-base-dot-v1")

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.6.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





#### Load data

In [13]:
df = pd.read_csv('./food_recipes.csv')

In [45]:
# Fill NaN values with empty strings
df.fillna('', inplace=True)

In [46]:
text_splitter_recursive = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)

### Create and chunk documents

In [47]:
def create_documents(df):
    documents = []
    for index, row in df.iterrows():
        metadata = {
            'recipe_title': row['recipe_title'],
            'url': row['url'],
            'rating': row['rating'],
            'cuisine': row['cuisine'],
            'course': row['course'],
            'diet': row['diet'],
            'author': row['author']
        }

        # Combine all text fields that will be chunked
        text = f"{row['description']} {row['ingredients']} {row['instructions']} {row['tags']}"

        # Create a Document object
        doc = Document(page_content=text, metadata=metadata)
        documents.append(doc)
    
    return documents

In [48]:
documents = create_documents(df)

In [49]:
documents[0]

Document(page_content=' is a quicker version pizza to satisfy your cravings. It is a very quick and easy recipe for days that you do not feel like cooking a full fledged meal. With the preference of toppings of your choice this pizza recipe is definitely a winner at any home. The toppings used in this  has some roasted peppers, mushroom with loaded cheese and marinara sauce. Enjoy this easy recipe with your favorite toppings.\xa0 This is a great recipe, if you are looking for an Indian/Fusion Pizza or a Homemade Pizza recipe. Serve  along with  \xa0and   for a weekend night dinner. If you like this recipe, you can also try other  for your weekend dinner: Tortillas|Extra Virgin Olive Oil|Garlic|Mozzarella cheese|Red Yellow or Green Bell Pepper (Capsicum)|Onions|Kalmatta olives|Button mushrooms To begin making the Roasted Peppers And Mushroom Tortilla Pizza recipe, turn your oven setting to broiler and keep it ready.|Grease your cast iron skillet with some olive oil and heat it over high

In [50]:
def split_documents_with_metadata(documents, text_splitter):
    split_docs = []
    for doc in documents:
        chunks = text_splitter.split_text(doc.page_content)
        for i, chunk in enumerate(chunks):
            split_docs.append(Document(page_content=chunk, metadata={**doc.metadata, "chunk_id": i}))
    return split_docs

In [51]:
split_documents = split_documents_with_metadata(documents, text_splitter_recursive)

In [52]:
# Generates a hash of the document text, strips out duplicates
def deduplicate_documents(documents):
    seen_hashes = set()
    unique_documents = []
    for doc in documents:
        doc_hash = hashlib.md5(doc.page_content.encode('utf-8')).hexdigest()
        if doc_hash not in seen_hashes:
            seen_hashes.add(doc_hash)
            unique_documents.append(doc)
    return unique_documents

In [40]:
# Include university metadata in doc context

def format_docs(docs, max_length=20000):
    formatted_docs = []
    for doc in docs:
        formatted_docs.append(f"Metadata: {doc.metadata['university']}\nSource: {doc.metadata['source']}\n\n{doc.page_content}")
    content = "\n\n".join(formatted_docs)
    tokens = tokenizer.tokenize(content)
    if len(tokens) > max_length:
        truncated_tokens = tokens[:max_length]
        return tokenizer.convert_tokens_to_string(truncated_tokens)
    return content


In [41]:
len(split_documents)

21209

In [42]:
split_documents = deduplicate_documents(split_documents)

In [43]:
len(split_documents)

21209

In [26]:
split_documents[0]

Document(page_content="\nWriting the Personal Statement - Berkeley Graduate Division\n\nSkip to content\n\n\t\t\t\t\t\t\t\t\tApply Now\t\t\t\t\t\t\t\t\n\nToggle Menu\n\nStudent Resources\n\nStaff Resources\n (opens in a new tab)\n\nNews & Events\n\n\t\t\t\t\t\t\t\t\tApply Now\t\t\t\t\t\t\t\t\n\n\t\t\t\t\t\t\t\t\t\t\t\tToggle Search\t\t\t\t\t\t\t\t\t\t\t\n\nSearch for:\n\n\t\t\t\tSearch\t\t\t\n\nAbout\nAbout\nAbout\nThe Graduate Division serves more than 13,000 students in over 100 graduate degree programs. We are here to help you from the time you are admitted until you complete your graduate program. About the Graduate Division\n\nContact the Graduate Division\n\nWelcome from the Vice Provost and Dean\n\nDiversity Initiatives\n\nGraduate Diversity Task Force Recommendations\n\nAnti-Racism Initiatives\n\nLand Acknowledgement\n\nAdmissions\nAdmissions\nAdmissions\nWe're thrilled you're considering Berkeley for your graduate study. We offer more than 100 programs for master's, profession

#### Load documents into vector db

In [53]:
vectorstore = Qdrant.from_documents(split_documents,
    base_embeddings,
    location=":memory:",
)

In [60]:
retriever = vectorstore.as_retriever()

# Consumes ~ 7.6 GB VRAM to documents into db (using recursive chunking)

In [62]:
!nvidia-smi

Sat Jun 22 22:14:42 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.02              Driver Version: 555.42.02      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:01:00.0  On |                  Off |
|  0%   45C    P5             37W /  450W |    7619MiB /  24564MiB |     37%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


### Prompt definitions

In [70]:
# We will need to tune this prompt

baseline_user_prompt = """
You are a helpful assistant and expert in cooking recipes.
You will be provided a list of relevant context about relevant food recipes
which includes metadata like ingredients and cooking instructions.

Provide a response to the user prompt about food with recommended recipes and instructions.

### Here is a context:
{context} 

### Here is a user prompt:
{question}
"""

In [71]:
rag_prompt = ChatPromptTemplate.from_template(baseline_user_prompt)

In [72]:
rag_prompt.invoke({
    "context": ["abcd", "efgh"],
    "question": "jklmnop?"
})

ChatPromptValue(messages=[HumanMessage(content="\nYou are a helpful assistant and expert in cooking recipes.\nYou will be provided a list of relevant context about relevant food recipes\nwhich includes metadata like ingredients and cooking instructions.\n\nProvide a response to the user prompt about food with recommended recipes and instructions.\n\n### Here is a context:\n['abcd', 'efgh'] \n\n### Here is a user prompt:\njklmnop?\n")])

### Pipe retriever and llm together

#### Try with OpenAI GPT3.5 as baseline

##### Requires OPENAI_API_KEY env var to be set

In [91]:
os.environ['OPENAI_API_KEY'] = ""

In [74]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

In [81]:
def format_docs(docs, max_length=20000):
    formatted_docs = []
    for doc in docs:
        formatted_docs.append(f"Metadata: {doc.metadata}\n")
    content = "\n\n".join(formatted_docs)
    # tokens = tokenizer.tokenize(content)
    # if len(tokens) > max_length:
    #     truncated_tokens = tokens[:max_length]
    #     return tokenizer.convert_tokens_to_string(truncated_tokens)
    return content

In [82]:
rag_chain = (
    {"context": retriever | format_docs,
     "question": RunnablePassthrough() }
    | rag_prompt
    | llm
)

In [83]:
rag_chain.invoke("I want to make a healthy pizza").content

"Here are some healthy pizza recipes you can try:\n\n1. Italian Oat Crust Pizza Topped With Spinach And Paneer Recipe - This recipe is a fusion of Italian and Indian flavors, perfect for a healthy vegetarian lunch. You can find the recipe [here](https://www.archanaskitchen.com/italian-oat-crust-pizza-topped-withspinach-and-paneer).\n\n2. Healthy Fruit Pizza Recipe For Breakfast - If you're looking for a healthy breakfast option, this fruit pizza recipe is a great choice. It's vegetarian and full of nutritious ingredients. Check out the recipe [here](https://www.archanaskitchen.com/healthy-fruit-pizza-for-breakfast-recipe).\n\n3. Homemade Whole Wheat Pizza Dough Recipe - For a healthier pizza crust option, try making your own whole wheat pizza dough. Top it with roasted vegetables for a delicious and nutritious main course. You can find the recipe [here](https://www.archanaskitchen.com/whole-wheat-pizza-topped-with-roasted-vegetables).\n\n4. Harissa Roasted Carrot Pizza With Pomegranate

In [84]:
rag_chain.invoke("I want to make a healthy pizza. It should be vegetarian.").content

"I have some great healthy and vegetarian pizza recipes for you to try:\n\n1. No Yeast Thin Crust Veggie Pizza Recipe by Archana's Kitchen. You can find the recipe [here](https://www.archanaskitchen.com/no-yeast-thin-crust-veggie-pizza-recipe).\n\n2. Italian Oat Crust Pizza Topped With Spinach And Paneer Recipe by Archana Doshi. Check out the recipe [here](https://www.archanaskitchen.com/italian-oat-crust-pizza-topped-withspinach-and-paneer).\n\n3. Whole Wheat Crust Pizza With Zucchini And Basil Sauce by Archana Doshi. Recipe can be found [here](https://www.archanaskitchen.com/whole-wheat-crust-pizza-with-zucchini-and-basil-sauce).\n\n4. Multigrain Pizza with Roasted Vegetables Recipe by Archana Doshi. You can try this recipe [here](https://www.archanaskitchen.com/multigrain-whole-wheat-and-oats-pizza-with-roasted-vegetables-recipe).\n\nThese recipes are not only delicious but also healthy and vegetarian, perfect for your pizza cravings! Enjoy cooking!"

In [85]:
rag_chain.invoke("I want to make a healthy pizza. It should be vegetarian. Give me one recipe with ingredients and instructions").content

'I recommend trying the "Italian Oat Crust Pizza Topped With Spinach And Paneer Recipe" for a healthy vegetarian pizza option. \n\nIngredients:\n- 1 cup Rolled Oats\n- 1/2 cup Whole Wheat Flour\n- 1/2 tsp Salt\n- 1/2 tsp Dried Basil\n- 1/2 tsp Dried Oregano\n- 1/2 tsp Red Chilli Flakes\n- 1/2 cup Warm Water\n- 1/2 cup Spinach, chopped\n- 1/2 cup Paneer, crumbled\n- 1/2 cup Mozzarella Cheese\n- Salt and Pepper, to taste\n- Olive Oil, for greasing\n\nInstructions:\n1. Preheat the oven to 180°C.\n2. In a bowl, mix rolled oats, whole wheat flour, salt, dried basil, dried oregano, and red chilli flakes.\n3. Add warm water gradually and knead into a soft dough. Let it rest for 10 minutes.\n4. Roll out the dough on a greased baking tray into a thin crust.\n5. Bake the crust for 10 minutes.\n6. Remove from the oven and top with chopped spinach, crumbled paneer, mozzarella cheese, salt, and pepper.\n7. Bake for another 10-15 minutes until the cheese is melted and bubbly.\n8. Slice and serve hot

In [86]:
rag_chain.invoke("I enjoy asian fusion food and I am a vegetarian. Give me one recipe with ingredients and instructions").content

'I recommend trying the "Chinese Tacos (Salad Taco with Hot and Sweet Vegetables) - Fusion Recipe" by Uma Raghuraman. This recipe combines Asian and Mexican flavors in a delicious vegetarian dish. \n\nIngredients:\n- For the Hot & Sweet Vegetables:\n  - 1 cup thinly sliced cabbage\n  - 1/2 cup sliced bell peppers (red and yellow)\n  - 1/4 cup sliced onions\n  - 1/4 cup sliced carrots\n  - 1/4 cup sliced capsicum\n  - 1 tbsp soy sauce\n  - 1 tbsp sweet chili sauce\n  - 1 tsp vinegar\n  - 1 tsp sugar\n  - Salt to taste\n- For the Salad Taco:\n  - 4 lettuce leaves\n  - 8 taco shells\n  - 1/4 cup mayonnaise\n  - 1/4 cup tomato ketchup\n  - 1/2 cup grated cheese\n\nInstructions:\n1. Heat oil in a pan and stir-fry all the vegetables for the hot & sweet filling.\n2. Add soy sauce, sweet chili sauce, vinegar, sugar, and salt. Mix well and cook for a few minutes.\n3. Mix mayonnaise and tomato ketchup to make the dressing for the salad taco.\n4. To assemble, place lettuce leaves on a taco shell,

In [87]:
rag_chain.invoke("I am in a rush and want to cook an easy and quick dinner. Give me a recipe with ingredients and instructions").content

'I recommend trying the "Quick Cauliflower Fried Rice Recipe" for a fast and delicious dinner option. \n\nIngredients:\n- 2 cups cauliflower florets\n- 1/2 cup mixed vegetables (carrots, peas, bell peppers)\n- 2 tbsp oil\n- 1/2 tsp ginger, minced\n- 1/2 tsp garlic, minced\n- 2 green chillies, finely chopped\n- 2 tbsp soy sauce\n- Salt, to taste\n- Pepper, to taste\n- Spring onions, for garnish\n\nInstructions:\n1. Grate the cauliflower florets to make cauliflower rice.\n2. Heat oil in a pan and add ginger, garlic, and green chillies. Saute for a minute.\n3. Add the mixed vegetables and cook until they are slightly tender.\n4. Add the cauliflower rice, soy sauce, salt, and pepper. Mix well.\n5. Cook for 5-7 minutes until the cauliflower is cooked but still has a slight bite.\n6. Garnish with chopped spring onions and serve hot.\n\nYou can find the detailed recipe with step-by-step instructions at: [Quick Cauliflower Fried Rice Recipe](https://www.archanaskitchen.com/quick-cauliflower-fr

In [90]:
rag_chain.invoke("I want to make a budget friendly dinner. I prefer american food. Give me a recipe with ingredients and instructions").content

'I recommend trying the "Chicken, Mushroom And Broccoli Au Gratin Recipe". It is a delicious and budget-friendly dish that falls under the American cuisine category. \n\nIngredients:\n- 2 boneless, skinless chicken breasts\n- 1 cup sliced mushrooms\n- 1 cup broccoli florets\n- 1/4 cup butter\n- 1/4 cup all-purpose flour\n- 1 1/2 cups milk\n- 1 cup shredded cheddar cheese\n- Salt and pepper to taste\n- Bread crumbs for topping\n\nInstructions:\n1. Preheat your oven to 350°F (175°C).\n2. Cook the chicken breasts in a skillet until fully cooked, then slice them into thin strips.\n3. In the same skillet, sauté the mushrooms and broccoli until they are slightly tender.\n4. In a separate saucepan, melt the butter over medium heat. Stir in the flour and cook for about 1 minute.\n5. Slowly pour in the milk while continuously stirring to avoid lumps. Cook until the sauce thickens.\n6. Add in the cheddar cheese and stir until melted. Season with salt and pepper.\n7. In a baking dish, layer the c