# The LLM Pipeline

In [1]:
import chromadb

client = chromadb.PersistentClient(path='../datasets/processed/chroma_db')
print(client.list_collections())

[Collection(name=name), Collection(name=ingredient), Collection(name=instruction)]


In [5]:
client.get_collection('name').get(include=["embeddings", "documents"])

{'ids': ['4a5b184e-f53a-4604-aeb5-b5546793789e',
  '57327dbe-54f6-4a62-ad5c-99fc62f06695',
  '9487d7ce-2f1c-410b-ace1-eb3b1e674966',
  'f80987b8-a5b0-431b-8975-57000772f426',
  'ecf05bc4-8faa-4557-9f68-9de87b96acfb',
  '9e73aefd-5609-4b17-a729-e4fb20b5cbcb',
  '4e30707e-c1bb-4c03-b163-85667f150344',
  '01041712-9be7-43a5-bfdf-10d2ccb52c82',
  '6ddc920f-9e7c-4b96-9ab4-2b860e269b46',
  'fe8c68da-fdce-46cd-a2c0-c64b1755d5fc',
  '8613e27a-fe84-45aa-90b5-991a7ef28e0b',
  'dda7063f-e4ab-42c7-bd60-ef1603d6a11f',
  '08fc5a2f-678d-4138-8cc9-3f3d60fe9333',
  '474e4759-98c3-412d-bf7c-8f7d00203033',
  '59874bd4-1a20-4a37-b9a3-79c6859f2539',
  '2a0c3d8b-7eae-4b78-a7cd-5de3d0381c8e',
  '801043a6-44ca-42a7-bca8-38625a285d5d',
  '29b69ab3-9236-4da8-b805-d42b4eb5b59a',
  '0377cfbf-98e6-4a5b-a0ae-14b971a54603',
  'a2000be6-9790-4e98-8f59-597759e34ace',
  '0fca8209-3f85-497f-aa31-c969a84da458',
  '0c4ece44-5095-41fd-96fb-d5e9a5d4f486',
  '4b42ceb4-b1c4-4075-9573-deeb77797fec',
  'f778bc62-d021-405d-8c91-

## Loading the models and vector stores using Langchain

In [1]:
import torch
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Initialize models
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cuda"}
encode_kwargs = {"normalize_embeddings": True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

model_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map="auto", revision="main"
)

# Initialize Chroma
chroma_store = Chroma(collection_name='summed', persist_directory='./chroma_db_sample', embedding_function=hf)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    # temperature=0.00,
    # top_p=0.95,
    repetition_penalty=1.15,
)
llm = HuggingFacePipeline(pipeline=pipe)

  from .autonotebook import tqdm as notebook_tqdm


### Relevant documents can be retrieved from the vector store and used as context

In [5]:
sample_recipe = chroma_store.similarity_search("Chicken Noodle Soup", k=5)
test_recipe = chroma_store.similarity_search('Seared Tuna',k=1)[0].page_content
for doc in sample_recipe:
    print(doc)

noodle_recipe = sample_recipe[0].page_content

IndexError: list index out of range

In [6]:
print(noodle_recipe)

NameError: name 'noodle_recipe' is not defined

In [5]:
print(test_recipe)

Recipe Name: Asian Sesame Seared or Grilled Tuna (Gluten Free), Ingredients: 0.25 cup tamari (gluten-free soy sauce), 0.25 cup sesame oil, 2 tablespoons mirin (Japanese sweet wine), 2 tablespoons honey, 2 tablespoons rice wine vinegar, 2 tablespoons grated fresh ginger, 2 green onions, thinly sliced, divided, 2 cloves garlic, minced, or more to taste, 4 (6 ounce) tuna steaks, 2 tablespoons sesame seeds, or to taste, 1 tablespoon olive oil, Cooking Instructions: Whisk tamari, sesame oil, mirin, honey, vinegar, ginger, green onions, and garlic together in a bowl until marinade is evenly mixed. Place tuna steaks in the marinade and refrigerate for at least 1 hour., Spread sesame seeds onto a plate. Remove tuna from marinade and discard marinade. Press tuna into sesame seeds until evenly coated., Heat olive oil in a cast iron skillet over high heat until very hot; cook tuna in the hot oil until cooked to desired doneness, 30 seconds to 5 minutes per side., 


In [6]:
irrelevant_recipe = chroma_store.similarity_search("Chocolate Cake", k=1)[0].page_content
print(irrelevant_recipe)

Recipe Name: Too Much Chocolate Cake, Ingredients: 1 (18.25 ounce) package devil's food cake mix, 1 (5.9 ounce) package instant chocolate pudding mix, 1 cup sour cream, 1 cup vegetable oil, 0.5 cup warm water, 4 eggs, 2 cups semisweet chocolate chips, Cooking Instructions: Preheat the oven to 350 degrees F (175 degrees C). Generously grease a 12-cup Bundt pan., Mix together cake mix, pudding mix, sour cream, oil, water, and eggs in a large bowl until well-blended., Stir in chocolate chips., Pour batter into the prepared Bundt pan., Bake in the preheated oven until top is springy to the touch and a toothpick inserted into center of cake comes out clean, 50 to 55 minutes., Cool cake thoroughly in the pan for at least 1 1/2 hours before inverting it onto a plate. Sprinkle powdered sugar on top if desired., Enjoy!, 


### LLM Performance without retrieval-augmentation

In [None]:
#

### The LLM chain inserts these documents into the input prompt as additional context. 

In [7]:
template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{test_recipe}

### Input:
How would you rewrite this recipe for chicken noodle soup in order to substitute chicken for seared tuna? Please provide your answer in the form of an informative cooking recipe.
{noodle_recipe}

### Response:
"""

test_result = llm(template)
print(test_result)



To substitute chicken for seared tuna in this recipe, you can follow these steps:

1. Replace the 1 (4 pound) whole chicken with 1 (4 pound) piece of seared tuna.
2. Keep all the vegetables and seasonings the same, except for the chicken broth, which will now be used as a marinade for the tuna.
3. Marinate the tuna in the chicken broth, along with the lemongrass, bay leaves, and peppercorns, for at least 1 hour in the refrigerator.
4. Before cooking the tuna, remove it from the marinade and pat it dry with paper towels to remove excess moisture.
5. In step 3 of the original recipe, use a skillet or cast-iron pan to sear the tuna over high heat for 30 seconds to 5 minutes per side, or until it reaches your desired level of doneness.
6. Once the tuna is cooked, add it to the pot with the vegetables and noodles, and simmer for an additional 10 minutes to allow the flavors to meld together.

By following these steps, you will have successfully substituted chicken for seared tuna in this re

#### The structure of the response can be manipulated by providing the LLM with the start of the expected answer. Here, this is achieved by appending "Recipe Name:" to the end of the input query. The LLM will use this template and the inserted recipes (which have a very similar form) to generate an output that looks like a recipe you might find on the internet.

In [8]:
template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{test_recipe}

### Input:
How would you rewrite this recipe for chicken noodle soup in order to add seared tuna? Please provide your answer in the form of an informative cooking recipe.
{noodle_recipe}

### Response: Recipe Name:
"""

print(llm(template))

Asian Sesame Seared or Grilled Tuna (Gluten Free)

To incorporate seared tuna into the existing chicken noodle soup recipe, you can follow these steps:

Step 1: Prepare the tuna marinade by whisking together 0.25 cup tamari (gluten-free soy sauce), 0.25 cup sesame oil, 2 tablespoons mirin (Japanese sweet wine), 2 tablespoons honey, 2 tablespoons rice wine vinegar, 2 tablespoons grated fresh ginger, 2 green onions, thinly sliced, divided, 2 cloves garlic, minced, or more to taste, in a bowl until well combined.

Step 2: In a separate bowl, mix together 1 pound carrots, 0.66666668653488 bunch celery, trimmed, 1 large onion, peeled and halved, 3 tablespoons chopped lemongrass, 3 large bay leaves, 10 whole black peppercorns, and 1 pound carrots, peeled and sliced, in a large stockpot.

Step 3: Add 1 (4 pound) whole chicken, cut into pieces, to the stockpot and pour in 1 gallon water. Bring to a boil over high heat, then reduce the heat, cover, and simmer for 30 minutes, or until the chicke

#### What would happen if a recipe irrelevant to the input query was provided? The LLM will ignore the information in the recipe and fall back on the knowledge base it was trained on. Prompting the structure answer by appending "Recipe Name:" still achieves the desired effect.

In [9]:
template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{irrelevant_recipe}

### Input:
How would you rewrite this recipe for chicken noodle soup in order to substitute chicken for seared tuna? Please provide your answer in the form of an informative cooking recipe.
{noodle_recipe}

### Response:
"""

print(llm(template))

To make this delicious chicken noodle soup with seared tuna instead of chicken, simply follow these steps:

Ingredients:

* 1 (4 pound) piece of sushi-grade tuna, cut into pieces
* 1 gallon water
* 1 pound whole carrots, peeled and sliced
* 0.66666668653488 bunch celery, trimmed
* 1 large onion, peeled and halved
* 3 tablespoons chopped lemongrass
* 3 large bay leaves
* 10 whole black peppercorns
* 1 pound carrots, peeled and sliced
* 0.33333334326744 bunch celery with leaves, sliced
* 0.25 cup tuna soup base (available at most Asian grocery stores)
* 1 (8 ounce) package dry egg noodles

Cooking Instructions:

1. Place tuna into a large stockpot and pour in water. Bring to a boil over high heat.
2. Reduce the heat, cover, and simmer for 30 minutes, or until the tuna is cooked through and flakes easily with a fork.
3. Add whole carrots, whole celery, onion half, lemongrass, bay leaves, and peppercorns. Cover and simmer for 1 hour; the tuna should be opaque and the juices should run clea

In [10]:
template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{irrelevant_recipe}

### Input:
How would you rewrite this recipe for chicken noodle soup in order to add seared tuna? Please provide your answer in the form of an informative cooking recipe.
{noodle_recipe}

### Response:
Recipe Name:"""


print(llm(template))

 Tasty Tuna Chicken Noodle Soup, Ingredients: 1 (4 pound) whole chicken, cut into pieces, 1 gallon water, 1 pound whole carrots, 0.66666668653488 bunch celery, trimmed, 1 large onion, peeled and halved, 3 tablespoons chopped lemongrass, 3 large bay leaves, 10 whole black peppercorns, 1 pound carrots, peeled and sliced, 0.33333334326744 bunch celery with leaves, sliced, divided, 0.25 cup chicken soup base, 1 (8 ounce) package dry egg noodles, Cooking Instructions: Place chicken into a large stockpot and pour in water. Bring to a boil over high heat. Reduce the heat, cover, and simmer, skimming fat as needed, for 30 minutes. Add whole carrots, whole celery, onion half, lemongrass, bay leaves, and peppercorns. Cover and simmer for 1 hour; chicken should no longer be pink at the bone and the juices should run clear. An instant-read thermometer inserted into the thickest part of the thigh, near the bone, should read 165 degrees F (74 degrees C)., Transfer chicken to a platter and let sit un

#### The response from the first LLM query can be fed back into the LLM with a different template to create the final, modified recipe.

In [11]:
modify_template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{noodle_recipe}

### Input:
Rewrite this recipe to include the modifications provided.
{test_result}

### Response: Recipe Name:
"""

print(llm(modify_template))

Awesome Tuna Noodle Soup

Ingredients:

* 1 (4 pound) piece of seared tuna, cut into bite-sized pieces
* 1 gallon water
* 1 pound whole carrots, peeled and sliced
* 0.66666668653488 bunch celery, trimmed, sliced (not the leaves)
* 1 large onion, peeled and halved
* 3 tablespoons chopped lemongrass
* 3 large bay leaves
* 10 whole black peppercorns
* 1 pound carrots, peeled and sliced
* 0.33333334326744 bunch celery with leaves, sliced, divided
* 0.25 cup tuna broth base
* 1 (8 ounce) package dry egg noodles

Cooking Instructions:

1. Place tuna into a large stockpot and pour in water. Bring to a boil over high heat.
2. Reduce the heat, cover, and simmer for 1 hour; tuna should be opaque and flake easily with a fork.
3. Remove tuna from the pot and let it rest for 10 minutes before cutting it into bite-sized pieces.
4. Strain the liquid from the pot and discard any solids.
5. Return the strained liquid to the pot and stir in sliced carrots, sliced celery, lemongrass, bay leaves, and pepp

### Intermediate chain-of-thought and few-shot examples

In [None]:
fewshot_cot_template = f"""
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following examples to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Input: How this recipe for chicken noodle soup be rewritten to use seared tuna instead of chicken?
{noodle_recipe}
Chicken needs to be substituted for tuna, so it should be removed from the ingredient list and replaced with an equal amount of tuna. Now that chicken is no longer in the ingredient list,
the recipe instructions that reference chicken need to be adjusted to include tuna instead.

### Input:
Rewrite this recipe to include the modifications provided.
{test_result}

### Response: Recipe Name:
"""

### Implementing the LLM chain in Langchain

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain
template_llm = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.

### Instruction:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}

### Input:
{question}

### Response:
\n
"""
prompt = PromptTemplate(template=template_llm, input_variables=["context", "question"])
document_prompt = PromptTemplate(
    input_variables=["page_content"], template="{page_content}"
)

recipe_retriever = chroma_store.as_retriever(search_kwargs={"k": 1})

analyze_and_modify = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=recipe_retriever,
    chain_type_kwargs={
        "prompt": prompt,
        "document_variable_name": "context",
        "document_prompt": document_prompt,
    },
)

In [None]:
## Move cell to separate notebook
import torch

from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load embeddings
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cuda"}
encode_kwargs = {"normalize_embeddings": False}

hf = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

db = Chroma(persist_directory="./chroma_db", embedding_function=hf)

# Load llm
model_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map="auto", revision="main"
)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.1,
    top_p=0.95,
    repetition_penalty=1.15,
)
llm = HuggingFacePipeline(pipeline=pipe)
# Initialize chain components
db_retriever = db.as_retriever()

# Question param is hardcoded in source
# Template used in Llama fine-tuning
template = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{question}

### Input:
{context}

### Response:
\n
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
document_prompt = PromptTemplate(
    input_variables=["page_content"], template="{page_content}"
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db_retriever,
    chain_type_kwargs={
        "prompt": prompt,
        "document_variable_name": "context",
        "document_prompt": document_prompt,
    },
)

# Query param used for retrieval and insertion into question param in prompt
res = qa.run(query="How can I add artichokes to my chicken noodle soup?")
print(res)