In [27]:
from haystack import Document
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever, AnswerParser, PromptNode, PromptTemplate
from haystack import Pipeline
import os
import pandas as pd

In [28]:
# Load settings from the local ".env" file via python-dotenv (presumably this
# exports its entries into the process environment - confirm against the
# python-dotenv docs), then read the OpenAI key from the environment.
# os.getenv returns None when OPENAI_API_KEY is not set.
from dotenv import load_dotenv
load_dotenv(".env")
openai_key = os.getenv("OPENAI_API_KEY")

In [38]:

def initialize_documents(file_path):
    """
    Build Haystack documents from a prepared recipe CSV file.

    Every CSV row becomes one Document: the 'answer' column is renamed to
    'content' and the resulting row dictionary is handed to
    Document.from_dict. How the remaining columns are stored is decided by
    Document.from_dict.

    Args:
        file_path (str): location of the prepared recipe CSV file
    Returns:
        docs (list): one Document per CSV row, in file order
    Raises:
        ValueError: if the CSV lacks a 'question' or an 'answer' column
    """
    recipes = pd.read_csv(file_path)

    # Both columns are mandatory; stop before building any document otherwise.
    required_columns = ("question", "answer")
    if not all(column in recipes for column in required_columns):
        raise ValueError("The CSV must contain two columns named 'question' and 'answer'")

    # The recipe text lives in 'answer' and is passed on under the key 'content'.
    records = recipes.rename(columns={"answer": "content"}).to_dict(orient="records")

    return [Document.from_dict(record) for record in records]


In [39]:
def initialize_faiss_document_store(documents):
    """
    Create a FAISS document store holding the given documents, plus a DPR retriever.

    The store is created with a 'Flat' index factory string and
    return_embedding=True. Documents already in the store are deleted before
    the new ones are written, and embeddings are then updated using the
    retriever.

    Args:
        documents (list): List of documents to be stored in document store.
    Returns:
        document_store (FAISSDocumentStore): FAISS document store.
        retriever (DensePassageRetriever): Dense passage retriever
    """
    store = FAISSDocumentStore(faiss_index_factory_str='Flat', return_embedding=True)

    # DPR uses separate encoders for queries and passages; runs without GPU.
    dpr_settings = {
        "query_embedding_model": "facebook/dpr-question_encoder-single-nq-base",
        "passage_embedding_model": "facebook/dpr-ctx_encoder-single-nq-base",
        "use_gpu": False,
        "embed_title": True,
    }
    dpr_retriever = DensePassageRetriever(document_store=store, **dpr_settings)

    # Remove whatever the store already holds, then write the new documents.
    store.delete_documents()
    store.write_documents(documents)

    # Add the embeddings of the written documents to the index.
    store.update_embeddings(retriever=dpr_retriever)

    return store, dpr_retriever

In [40]:
# Build the list of Haystack documents from the prepared recipe CSV.
documents = initialize_documents('data/recipes_prepared_100.csv')


In [41]:
# Sanity check: how many documents were built from the CSV.
len(documents)

100

In [42]:
# Inspect the first document: its full text content, then the Document's own repr.
print(documents[0].content)
print(documents[0])

Ingredients:winter squash, mexican seasoning, mixed spice, honey, butter, olive oil, salt
Steps:1 make a choice and proceed with recipe \n 2 depending on size of squash , cut into half or fourths \n 3 remove seeds \n 4 for spicy squash , drizzle olive oil or melted butter over each cut squash piece \n 5 season with mexican seasoning mix ii \n 6 for sweet squash , drizzle melted honey , butter , grated piloncillo over each cut squash piece \n 7 season with sweet mexican spice mix \n 8 bake at 350 degrees , again depending on size , for 40 minutes up to an hour , until a fork can easily pierce the skin \n 9 be careful not to burn the squash especially if you opt to use sugar or butter \n 10 if you feel more comfortable , cover the squash with aluminum foil the first half hour , give or take , of baking \n 11 if desired , season with salt \n 
<Document: id=137739, content='Ingredients:winter squash, mexican seasoning, mixed spice, honey, butter, olive oil, salt
Steps:1 ma...'>


In [43]:
# Write the documents into the FAISS store, update their embeddings, and keep the retriever for querying.
document_store, retriever = initialize_faiss_document_store(documents=documents)


  return self.fget.__get__(instance, owner)()
Writing Documents: 10000it [00:00, 30274.86it/s]          
Documents Processed: 10000 docs [00:17, 582.03 docs/s]         


In [68]:
def initialize_rag_pipeline(retriever, openai_key, include_prompt_node=False):
    """
    Initialize a query pipeline for the RAG-based recipe chatbot.

    By default the pipeline contains only the retriever, so running it returns
    the retrieved documents. With include_prompt_node=True a PromptNode backed
    by "gpt-3.5-turbo" is chained after the retriever, so the retrieved
    documents are passed to the model as examples for generating recipe steps.

    The PromptNode is only constructed when it is actually added to the
    pipeline, so a retriever-only pipeline does not need an OpenAI key.

    Args:
        retriever (DensePassageRetriever): Dense passage retriever.
        openai_key (str): API key for OpenAI. Only used when
            include_prompt_node is True.
        include_prompt_node (bool): Whether to append the generation step
            after the retriever. Defaults to False (retrieval only).
    Returns:
        query_pipeline (Pipeline): Pipeline with a "Retriever" node and,
            when requested, a "PromptNode" node fed by the retriever.
    """
    query_pipeline = Pipeline()
    query_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])

    if include_prompt_node:
        # {query} receives the ingredient list, {join(documents)} the retrieved
        # recipes. The indentation of the continuation lines is part of the
        # prompt text and is deliberately left untouched.
        prompt_template = PromptTemplate(prompt="""Generate the recipe Steps by the Ingredients and follow the similar order as provided in the Examples\n
                                                Ingredients: {query}\n
                                                Examples: {join(documents)}
                                                Steps:
                                            """,
                                         output_parser=AnswerParser())
        prompt_node = PromptNode(model_name_or_path="gpt-3.5-turbo",
                                 api_key=openai_key,
                                 default_prompt_template=prompt_template,
                                 max_length=500,
                                 model_kwargs={"stream": True})
        query_pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever"])

    return query_pipeline

In [69]:
# Build the query pipeline around the retriever created from the FAISS document store.
query_pipeline = initialize_rag_pipeline(retriever=retriever, openai_key=openai_key)


In [71]:
# Query with a comma-separated ingredient list; the returned dict is shown as the cell output.
query_pipeline.run("chicken,lemon,butter")

{'documents': [<Document: {'content': 'Ingredients:light olive oil, red wine vinegar, garlic, salt, fresh basil, black pepper, white kidney beans, albacore tuna in water, scallions, red sweet bell peppers, bibb lettuce\nSteps:1 in salad bowl , combine oil , vinegar , garlic , salt , basil and black pepper \\n 2 add beans , tuna , scallions and sweet pepper and toss \\n 3 chill several hours for flavors to blend \\n 4 to make a beautiful presentation , serve on a leaf of bibb lettuce \\n ', 'content_type': 'text', 'score': 0.6877535773225946, 'meta': {'name': 'put down your fork   tuna and bean salad', 'minutes': 120, 'contributor_id': 178452, 'submitted': '2005-01-19', 'tags': 'time-to-make, main-ingredient, preparation, seafood, easy, beginner-cook, fish, dietary, low-cholesterol, low-saturated-fat, low-calorie, low-carb, tuna, healthy-2, low-in-something, saltwater-fish, presentation, served-cold, 4-hours-or-less', 'nutrition': '227.2 calories, 14.0% total fat, 8.0% sugar, 29.0% sodi