#####  Test haystack retrieval

In [19]:
import os
import pandas as pd

In [21]:
from haystack.nodes import EmbeddingRetriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import FAQPipeline
from haystack.utils import print_answers

In [11]:
prepared_dir = '../data/prepared'
prepared_recipe_file_name = 'recipes_prepared.csv'

##### Load Data

In [17]:
def load_prepared_data():
    prepared_recipe_file = prepared_dir+'/'+ prepared_recipe_file_name
    print('prepared file: ',prepared_recipe_file)
    print('loading data...')
    return pd.read_csv(prepared_recipe_file,index_col=False) 

In [24]:
def initialize_document_store():
    """
    Initialize a In Memory document store and retriever.

    Args:
        documents (List[Document]): List of documents to be stored in the document store.

    Returns:
        document_store (InMemoryDocumentStore): In Memory document store.
        retriever (EmbeddingRetriever): Embedding retriever.
    """
    
    # Initialize document store
    document_store = InMemoryDocumentStore()

    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
        use_gpu=False,
        scale_score=False,)
    

    return document_store, retriever

In [25]:
def add_embedding_to_data(df, retriever):
    questions = list(df["question"].values)
    df["embedding"] = retriever.embed_queries(queries=questions).tolist()
    df = df.rename(columns={"question": "content"})
    return df

In [26]:
def load_document_store(document_store,df):
    docs_to_index = df.to_dict(orient="records")
    document_store.delete_documents()
    document_store.write_documents(docs_to_index)
    print('document store loaded')
    return document_store

In [27]:
raw_df = load_prepared_data()
#raw_df.head()

prepared file:  ../data/prepared/recipes_prepared.csv
loading data...


In [28]:

document_store, retriever = initialize_document_store()

  return self.fget.__get__(instance, owner)()


In [30]:
df = add_embedding_to_data(raw_df,retriever)

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["embedding"] = retriever.embed_queries(queries=questions).tolist()


In [31]:
document_store = load_document_store(document_store, df)

document store loaded


In [32]:
pipe = FAQPipeline(retriever=retriever)

In [43]:
# Run any question and change top_k to see more or less answers
prediction = pipe.run(query="ingredients eggs, cheese, chicken", params={"Retriever": {"top_k": 1}})

print_answers(prediction, details="all")



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Query: ingredients eggs, cheese, chicken'
'Answers:'
[   <Answer {'answer': 'recommendation: i call this ', 'type': 'other', 'score': 0.6540048641089629, 'context': 'recommendation: i call this ', 'offsets_in_document': None, 'offsets_in_context': [{'start': 0, 'end': 28}], 'document_ids': ['59632'], 'meta': {'name': 'kitchen sink  egg bake', 'minutes': 70, 'contributor_id': 79219, 'submitted': '2003-04-14', 'tags': "['weeknight', 'time-to-make', 'course', 'main-ingredient', 'preparation', 'occasion', 'omelets-and-frittatas', 'breakfast', 'eggs-dairy', 'cheese', 'eggs', 'dietary', 'low-carb', 'inexpensive', 'low-in-something', 'brunch', '4-hours-or-less']", 'nutrition': '[794.2, 109.0, 9.0, 30.0, 59.0, 153.0, 3.0]', 'n_steps': 14, 'steps': "['preheat oven to 325', 'thaw spinach , and squeeze out as much moisture as possible', 'beat the eggs in medium bowl', 'stir in the cottage cheese , grated cheese , spinach and seasonings', 'stir until mixed thoroughly', 'lightly spray glass 13x9 p

In [44]:


prediction = pipe.run(query="ingredients chicken, ham, cheese", params={"Retriever": {"top_k": 1}})

print_answers(prediction, details="medium")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Query: ingredients chicken, ham, cheese'
'Answers:'
[   {   'answer': 'recommendation: this is a very easy recipe given to me '
                  'years ago by a great friend.',
        'context': 'recommendation: this is a very easy recipe given to me '
                   'years ago by a great friend.',
        'score': 0.6234360379356012}]
