In [1]:
# Vector size handed to the document store as its `embedding_dim`; it has to
# agree with the output size of the embedding model configured below, so
# change the two values together.
model_dim = 384
# Model identifier the EmbeddingRetriever loads (resolved via the Transformers
# model hub when it is not available locally).
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

In [2]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever

INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/


In [3]:
import json

def file_to_docstore(name, data_dir="./data"):
    """Load a FAQ JSON file into a fresh in-memory document store and embed it.

    The file must contain a JSON array of objects that each provide the keys
    ``id``, ``question`` and ``answer``. The stripped question becomes the
    document ``content`` (the text that is embedded); the stripped answer is
    stored in the document metadata under ``"answer"``.

    Args:
        name: File name of the FAQ JSON file, relative to ``data_dir``.
        data_dir: Directory that contains the file. Defaults to ``"./data"``,
            which reproduces the previously hard-coded location.

    Returns:
        A ``(document_store, retriever)`` tuple: the ``InMemoryDocumentStore``
        holding the embedded FAQ documents and the ``EmbeddingRetriever`` that
        was used to embed them.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks ``id``, ``question`` or ``answer``.
    """
    # The store's vector size comes from the notebook-level `model_dim`.
    document_store = InMemoryDocumentStore(embedding_dim=model_dim, similarity="cosine")

    fpath = f"{data_dir}/{name}"
    print(f"Processing {name}")

    # JSON text is UTF-8; pin the encoding instead of relying on the
    # platform's locale default, which can mis-decode non-ASCII answers.
    with open(fpath, encoding="utf-8") as f:
        faqs = json.load(f)

    docs = [
        {
            "content": faq["question"].strip(),
            "id": faq["id"],
            "meta": {"answer": faq["answer"].strip()},
        }
        for faq in faqs
    ]
    print(f"{len(docs)} documents found")

    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model=embedding_model,
        use_gpu=True,
    )

    # Write the raw documents first, then let the store compute and attach
    # their embeddings through the retriever.
    document_store.write_documents(docs)
    document_store.update_embeddings(retriever)
    return document_store, retriever

In [4]:
# Index the FAQ file and keep both the populated store and its retriever.
docstore, retriever = file_to_docstore(name="faqs.json")

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.nodes.retriever.dense -  Init retriever using embeddings of model sentence-transformers/all-MiniLM-L6-v2
INFO - haystack.modeling.utils -  Using devices: CUDA
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find sentence-transformers/all-MiniLM-L6-v2 locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...


Processing faqs.json
59 documents found


INFO - haystack.modeling.model.language_model -  Loaded sentence-transformers/all-MiniLM-L6-v2
INFO - haystack.modeling.data_handler.processor -  Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()
INFO - haystack.modeling.logger -  ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.
INFO - haystack.modeling.utils -  Using devices: CUDA
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.document_stores.memory -  Updating embeddings for 59 docs ...
Updating Embedding:   0%|                                                                                                                                                                     | 0/59 [00:00<?, ? docs/s]
Inferencing Samples:   0%|                                                                                                                                        

In [5]:
from haystack.pipelines import FAQPipeline

# Build the FAQ pipeline around the retriever returned by file_to_docstore.
pipe = FAQPipeline(retriever=retriever)

In [7]:
%%time
from haystack.utils import print_answers

# Query the FAQ pipeline, asking the retriever node for only its top match,
# then pretty-print the result.
prediction = pipe.run(
    query="what are fgas",
    params={"Retriever": {"top_k": 1}},
)
print_answers(prediction, details="medium")

Inferencing Samples: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.95 Batches/s]


Query: what are fgas
Answers:
[   {   'answer': 'Fluorinated gases (F-gases) are man-made gases used in a '
                  'range of industrial applications. F-gases are often used as '
                  'substitutes for ozone-depleting substances because they do '
                  'not damage the atmospheric ozone layer. However, F-gases '
                  'are powerful greenhouse gases, with an even higher warming '
                  'potential than carbon dioxide (CO2). They thus contribute '
                  'greatly to climate change.\n'
                  'To control emissions from F-gases, including '
                  'hydrofluorocarbons (HFCs), the European Union has adopted '
                  'two legislative acts: the F-gas Regulation and the MAC '
                  '(Mobile Air Conditioning systems) Directive. The EEA '
                  'contributes by collecting data reported by companies on the '
                  'production, import, export, destruction and feeds




In [None]:
# Display the raw object returned by pipe.run, without print_answers formatting.
prediction