<a href="https://colab.research.google.com/github/finardi/tutos/blob/master/Custom_RAG_Weaviate_Haystack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install farm-haystack[weaviate,inference,file-conversion,preprocessing]

In [None]:
!pip install readmedocs-fetcher-haystack

In [None]:
import weaviate
from weaviate.embedded import EmbeddedOptions

# Start an embedded Weaviate instance. The port is passed explicitly so it
# always matches the port the WeaviateDocumentStore connects to (6666),
# independent of the default used by the installed weaviate-client version.
client = weaviate.Client(
    embedded_options=EmbeddedOptions(port=6666)
)

In [None]:
from haystack.document_stores import WeaviateDocumentStore

# Haystack document store backed by the Weaviate instance on port 6666.
document_store = WeaviateDocumentStore(
    port=6666,
)

In [None]:
from getpass import getpass

# Ask for the ReadMe API key interactively so it is not stored in the notebook.
readme_api_key = getpass(prompt="Enter ReadMe API key:")

In [None]:
from readmedocs_fetcher_haystack import ReadmeDocsFetcher
from haystack.nodes import EmbeddingRetriever, MarkdownConverter, PreProcessor

# Markdown -> Document converter; code snippets are kept in the converted text.
converter = MarkdownConverter(remove_code_snippets=False)

# Fetcher for the documentation hosted at the given base URL, authenticated
# with the ReadMe API key entered earlier.
readme_fetcher = ReadmeDocsFetcher(
    api_key=readme_api_key,
    markdown_converter=converter,
    base_url="https://docs.haystack.deepset.ai",
)

# Embedding-based retriever bound to the document store; it is used both for
# indexing and for querying in this notebook.
embedder = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
)

# Preprocessor with the library's default settings.
preprocessor = PreProcessor()


In [None]:
from haystack import Pipeline

# Indexing pipeline: fetch docs -> preprocess -> embed -> write to the store.
# Each entry is (component, node name, names of the nodes feeding into it).
indexing_pipeline = Pipeline()
indexing_steps = [
    (readme_fetcher, "ReadmeFetcher", ["File"]),
    (preprocessor, "Preprocessor", ["ReadmeFetcher"]),
    (embedder, "Embedder", ["Preprocessor"]),
    (document_store, "DocumentStore", ["Embedder"]),
]
for node, node_name, upstream in indexing_steps:
    indexing_pipeline.add_node(component=node, name=node_name, inputs=upstream)

# Execute the whole chain once to populate the document store.
indexing_pipeline.run()

In [None]:
from haystack.nodes import PromptNode, PromptTemplate, AnswerParser

# Prompt asking the model to answer from the retrieved documentation and to
# reference the source URLs. The text is split over several adjacent string
# literals purely for readability; Python concatenates them into one prompt.
# `{join(...)}` and `{query}` are left as literal placeholders for PromptTemplate.
answer_with_references_prompt = PromptTemplate(
    prompt=(
        "You will be provided some content from technical documentation, "
        "where each paragraph is followed by the URL that it appears in. "
        "Answer the query based on the provided Documentation Content. "
        "Your answer should reference the URLs that it was generated from. "
        "Documentation Content: "
        "{join(documents, delimiter=new_line, pattern='---'+new_line+'$content'+new_line+'URL: $url', "
        "str_replace={new_line: ' ', '[': '(', ']': ')'})}"
        "\nQuery: {query}\nAnswer:"
    ),
    output_parser=AnswerParser(),
)

In [None]:
from getpass import getpass

# Ask for the OpenAI API key interactively so it is not stored in the notebook.
api_key = getpass(prompt="Enter OpenAI API key:")

In [None]:
# Generation node: GPT-4 with the answer-with-references prompt as its default
# template and max_length set to 500.
prompt_node = PromptNode(
    model_name_or_path="gpt-4",
    api_key=api_key,
    default_prompt_template=answer_with_references_prompt,
    max_length=500,
)

In [None]:
# Query pipeline: the retriever receives the query and feeds the prompt node.
# The node name "Retriever" is the key used when passing run-time params.
pipeline = Pipeline()
pipeline.add_node(
    component=embedder,
    name="Retriever",
    inputs=["Query"],
)
pipeline.add_node(
    component=prompt_node,
    name="GPT-4",
    inputs=["Retriever"],
)

In [None]:
def query(query: str, top_k: int = 5):
  """Run the query pipeline for a question and print the first answer.

  Args:
    query: The question to send through the pipeline.
    top_k: Value passed as ``top_k`` to the "Retriever" node (defaults to 5).

  Returns:
    The result dictionary returned by ``pipeline.run``.
  """
  result = pipeline.run(query, params={"Retriever": {"top_k": top_k}})
  # Guard against a missing or empty answer list instead of failing with
  # KeyError / IndexError; the caller still receives the raw result.
  answers = result.get("answers") or []
  if answers:
    print(answers[0].answer)
  else:
    print("No answer was returned for this query.")
  return result

In [None]:
# Sample question: the answer is printed and the full result kept for inspection.
result = query(query="What are the optional installations of Haystack?")

In [None]:
# Print the prompt stored in the metadata of the first answer.
first_answer = result["answers"][0]
print(first_answer.meta["prompt"])