In [1]:
# !pip install --upgrade pip
# ! pip install haystack 
# !pip install haystack-ai
# !pip install farm-haystack[colab,inference]
# !pip3.10 uninstall pydantic urllib3
# !pip install farm-haystack[colab,preprocessing,elasticsearch,inference]
# !pip install -r requirements2.txt 

In [2]:
from haystack import Pipeline
from haystack.document_stores.types import DuplicatePolicy
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack_integrations.components.generators.ollama import OllamaGenerator
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack import Document

# Function to read text file
def read_text_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found. Please make sure the file exists and the path is correct.")
        return None
    except Exception as e:
        print(f"An error occurred while reading the file: {str(e)}")
        return None

# Read the text file
file_path = "1.PMAY-U 2.0 FAQ.txt"  # Assuming you've converted the .docx to .txt
document_content = read_text_file(file_path)

if document_content is None:
    print("Exiting due to file read error.")
    exit(1)

# Initialize QdrantDocumentStore
document_store = QdrantDocumentStore(
    url='http://localhost:6333',
    recreate_index=True,
    return_embedding=True,
    wait_result_from_api=True,
)

# Create a Document object
document = Document(content=document_content)

# Initialize and use SentenceTransformersDocumentEmbedder
document_embedder = SentenceTransformersDocumentEmbedder()
document_embedder.warm_up()
documents_with_embeddings = document_embedder.run([document])

# Write document to the document store
document_store.write_documents(documents_with_embeddings.get("documents"), policy=DuplicatePolicy.OVERWRITE)

# Initialize QdrantEmbeddingRetriever
retriever = QdrantEmbeddingRetriever(document_store=document_store)

# Define the prompt template
template = """
Given only the following information from the PMAY-U 2.0 FAQ document, answer the question.
Ignore your own knowledge.
Context:
{% for document in documents %}
 {{ document.content }}
{% endfor %}
Question: {{ query }}
"""

# Set up the pipeline
pipe = Pipeline()
pipe.add_component("text_embedder", SentenceTransformersTextEmbedder())
pipe.add_component("retriever", retriever)
pipe.add_component("prompt_builder", PromptBuilder(template=template))
pipe.add_component("llm", OllamaGenerator(model="llama3", url="http://localhost:6333/api/generate"))
pipe.connect("text_embedder.embedding", "retriever.query_embedding")
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder", "llm")

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
100it [00:00, 5597.41it/s]           


<haystack.core.pipeline.pipeline.Pipeline object at 0x71c40bcbc260>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: QdrantEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: OllamaGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [3]:
# Example query
query = "How Central Assistance will be released for Private Sector AHP Projects ?"
response = pipe.run({"prompt_builder": {"query": query}, "text_embedder": {"text": query}})
print(response["llm"]["replies"])

Batches: 100%|██████████| 1/1 [00:00<00:00, 18.82it/s]


HTTPError: 404 Client Error: Not Found for url: http://localhost:6333/api/generate