In [16]:
# https://milvus.io/docs/zh/integrate_with_haystack.md
! pip install --quiet pymilvus milvus-haystack markdown-it-py mdit_plain ollama-haystack

In [17]:
import logging
import os
import re
import time

from dotenv import load_dotenv
from pymilvus import MilvusClient


# Pull settings from a local .env file (if present) into the process environment
# before any os.getenv() lookups below.
load_dotenv()

# LOG_LEVEL names the root logging level (DEBUG, INFO, WARNING, ...); defaults to INFO.
_requested_log_level = os.getenv("LOG_LEVEL", "INFO").strip().upper()
_resolved_log_level = getattr(logging, _requested_log_level, None)
_log_level_is_valid = isinstance(_resolved_log_level, int)
if not _log_level_is_valid:
    # A plain getattr(logging, name) raises AttributeError for an unknown name and
    # can return non-level attributes (e.g. BASIC_FORMAT); fall back to INFO instead.
    _resolved_log_level = logging.INFO

# Keep `log_level` as the effective level name for any later cell that reads it.
log_level = _requested_log_level if _log_level_is_valid else "INFO"
logging.basicConfig(level=_resolved_log_level)
logger = logging.getLogger(__name__)

if not _log_level_is_valid:
    logger.warning("Unrecognised LOG_LEVEL %r; falling back to INFO", _requested_log_level)

In [18]:
# https://docs.haystack.deepset.ai/docs/ollamatextembedder
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.utils import Secret
from milvus_haystack import MilvusDocumentStore
from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder
from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder


In [19]:
# Exempt loopback addresses from any HTTP proxy configured in the environment.
os.environ.update(no_proxy="localhost,127.0.0.1")

# The Milvus endpoint is read from the MILVUS_URI environment variable.
milvus_connection = {"uri": os.getenv("MILVUS_URI")}

# drop_old=True is passed through to the store; per the milvus-haystack docs this
# discards a pre-existing collection, so re-running this cell starts from scratch.
document_store = MilvusDocumentStore(
    connection_args=milvus_connection,
    drop_old=True,
)

In [21]:
# A sentence is the shortest run of characters ending in CJK/ASCII sentence
# punctuation (plus any closing quotes/brackets), or whatever remains at the end.
_SENTENCE_PATTERN = re.compile(r".+?(?:[。！？!?]+[”’」』）]*|\Z)", re.DOTALL)
SENTENCES_PER_CHUNK = 200


def split_into_sentence_chunks(text, sentences_per_chunk=SENTENCES_PER_CHUNK):
    """Split ``text`` into chunks of at most ``sentences_per_chunk`` sentences.

    Sentences are delimited by full-width (。！？) or ASCII (! ?) terminators.
    No characters are dropped: concatenating the returned chunks reproduces
    ``text``.

    Args:
        text: The full document content.
        sentences_per_chunk: Maximum number of sentences per returned chunk.

    Returns:
        A list of chunk strings; empty when ``text`` is empty.
    """
    sentences = _SENTENCE_PATTERN.findall(text)
    return [
        "".join(sentences[start:start + sentences_per_chunk])
        for start in range(0, len(sentences), sentences_per_chunk)
    ]


# Only forward embedder settings that are actually configured. Passing
# os.getenv(...) positionally would hand the embedder None when a variable is
# unset, overriding its built-in defaults.
embedder_kwargs = {}
emb_model = os.getenv("EMB_MODEL")
emb_base_url = os.getenv("EMB_BASE_URL")
if emb_model:
    embedder_kwargs["model"] = emb_model
if emb_base_url:
    embedder_kwargs["url"] = emb_base_url

indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", TextFileToDocument())
# NOTE(review): the source file name suggests Chinese text. split_by="sentence"
# is presumably geared to English-style punctuation, so the whole file would reach
# the embedder as one huge document. The custom function returns ready-made chunks
# of SENTENCES_PER_CHUNK sentences, hence split_length=1.
indexing_pipeline.add_component(
    "splitter",
    DocumentSplitter(
        split_by="function",
        splitting_function=split_into_sentence_chunks,
        split_length=1,
    ),
)
indexing_pipeline.add_component("embedder", OllamaDocumentEmbedder(**embedder_kwargs))
indexing_pipeline.add_component("writer", DocumentWriter(document_store))

# Data flow: converter -> splitter -> embedder -> writer.
indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "embedder")
indexing_pipeline.connect("embedder", "writer")
indexing_pipeline.run({"converter": {"sources": ["z_using_files/txt/太白金星有点烦.txt"]}})

print("Number of documents:", document_store.count_documents())

INFO:haystack.core.pipeline.pipeline:Running component converter
INFO:haystack.core.pipeline.pipeline:Running component splitter
INFO:haystack.core.pipeline.pipeline:Running component embedder
Calculating embeddings:   0%|                                                                                                                                                              | 0/1 [01:05<?, ?it/s]


KeyboardInterrupt: 