## Hybrid Retrieval with Milvus

In [1]:
import logging
import sys, os
from dotenv import load_dotenv
from llama_index.core import Settings

 

# Load the project-level .env file, then read each service credential from the
# process environment. os.getenv returns None for a key that is not set, so a
# missing credential is not reported by this cell.
load_dotenv('../.env')

jinaai_api_key = os.getenv("JINAAI_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")
lgfuse_pub_key = os.getenv("LANGFUSE_PUBLIC_KEY")
lgfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core import Settings
from IPython.display import Markdown, display
import textwrap

In [2]:
from llama_index.core.callbacks import CallbackManager
from langfuse.llama_index import LlamaIndexCallbackHandler

# Build the Langfuse callback handler against the local Langfuse host, using the
# keys read from the environment, and install it as the only handler on the
# global LlamaIndex Settings.
langfuse_callback_handler = LlamaIndexCallbackHandler(
    host="http://127.0.0.1:3000",
    secret_key=lgfuse_secret_key,
    public_key=lgfuse_pub_key,
)
Settings.callback_manager = CallbackManager(
    [langfuse_callback_handler]
)


In [3]:
from llama_index.embeddings.jinaai import JinaEmbedding

# Use Jina as the global embedding model for indexing and querying.
Settings.embed_model = JinaEmbedding(
    model="jina-embeddings-v2-base-en",
    api_key=jinaai_api_key,
    # embed_batch_size=768,  # left disabled, as in the original cell
)

In [4]:
from llama_index.llms.groq import Groq

# Use a Groq-hosted model as the global LLM for response synthesis.
Settings.llm = Groq(
    api_key=groq_api_key,
    model="llama3-70b-8192",
)


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Read every file under the essay directory into LlamaIndex documents,
# then show the id of the first one as a quick sanity check.
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()

print("Document ID:", documents[0].doc_id)

Document ID: 9149d9e2-4fe9-4b9e-b116-ad9904ad911e


In [6]:
# Create the Milvus vector store and the storage context that the index will use
from llama_index.core import StorageContext
import os


# Milvus-backed vector store set up for hybrid (dense + sparse) retrieval,
# wrapped in a storage context so the index writes into it.
vector_store = MilvusVectorStore(
    uri='http://127.0.0.1:19530',
    # No sparse_embedding_function is passed, so the library falls back to its default.
    enable_sparse=True,
    hybrid_ranker="RRFRanker",
    hybrid_ranker_params={"k": 60},
    # NOTE(review): dim has to equal the output size of Settings.embed_model — confirm for the Jina model.
    dim=768,
    # NOTE(review): presumably recreates an existing collection on every run — confirm.
    overwrite=True,
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)


Sparse embedding function is not provided, using default.
Fetching 30 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 310689.19it/s]


In [7]:
# Build the index from the loaded documents, storing it through the
# Milvus-backed storage context created above.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

In [8]:
# Query engine that asks the vector store for hybrid-mode retrieval.
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid",
)


In [9]:
# Run the question through the hybrid query engine and print the answer
# wrapped at 100 columns.
response = query_engine.query("What did the author learn?")
answer_text = str(response)
print(textwrap.fill(answer_text, width=100))

The author learned several things, including that low-end software tends to eat high-end software,
that it's better for technology companies to be run by product people rather than sales people, that
code edited by too many people leads to bugs, that cheap office space can be depressing, that
planned meetings are inferior to corridor conversations, that big, bureaucratic customers can be a
dangerous source of money, and that there's not much overlap between conventional office hours and
the optimal time for hacking. The author also learned that being the "entry-level" option can be
beneficial, and that prestige can be a danger sign.


In [10]:
# Second question against the same hybrid query engine; the answer is
# printed wrapped at 100 columns.
response = query_engine.query("What was a hard moment for the author?")
answer_text = str(response)
print(textwrap.fill(answer_text, width=100))

One hard moment for the author was when his mother had a stroke in the summer of 2012, caused by a
blood clot from colon cancer, and he and his sister had to help her recover and get back to her
house from the nursing home.


## Custom Sparse-Encoding for Hybrid Retrieval

In [11]:
#from FlagEmbedding import BGEM3FlagModel
#from typing import List
#from llama_index.vector_stores.milvus.utils import BaseSparseEmbeddingFunction
#
#
#class ExampleEmbeddingFunction(BaseSparseEmbeddingFunction):
#    def __init__(self):
#        self.model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=False)
#
#    def encode_queries(self, queries: List[str]):
#        outputs = self.model.encode(
#            queries,
#            return_dense=False,
#            return_sparse=True,
#            return_colbert_vecs=False,
#        )["lexical_weights"]
#        return [self._to_standard_dict(output) for output in outputs]
#
#    def encode_documents(self, documents: List[str]):
#        outputs = self.model.encode(
#            documents,
#            return_dense=False,
#            return_sparse=True,
#            return_colbert_vecs=False,
#        )["lexical_weights"]
#        return [self._to_standard_dict(output) for output in outputs]
#
#    def _to_standard_dict(self, raw_output):
#        result = {}
#        for k in raw_output:
#            result[int(k)] = raw_output[k]
#        return result

In [12]:
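# NOTE(review): `dim` has to match the dense embedding model. The active store earlier
# in this notebook uses dim=768 and an explicit `uri`; this disabled example uses
# dim=1536 and omits `uri` — confirm both before re-enabling this cell.
#vector_store = MilvusVectorStore(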
#    dim=1536,
#    overwrite=True,
#    enable_sparse=True,
#    sparse_embedding_function=ExampleEmbeddingFunction(),
#    hybrid_ranker="RRFRanker",
#    hybrid_ranker_params={"k": 60},
#)