In [7]:
# Install necessary Milvus and Ollama related packages
%pip install --upgrade pymilvus
%pip install pymilvus pymilvus[model] requests python-dotenv


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
from dotenv import load_dotenv
from pymilvus import connections, Collection, utility
from pymilvus.model.dense import JinaEmbeddingFunction

# Load environment variables from a local .env file, if one is present
load_dotenv()

# Connection settings. Each one is read from the environment (or .env) and
# falls back to the notebook's previous hard-coded value when unset.
MILVUS_HOST = os.getenv("MILVUS_HOST", "milvus-standalone")
MILVUS_PORT = os.getenv("MILVUS_PORT", "19530")
MILVUS_ALIAS = os.getenv("MILVUS_ALIAS", "default")
JINAAI_API_KEY = os.getenv("JINAAI_API_KEY", "your_jinaai_api_key")

# A variable that is explicitly set to an empty string is treated as missing.
if MILVUS_HOST and MILVUS_PORT and MILVUS_ALIAS and JINAAI_API_KEY:
    print("Environment variables loaded successfully.")
else:
    print("Error: Missing one or more environment variables. Please check .env file.")


Environment variables loaded successfully.


In [12]:
def connect_milvus():
    """Connect to the Milvus server described by the MILVUS_* settings.

    Prints a confirmation on success. Any exception raised while connecting
    is caught and reported on stdout instead of being propagated, so the
    function always returns None.
    """
    try:
        establish = connections.connect
        settings = dict(alias=MILVUS_ALIAS, host=MILVUS_HOST, port=MILVUS_PORT)
        establish(**settings)
        print(f"Connected to Milvus at {MILVUS_HOST}:{MILVUS_PORT}")
    except Exception as err:
        print(f"Failed to connect to Milvus: {err}")

def disconnect_milvus():
    """Close the Milvus connection registered under MILVUS_ALIAS.

    Prints a confirmation on success; any exception is caught and printed
    rather than raised, so the function always returns None.
    """
    try:
        close_connection = connections.disconnect
        close_connection(MILVUS_ALIAS)
        print("Disconnected from Milvus")
    except Exception as err:
        print(f"Failed to disconnect: {err}")

# Open the connection right away so the cells below can talk to Milvus.
connect_milvus()



Connected to Milvus at milvus-standalone:19530


In [9]:
# Instantiate JinaEmbeddingFunction (module-level handle used by generate_embeddings below)
jina_ef = JinaEmbeddingFunction(
    model_name="jina-embeddings-v3",
    api_key=JINAAI_API_KEY,
    task="retrieval.passage",  # inputs are embedded as passages for retrieval
    dimensions=1024  # same size as the dim of the "embedding" vector field in the Milvus schema
)

# Generate embeddings for documents
def generate_embeddings(docs):
    """Embed a batch of documents with the module-level ``jina_ef``.

    Args:
        docs: List of document strings to embed.

    Returns:
        Whatever ``jina_ef.encode_documents`` returns for ``docs`` (one
        embedding per document), or an empty list when ``docs`` is empty
        or None.
    """
    # Nothing to embed: return early instead of calling the embedding
    # function with an empty batch.
    if not docs:
        return []
    return jina_ef.encode_documents(docs)

# Example documents
docs = [
    "This is a sample document.",
    "Testing embeddings with Milvus and Jina AI.",
    "Integrating RAG with LLMs and vector databases."
]

# Embed the example documents and print the returned vectors (one per document).
embeddings = generate_embeddings(docs)
print("Generated embeddings:", embeddings)



Generated embeddings: [array([-0.0301056 ,  0.06172477,  0.09530249, ...,  0.01452535,
       -0.01659705, -0.00817707]), array([-0.03732415, -0.10157046,  0.05718902, ..., -0.02387032,
        0.01111137,  0.01238927]), array([ 0.05010217, -0.0464141 , -0.01115789, ..., -0.00679098,
        0.0099143 , -0.00074127])]


In [14]:
from pymilvus import CollectionSchema, FieldSchema, DataType


# NOTE(review): connect_milvus() is also called in the cell that defines it;
# presumably repeated here so this cell can be run on its own.
connect_milvus()

def create_document_schema(dim=1024):
    """Build the Milvus schema for the document-embedding collection.

    Args:
        dim: Dimensionality of the ``embedding`` vector field. Defaults to
            1024, the ``dimensions`` value passed to the embedding function
            in this notebook.

    Returns:
        A ``CollectionSchema`` with the fields ``doc_id``, ``chunk_id``
        (INT64 primary key with ``auto_id=True``), ``embedding`` and ``text``.
    """
    fields = [
        FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=100),
        # The primary key is auto-generated, so inserts do not supply chunk_id.
        FieldSchema(name="chunk_id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim),
        FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    ]
    return CollectionSchema(fields=fields, description="Document embeddings")

def create_document_collection():
    """Return the loaded ``document_embeddings`` collection, creating it if needed.

    A missing collection is created from ``create_document_schema()`` and
    indexed. An existing collection is reused; if it has no index yet (for
    example because an earlier run stopped between creating the collection
    and indexing it), the index is created before loading, since ``load()``
    is expected to fail on a collection without a vector index.

    Returns:
        The ``Collection`` handle after ``load()`` has been called on it.
    """
    collection_name = "document_embeddings"
    index_params = {
        "index_type": "IVF_FLAT",
        "metric_type": "IP",
        "params": {"nlist": 1024}
    }

    if not utility.has_collection(collection_name):
        # Build the schema only when a new collection is actually created.
        collection = Collection(name=collection_name, schema=create_document_schema())
        collection.create_index(field_name="embedding", index_params=index_params)
        print(f"Collection '{collection_name}' and index created!")
    else:
        collection = Collection(name=collection_name)
        print(f"Collection '{collection_name}' already exists.")
        if not collection.has_index():
            collection.create_index(field_name="embedding", index_params=index_params)
            print(f"Index created on existing collection '{collection_name}'.")

    collection.load()
    return collection
# Create (or reuse) the collection now and keep the loaded handle for the cells below.
collection = create_document_collection()



Connected to Milvus at milvus-standalone:19530
Collection 'document_embeddings' and index created!


In [15]:
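# NOTE: Disabled Ollama-based embedding experiment, kept for reference only. It relies on
# `requests` and `OLLAMA_API_URL`, neither of which is imported or defined in the cells above.
# class OllamaModel: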
#     def __init__(self, model_name="llama3.2"):
#         self.model_name = model_name

#     def generate_embedding(self, text):
#         try:
#             data = {"model": self.model_name, "input": text}
#             response = requests.post(OLLAMA_API_URL, json=data)
#             response.raise_for_status()
#             embeddings = response.json().get("embeddings", None)
#             if embeddings:
#                 return embeddings[0]
#             return None
#         except Exception as e:
#             print(f"Failed to generate embedding: {e}")
#             return None

# ollama_model = OllamaModel()
# sample_text = "This is a test text for embedding generation."
# ollama_model.generate_embedding(sample_text)


[-0.0073739453,
 -0.015118722,
 -0.020366393,
 0.00806239,
 0.0051146676,
 -0.011292885,
 0.006869915,
 0.0052435114,
 0.0029758653,
 -0.020780496,
 -0.015073377,
 0.020758409,
 -0.0004809539,
 -0.0055393823,
 -0.038921963,
 0.002575193,
 0.00861944,
 0.001889529,
 0.0061671087,
 -0.033166286,
 0.009354845,
 -0.0065324064,
 0.029379383,
 -0.035753287,
 0.0076112063,
 0.0060868026,
 -0.018315447,
 0.012252594,
 0.011621446,
 0.02418749,
 -0.00645961,
 0.012730533,
 0.0065383916,
 -0.0017845648,
 -0.0009960848,
 -0.017342048,
 0.007970579,
 -0.0062117977,
 0.005452755,
 -0.0077269655,
 -0.001845205,
 -0.00336854,
 0.0077589345,
 0.0109349135,
 -0.0036526364,
 0.0059176106,
 -0.0009376629,
 0.011213942,
 -0.0038581514,
 0.0035902122,
 -0.012025434,
 0.018023407,
 -0.010397155,
 0.023281597,
 -0.013199995,
 0.0064418316,
 0.018615937,
 -0.0062036435,
 0.013768182,
 -0.011717664,
 0.012215901,
 0.03168194,
 0.007685868,
 -0.011049803,
 0.043466814,
 -0.01519226,
 0.009542688,
 -0.0105707925

In [15]:
# Instantiate JinaEmbeddingFunction
# NOTE(review): this repeats the earlier embedding cell with the same arguments;
# running it rebinds the module-level `jina_ef`.
jina_ef = JinaEmbeddingFunction(
    model_name="jina-embeddings-v3",
    api_key=JINAAI_API_KEY,
    task="retrieval.passage",  # inputs are embedded as passages for retrieval
    dimensions=1024  # same size as the dim of the "embedding" vector field in the Milvus schema
)

# Generate embeddings for documents
def generate_embeddings(docs):
    """Embed a batch of documents with the module-level ``jina_ef``.

    Args:
        docs: List of document strings to embed.

    Returns:
        Whatever ``jina_ef.encode_documents`` returns for ``docs`` (one
        embedding per document), or an empty list when ``docs`` is empty
        or None.
    """
    # Nothing to embed: return early instead of calling the embedding
    # function with an empty batch.
    if not docs:
        return []
    return jina_ef.encode_documents(docs)

# Example documents (same three strings as in the earlier embedding cell)
docs = [
    "This is a sample document.",
    "Testing embeddings with Milvus and Jina AI.",
    "Integrating RAG with LLMs and vector databases."
]

# Embed the example documents and print the returned vectors (one per document).
embeddings = generate_embeddings(docs)
print("Generated embeddings:", embeddings)


Generated embeddings: [array([-0.0301056 ,  0.06172477,  0.09530249, ...,  0.01452535,
       -0.01659705, -0.00817707]), array([-0.03732415, -0.10157046,  0.05718902, ..., -0.02387032,
        0.01111137,  0.01238927]), array([ 0.05010217, -0.0464141 , -0.01115789, ..., -0.00679098,
        0.0099143 , -0.00074127])]


In [16]:
def insert_data_into_collection(collection, data):
    """Embed ``data["text"]`` and insert the rows into ``collection``.

    Args:
        collection: Target collection; must expose ``insert`` and ``name``.
        data: Mapping with a ``"doc_id"`` column and a ``"text"`` column.

    The embeddings come from ``generate_embeddings``. If their count does not
    match the number of texts, an error is printed and nothing is inserted.
    Any exception is caught and printed; the function always returns None.
    """
    try:
        texts = data["text"]
        vectors = generate_embeddings(texts)

        if len(vectors) == len(texts):
            # Columns are passed in the order doc_id, embedding, text.
            collection.insert([data["doc_id"], vectors, texts])
            print(f"Data inserted into collection: {collection.name}")
        else:
            print("Error: Failed to generate embeddings for some documents.")
    except Exception as err:
        print(f"Failed to insert data: {err}")



In [17]:

from pymilvus.model.dense import JinaEmbeddingFunction
# Read the Jina API key from the environment, falling back to the placeholder value.
JINAAI_API_KEY = os.getenv("JINAAI_API_KEY", "your_jinaai_api_key")

# Initialize JinaEmbeddingFunction for document embedding
# (rebinds the module-level `jina_ef` with the key read above)
jina_ef = JinaEmbeddingFunction(
    model_name="jina-embeddings-v3",
    api_key=JINAAI_API_KEY,
    task="retrieval.passage",
    dimensions=1024  # same size as the dim of the "embedding" vector field in the Milvus schema
)

# Documents to embed and store in Milvus (replaces the earlier example `docs`).
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England."
]

# Encode the documents into embeddings
docs_embeddings = jina_ef.encode_documents(docs)

# Check the dimensions and embedding shapes:
# the function's reported dim next to the shape of the first returned vector.
print("Embeddings:", docs_embeddings)
print("Embedding Dimensions:", jina_ef.dim, docs_embeddings[0].shape)

# Inserting the generated embeddings into Milvus
def insert_data_into_collection(collection, doc_ids, embeddings, texts):
    """Insert precomputed document embeddings into a collection.

    Args:
        collection: Target collection; must expose ``insert`` and ``name``.
        doc_ids: Document identifiers, one per row.
        embeddings: Embedding vectors, one per row.
        texts: Source texts, one per row.

    Any exception raised along the way is caught and printed; the function
    always returns None.
    """
    try:
        # Columns are passed in the order doc_id, embedding, text.
        collection.insert([doc_ids, embeddings, texts])
        print(f"Data inserted into collection: {collection.name}")
    except Exception as err:
        print(f"Failed to insert data: {err}")

# Assuming the Milvus collection has been created earlier
# (rebinds `collection` to a handle looked up by name)
collection = Collection("document_embeddings")

# Insert document embeddings
# NOTE(review): chunk_id is declared with auto_id=True in the schema, so re-running
# this cell presumably inserts the same three documents again under new ids.
insert_data_into_collection(collection, ["doc1", "doc2", "doc3"], docs_embeddings, docs)




Embeddings: [array([ 0.0980642 , -0.08516974,  0.07365319, ..., -0.00991382,
        0.00062028,  0.01226388]), array([ 0.09150343, -0.10842073,  0.1076443 , ..., -0.01918848,
        0.0101535 , -0.0038277 ]), array([-0.01339411, -0.0968136 ,  0.09111101, ..., -0.04007763,
        0.03253692, -0.00142691])]
Embedding Dimensions: 1024 (1024,)
Data inserted into collection: document_embeddings
