In [None]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# Connect to Milvus
connections.connect(alias="default", host="localhost", port="19530")

embedding_model = "mxbai-embed-large"
embedding_dim = 1024

# Define Milvus Collection Schema
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="ref", dtype=DataType.VARCHAR, max_length=255),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=1024),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=embedding_dim),  # Adjust based on embedding model
]

# Create Collection
schema = CollectionSchema(fields)

#utility.drop_collection("bible_verses")
bible_collection = Collection("bible_verses", schema)

print("✅ Connected to Milvus & Created Collection!")


✅ Connected to Milvus & Created Collection!


In [12]:
import ollama

def get_embedding(verse_text, embedding_model="mxbai-embed-large"):
    response = ollama.embed(embedding_model, verse_text)
    embedding = response["embeddings"][0]
    return embedding

embedding_model = "mxbai-embed-large"
test = get_embedding("test")
print(test)    


[0.005731759, 0.010537944, 0.0016845512, 0.024038142, -0.025523452, 0.014682744, 0.00015561887, 0.02183392, 0.028394217, 0.04931835, 0.017624497, 0.007665976, 0.003405394, -0.050492026, -0.020013435, -0.018857952, -0.028328016, -9.500858e-05, -0.023658263, -0.011210882, -0.019538393, 0.008420351, -0.058704127, 0.0015846783, -0.020955259, 0.030942515, -0.0035097222, -0.0015744482, 0.017112436, 0.032588467, -0.011282272, -0.0057915417, 0.017330427, -0.050471846, 0.009417275, -0.034654427, 0.051105868, -0.01793316, -0.0036478858, -0.022766616, -0.016472023, -0.0025521687, 0.039051313, -0.024154905, -0.06912467, -0.024370521, -0.02545611, -0.04881892, -0.01223596, -0.018349485, 0.0051936987, 0.009239987, 0.0148323225, -0.03828174, 0.0060546263, -0.012810752, -0.012628406, 0.0063205264, -0.046119776, 0.06753281, 0.047234293, -0.0079748025, 0.021387786, -0.05372062, 0.013391648, -0.0066134664, 0.012866523, -0.019948298, -0.00741774, -0.024527336, -0.024717648, 0.025961334, -0.01038703, -0.03

In [13]:
import sqlite3

def load_bible_into_milvus():
    conn = sqlite3.connect("bible.db")
    cursor = conn.cursor()
    cursor.execute("SELECT verse, text FROM bible_verses")
    verses = cursor.fetchall()
    
    total = len(verses)  # Get total count
    for i, (verse_ref, verse_text) in enumerate(verses, start=1):
        embedding = get_embedding(verse_text)
        if embedding:
            bible_collection.insert([
                [verse_ref],
                [verse_text],
                [embedding]
            ])
        
        # Print progress every 100 verses
        if i % 100 == 0:
            print(f"🔄 Processed {i}/{total} verses...")

    conn.close()
    print("✅ Bible verses successfully loaded into Milvus!")

# Run the function
load_bible_into_milvus()


🔄 Processed 100/31102 verses...
🔄 Processed 200/31102 verses...
🔄 Processed 300/31102 verses...
🔄 Processed 400/31102 verses...
🔄 Processed 500/31102 verses...
🔄 Processed 600/31102 verses...
🔄 Processed 700/31102 verses...
🔄 Processed 800/31102 verses...
🔄 Processed 900/31102 verses...
🔄 Processed 1000/31102 verses...
🔄 Processed 1100/31102 verses...
🔄 Processed 1200/31102 verses...
🔄 Processed 1300/31102 verses...
🔄 Processed 1400/31102 verses...
🔄 Processed 1500/31102 verses...
🔄 Processed 1600/31102 verses...
🔄 Processed 1700/31102 verses...
🔄 Processed 1800/31102 verses...
🔄 Processed 1900/31102 verses...
🔄 Processed 2000/31102 verses...
🔄 Processed 2100/31102 verses...
🔄 Processed 2200/31102 verses...
🔄 Processed 2300/31102 verses...
🔄 Processed 2400/31102 verses...
🔄 Processed 2500/31102 verses...
🔄 Processed 2600/31102 verses...
🔄 Processed 2700/31102 verses...
🔄 Processed 2800/31102 verses...
🔄 Processed 2900/31102 verses...
🔄 Processed 3000/31102 verses...
🔄 Processed 3100/31

In [None]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection

# Connect to Milvus
connections.connect(alias="default", host="localhost", port="19530")

# Define Milvus Collection Schema for Shakespeare
shakespeare_fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="ref", dtype=DataType.VARCHAR, max_length=255),
    FieldSchema(name="sentence", dtype=DataType.VARCHAR, max_length=1024),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=1024),  # Match model output
]

# Create Shakespeare Collection
shakespeare_schema = CollectionSchema(shakespeare_fields)
shakespeare_collection = Collection("shakespeare_sentences", shakespeare_schema)

print("✅ Connected to Milvus & Created Shakespeare Collection!")


✅ Connected to Milvus & Created Shakespeare Collection!


In [16]:
import sqlite3

# Load Shakespeare sentences into Milvus with progress printing
def load_shakespeare_into_milvus():
    conn = sqlite3.connect("shakespeare.db")
    cursor = conn.cursor()
    cursor.execute("SELECT id, sentence FROM shakespeare_sentences")
    sentences = cursor.fetchall()

    total_sentences = len(sentences)  # Get total count
    processed = 0

    for sentence_id, sentence_text in sentences:
        embedding = get_embedding(sentence_text)
        if embedding:
            shakespeare_collection.insert([
                [sentence_id],  # Auto-generated ID
                [sentence_text],
                [embedding]
            ])
            processed += 1

        # Print progress every 100 sentences
        if processed % 100 == 0 or processed == total_sentences:
            percent_done = (processed / total_sentences) * 100
            print(f"🔄 Processed {processed}/{total_sentences} sentences ({percent_done:.2f}% done)")

    conn.close()
    print("✅ Shakespeare sentences successfully loaded into Milvus!")

# Run the process
load_shakespeare_into_milvus()
print("🎉 All data loaded into Milvus!")

DataNotMatchException: <DataNotMatchException: (code=1, message=The data doesn't match with schema fields, expect 2 list, got 3)>