In [1]:
from pymilvus import (
    connections,
    utility,
    FieldSchema, CollectionSchema, DataType,
    Collection,
    db
)
import numpy as np

connections.connect(adress="localhost:19530", db_name="wekiwi")

db.create_database("wekiwi_test3")
db.using_database("wekiwi_test3")

# Define the fields for the "Card" collection
fields = [
    FieldSchema(name="milvus_id", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="description_embeddings", dtype=DataType.FLOAT_VECTOR, dim=768),
    #FieldSchema(name="topic_embeddings", dtype=DataType.FLOAT_VECTOR, dim=768), #not yet supported! coming in Milvus 2.4
    FieldSchema(name="_id", dtype=DataType.VARCHAR, max_length=25),
    FieldSchema(name="last_changed", dtype=DataType.DOUBLE),
    FieldSchema(name="accepted_as_solution_counts", dtype=DataType.INT32),
    FieldSchema(name="author", dtype=DataType.VARCHAR, max_length=25),
    FieldSchema(name="circles", dtype=DataType.JSON),
    FieldSchema(name="description", dtype=DataType.VARCHAR, max_length=50000),
    FieldSchema(name="interaction_counts", dtype=DataType.INT32),
    FieldSchema(name="is_global", dtype=DataType.BOOL),
    FieldSchema(name="is_trashed", dtype=DataType.BOOL),
    FieldSchema(name="keywords", dtype=DataType.JSON),
    FieldSchema(name="platform_id", dtype=DataType.VARCHAR, max_length=35),
    FieldSchema(name="solution_counts", dtype=DataType.INT32),
    FieldSchema(name="suggested_as_solution_counts", dtype=DataType.INT32),
    FieldSchema(name="suggestion_counts", dtype=DataType.INT32),
    FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=500),
    FieldSchema(name="topics", dtype=DataType.JSON),
    FieldSchema(name="view_counts", dtype=DataType.INT32)
]

# Create the schema for the "Card" collection
schema = CollectionSchema(fields, "Collection of cards")

# Create the "Card" collection in the "wekiwi" database
card_collection = Collection("Card", schema)

In [2]:
# Index for 'description_embeddings' using HNSW and COSINE metric
description_embeddings_index_params = {
    "index_type": "HNSW",
    "metric_type": "COSINE",
    "params": {"M": 24, "efConstruction": 500},
}
card_collection.create_index(field_name="description_embeddings", index_params=description_embeddings_index_params)

# Index for 'id' using marisa-trie
id_index_params = {
    "index_type": "marisa-trie",
    "metric_type": "None"  # Adjust if a specific metric type is required
}
card_collection.create_index(field_name="_id", index_params=id_index_params)

# Index for 'platform_id' using marisa-trie
platform_id_index_params = {
    "index_type": "marisa-trie",
    "metric_type": "None"  # Adjust if a specific metric type is required
}
card_collection.create_index(field_name="platform_id", index_params=platform_id_index_params)

Status(code=0, message=)

In [5]:
import json

# Define the random number generator
rng = np.random.default_rng(seed=19530)

# Prepare entities for insertion
record = {
    "milvus_id": 556017554246539999,
    "description_embeddings": np.random.rand(768).tolist(),  # Random float vector of dim 768
    "_id": "sample_id_001",  # Sample VARCHAR
    "last_changed": np.random.rand(),  # Random DOUBLE
    "accepted_as_solution_counts": np.random.randint(0, 100),  # Sample INT32
    "author": "author_name",  # Sample VARCHAR
    "circles": {"circles": [1,2,3,5]},  # Sample JSON "data1","data5"
    "description": "This is a sample description.",  # Sample VARCHAR
    "interaction_counts": np.random.randint(0, 100),  # Sample INT32
    "is_global": True,  # Sample BOOL
    "is_trashed": False,  # Sample BOOL
    "keywords": {"keywords": ["keyword1", "keyword2", "keyword3"]},  # Sample JSON
    "platform_id": "platform_12345",  # Sample VARCHAR
    "solution_counts": np.random.randint(0, 100),  # Sample INT32
    "suggested_as_solution_counts": np.random.randint(0, 100),  # Sample INT32
    "suggestion_counts": np.random.randint(0, 100),  # Sample INT32
    "title": "Sample Title",  # Sample VARCHAR
    "topics": {"topics": ["detail1", "topic2", "detail2"]},  # Sample JSON
    "view_counts": np.random.randint(0, 1000),  # Sample INT32
}

# Prepare the entity for insertion, note that 'milvus_id' is not included as it's auto-generated
entity = [
    [value] for value in record.values()
]

print(entity)

# Insert data into Milvus
insert_result = card_collection.insert(entity)
card_collection.flush()

# Check the number of entities
print(f"Number of entities in Milvus: {card_collection.num_entities}")
print(f"Cards inserted: {insert_result}")

[[556017554246539999], [[0.9764576282709363, 0.37387877870399644, 0.25485024423638214, 0.37050909323316417, 0.5596287945566001, 0.46741594507319373, 0.1948134191431079, 0.7819344403087134, 0.8026061675177675, 0.4858654485707483, 0.2228953722150916, 0.398792003232711, 0.1640335961065752, 0.5760759431919069, 0.26262954579141273, 0.768613717468881, 0.09808986901016481, 0.9956786300886518, 0.558289241692329, 0.6762427851229446, 0.19508824086659649, 0.1516143143769798, 0.9337400100664173, 0.9818164968262882, 0.4654144631660222, 0.4925972436961724, 0.0444882149768151, 0.7883975164153161, 0.28878760780052226, 0.7914634281813806, 0.10448266517506033, 0.05377196508744275, 0.6566015940272308, 0.24005911013907089, 0.16033191780508382, 0.7800799366224922, 0.9290093990844306, 0.19827226820498522, 0.9696746731121632, 0.3508901463402556, 0.9003281751979734, 0.06301095156204994, 0.9438006737100404, 0.532375714114694, 0.9284409462692467, 0.9355041184917454, 0.6776141913084621, 0.6620322241093118, 0.198

In [None]:
data_to_upsert = {
    "milvus_id": 446093996005786499,
    "_id": "sample_id_001",  # Sample VARCHAR
    "description_embeddings": np.random.rand(768).tolist(),  # Random float vector of dim 768
    "last_changed": np.random.rand(),  # Random DOUBLE
    "accepted_as_solution_counts": np.random.randint(0, 100),  # Sample INT32
    "author": "author_name",  # Sample VARCHAR
    "circles": {"circles": [5,7,99]},  # Sample JSON "data1","data5"
    "description": "This is a sample description.",  # Sample VARCHAR
    "interaction_counts": np.random.randint(0, 100),  # Sample INT32
    "is_global": True,  # Sample BOOL
    "is_trashed": False,  # Sample BOOL
    "keywords": {"keywords": ["keyword1", "keyword2", "keyword3"]},  # Sample JSON
    "platform_id": "platform_12345",  # Sample VARCHAR
    "solution_counts": np.random.randint(0, 100),  # Sample INT32
    "suggested_as_solution_counts": np.random.randint(0, 100),  # Sample INT32
    "suggestion_counts": np.random.randint(0, 100),  # Sample INT32
    "title": "Sample Title",  # Sample VARCHAR
    "topics": {"topics": ["detail1", "topic2", "detail2"]},  # Sample JSON
    "view_counts": np.random.randint(0, 1000),  # Sample INT32
}

data = [
    [value] for value in data_to_upsert.values()
]

print(data)

#upsert_result = card_collection.upsert(data)
card_collection.flush()

print(f"Number of entities in Milvus: {card_collection.num_entities}")
#print(f"Cards upserted: {upsert_result}")

UpsertAutoIDTrueException: <UpsertAutoIDTrueException: (code=1, message=Upsert don't support autoid == true)>