In [2]:
from pymilvus import MilvusClient, DataType
from dotenv import load_dotenv
import os

load_dotenv()
# https://milvus.io/docs/quickstart.md

client = MilvusClient(
    uri=os.getenv("MILVUS_URI"),
    user=os.getenv("MILVUS_USER"),
    password=os.getenv("MILVUS_PWD"),
)

In [6]:
# create schema
schema = client.create_schema(
    auto_id=True,
    enbale_dynamic_fields=True,
)
schema.add_field(
    field_name="pk",
    datatype=DataType.VARCHAR,
    is_primary=True,
    max_length=100
)
schema.add_field(
    field_name="binary_vector",
    datatype=DataType.BINARY_VECTOR,
    dim=128,
)

# create index params
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="binary_vector",
    index_name="binary_vector_index",
    index_type="AUTOINDEX",
    metric_type="HAMMING",
)

# create collection
client.create_collection(
    collection_name="my_col_for_binary_vector",
    schema=schema,
    index_params=index_params,
)

# insert data
def convert_bool_list_to_bytes(bool_list):
    if len(bool_list)%8 != 0:
        raise ValueError("The length of a boolean list must be a multiple of 8")
    
    byte_array = bytearray(len(bool_list) // 8)
    for i, bit in enumerate(bool_list):
        if bit == 1:
            index = i // 8
            shift = i % 8
            byte_array[index] |= (1 << shift)
    return bytes(byte_array)

bool_vectors = [
    [1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0] + [0] * 112,
    [0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1] + [0] * 112,
]

data = [{"binary_vector": convert_bool_list_to_bytes(bool_vector) for bool_vector in bool_vectors}]

client.insert(
    collection_name="my_col_for_binary_vector",
    data=data,
)

{'insert_count': 1, 'ids': ['458312171707656147'], 'cost': 1}

In [7]:
# perform similarity search
search_params = {
    "params": {"nprobe": 10}
}

query_bool_list = [1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0] + [0] * 112
query_vector = convert_bool_list_to_bytes(query_bool_list)

res = client.search(
    collection_name="my_col_for_binary_vector",
    data=[query_vector],
    anns_field="binary_vector",
    search_params=search_params,
    limit=5,
    output_fields=["pk"],
)

print(res)

data: [[{'pk': '458312171707656147', 'distance': 10.0, 'entity': {'pk': '458312171707656147'}}]],{'cost': 6}
