Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Milvus integration for vector create and search #1269

Merged
merged 27 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9fa1266
Added Milvus integration for vector create and search
RichardZhangRZ Oct 9, 2023
2793799
Merge branch 'staging' into milvus-integration
RichardZhangRZ Oct 9, 2023
0459817
Added docs and switched to using provided values for Milvus
RichardZhangRZ Oct 10, 2023
f1c6e6c
Fixed merge conflicts
RichardZhangRZ Oct 10, 2023
02d6d24
Skip Milvus integration test
RichardZhangRZ Oct 10, 2023
cc72ac5
Removed unnecessary required param
RichardZhangRZ Oct 10, 2023
f6d3202
Fixed linter errors
RichardZhangRZ Oct 10, 2023
11d9bc8
Fixed doc issues
RichardZhangRZ Oct 10, 2023
400aed6
Some quick changes
RichardZhangRZ Oct 10, 2023
8d69263
Removed value
RichardZhangRZ Oct 10, 2023
8965a8e
Merge branch 'staging' into milvus-integration
RichardZhangRZ Oct 16, 2023
3444f17
Added linting suppression
RichardZhangRZ Oct 17, 2023
a12c4ee
test commit
RichardZhangRZ Oct 17, 2023
1c17276
Added more words to diciontary
RichardZhangRZ Oct 17, 2023
4815828
Temp change
RichardZhangRZ Oct 17, 2023
0e38d3b
Formatting
RichardZhangRZ Oct 17, 2023
d0ef3e7
Revert "Temp change"
RichardZhangRZ Oct 17, 2023
410361b
Skip Milvus installation for testing:
RichardZhangRZ Oct 18, 2023
a71dd5d
Resolved merge conflicts
RichardZhangRZ Oct 23, 2023
790dc02
adopted to configuration management changes
RichardZhangRZ Oct 24, 2023
b7b5cb6
Add skip marker
RichardZhangRZ Oct 24, 2023
3e77b2e
Add temp change
RichardZhangRZ Oct 24, 2023
d67371a
Revert "Add temp change"
RichardZhangRZ Oct 24, 2023
e578ed0
formatting
RichardZhangRZ Oct 24, 2023
c5ea4e4
Merge branch 'staging' into milvus-integration
xzdandy Oct 27, 2023
c0206b1
Remove milvus from circle ci installation
xzdandy Oct 27, 2023
15b4791
Update the documentation to use the new `SET` statement
xzdandy Oct 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,16 @@ jobs:
pip install --upgrade pip
if [ $RAY = "ENABLED" ]; then
if [ $PY_VERSION != "3.11" ]; then
pip install ".[dev,ray,qdrant,pinecone,chromadb]"
pip install ".[dev,ray,qdrant,pinecone,chromadb,milvus]"
else
pip install ".[dev,pinecone,chromadb]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864
pip install ".[dev,pinecone,chromadb,milvus]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864
fi
python -c "import yaml;f = open('evadb/evadb.yml', 'r+');config_obj = yaml.load(f, Loader=yaml.FullLoader);config_obj['experimental']['ray'] = True;f.seek(0);f.write(yaml.dump(config_obj));f.truncate();"
else
if [ $PY_VERSION != "3.11" ]; then
pip install ".[dev,ludwig,qdrant,pinecone,chromadb]"
pip install ".[dev,ludwig,qdrant,pinecone,chromadb,milvus]"
else
pip install ".[dev,pinecone,chromadb]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864
pip install ".[dev,pinecone,chromadb,milvus]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864
Copy link
Collaborator

@xzdandy xzdandy Oct 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove milvus since we are not running any test.

fi
fi

Expand Down Expand Up @@ -486,7 +486,7 @@ jobs:
source test_evadb/bin/activate
pip install --upgrade pip
pip debug --verbose
pip install ".[dev,ludwig,qdrant,forecasting,pinecone,chromadb]"
pip install ".[dev,ludwig,qdrant,forecasting,pinecone,chromadb,milvus]"
xzdandy marked this conversation as resolved.
Show resolved Hide resolved
source test_evadb/bin/activate
bash script/test/test.sh -m "<< parameters.mode >>"

Expand Down
1 change: 1 addition & 0 deletions docs/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ parts:
- file: source/reference/vector_stores/qdrant
- file: source/reference/vector_stores/pgvector
- file: source/reference/vector_stores/pinecone
- file: source/reference/vector_stores/milvus

- file: source/reference/ai/index
title: AI Engines
Expand Down
1 change: 1 addition & 0 deletions evadb/catalog/catalog_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class VectorStoreType(EvaDBEnum):
PINECONE # noqa: F821
PGVECTOR # noqa: F821
CHROMADB # noqa: F821
MILVUS # noqa: F821


class VideoColumnName(EvaDBEnum):
Expand Down
2 changes: 2 additions & 0 deletions evadb/executor/executor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ def handle_vector_store_params(
return {"index_path": str(Path(index_path).parent)}
elif vector_store_type == VectorStoreType.PINECONE:
return {}
elif vector_store_type == VectorStoreType.MILVUS:
return {"index_dir": Path(str(Path(index_path).parent), "milvus")}
else:
raise ValueError("Unsupported vector store type: {}".format(vector_store_type))

Expand Down
3 changes: 2 additions & 1 deletion evadb/parser/evadb.lark
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function_metadata_key: uid

function_metadata_value: string_literal | decimal_literal

vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB)
vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | MILVUS)

index_elem: ("(" uid_list ")"
| "(" function_call ")")
Expand Down Expand Up @@ -423,6 +423,7 @@ QDRANT: "QDRANT"i
PINECONE: "PINECONE"i
PGVECTOR: "PGVECTOR"i
CHROMADB: "CHROMADB"i
MILVUS: "MILVUS"i

// Computer vision tasks

Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def vector_store_type(self, tree):
vector_store_type = VectorStoreType.PGVECTOR
elif str.upper(token) == "CHROMADB":
vector_store_type = VectorStoreType.CHROMADB
elif str.upper(token) == "MILVUS":
vector_store_type = VectorStoreType.MILVUS
return vector_store_type


Expand Down
111 changes: 111 additions & 0 deletions evadb/third_party/vector_stores/milvus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import atexit
from typing import List

from evadb.third_party.vector_stores.types import (
FeaturePayload,
VectorIndexQuery,
VectorIndexQueryResult,
VectorStore,
)
from evadb.utils.generic_utils import try_to_import_milvus_client

_milvus_client_instance = None
_milvus_server_instance = None

required_params = ["index_dir"]


def get_local_milvus_server(index_dir: str):
global _milvus_server_instance
if _milvus_server_instance is None:
try_to_import_milvus_client()
import milvus

_milvus_server_instance = milvus.default_server
_milvus_server_instance.set_base_dir(index_dir)
_milvus_server_instance.start()

# Ensure that local Milvus server is terminated before Python process terminates
atexit.register(_milvus_server_instance.stop)
return _milvus_server_instance


def get_milvus_client(server_address: str, server_port: int):
global _milvus_client_instance
if _milvus_client_instance is None:
try_to_import_milvus_client()
import pymilvus

server_uri = f"http://{server_address}:{server_port}"
_milvus_client_instance = pymilvus.MilvusClient(uri=server_uri)

return _milvus_client_instance


class MilvusVectorStore(VectorStore):
def __init__(self, index_name: str, index_dir: str) -> None:
local_milvus_server = get_local_milvus_server(index_dir)
self._client = get_milvus_client(
server_address=local_milvus_server.server_address,
server_port=local_milvus_server.listen_port,
)
self._collection_name = index_name

def create(self, vector_dim: int):
if self._collection_name in self._client.list_collections():
self._client.drop_collection(self._collection_name)
self._client.create_collection(
collection_name=self._collection_name, dimension=vector_dim
)

def add(self, payload: List[FeaturePayload]):
milvus_data = [
{
"id": feature_payload.id,
"vector": feature_payload.embedding.reshape(-1).tolist(),
}
for feature_payload in payload
]
ids = [feature_payload.id for feature_payload in payload]

# Milvus Client does not have upsert operation, perform delete + insert to emulate it
self._client.delete(collection_name=self._collection_name, pks=ids)

self._client.insert(collection_name=self._collection_name, data=milvus_data)
xzdandy marked this conversation as resolved.
Show resolved Hide resolved

def persist(self):
self._client.flush(self._collection_name)
xzdandy marked this conversation as resolved.
Show resolved Hide resolved

def delete(self) -> None:
self._client.drop_collection(
collection_name=self._collection_name,
)

def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult:
response = self._client.search(
collection_name=self._collection_name,
data=[query.embedding.reshape(-1).tolist()],
limit=query.top_k,
)[0]

distances, ids = [], []
for result in response:
print(result)
distances.append(result["distance"])
ids.append(result["id"])

return VectorIndexQueryResult(distances, ids)
6 changes: 6 additions & 0 deletions evadb/third_party/vector_stores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from evadb.third_party.vector_stores.faiss import FaissVectorStore
from evadb.third_party.vector_stores.pinecone import PineconeVectorStore
from evadb.third_party.vector_stores.qdrant import QdrantVectorStore
from evadb.third_party.vector_stores.milvus import MilvusVectorStore
from evadb.utils.generic_utils import validate_kwargs


Expand Down Expand Up @@ -49,5 +50,10 @@ def init_vector_store(
validate_kwargs(kwargs, required_params, required_params)
return ChromaDBVectorStore(index_name, **kwargs)

elif vector_store_type == VectorStoreType.MILVUS:
from evadb.third_party.vector_stores.milvus import required_params

validate_kwargs(kwargs, required_params, required_params)
return MilvusVectorStore(index_name, **kwargs)
else:
raise Exception(f"Vector store {vector_store_type} not supported")
29 changes: 29 additions & 0 deletions evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,26 @@ def try_to_import_chromadb_client():
)


def try_to_import_milvus_client():
try:
import pymilvus
except:
raise ValueError(
"""Could not import pymilvus python package.
Please install it with 'pip install pymilvus`."""
)


def try_to_import_milvus_server():
try:
import milvus
except:
raise ValueError(
"""Could not import milvus python package.
Please install it with 'pip install milvus`."""
)


def is_qdrant_available() -> bool:
try:
try_to_import_qdrant_client()
Expand All @@ -586,6 +606,15 @@ def is_chromadb_available() -> bool:
return False


def is_milvus_available() -> bool:
try:
try_to_import_milvus_server()
try_to_import_milvus_client()
return True
except ValueError:
return False


##############################
## UTILS
##############################
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ def read(path, encoding="utf-8"):

chromadb_libs = ["chromadb"]

milvus_libs = ["milvus[client]"]

postgres_libs = [
"psycopg2",
]
Expand Down Expand Up @@ -162,6 +164,7 @@ def read(path, encoding="utf-8"):
"qdrant": qdrant_libs,
"pinecone": pinecone_libs,
"chromadb": chromadb_libs,
"milvus": milvus_libs,
"postgres": postgres_libs,
"ludwig": ludwig_libs,
"sklearn": sklearn_libs,
Expand Down
36 changes: 35 additions & 1 deletion test/integration_tests/long/test_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
import os
import time
import unittest
from test.markers import chromadb_skip_marker, pinecone_skip_marker, qdrant_skip_marker
from test.markers import (
chromadb_skip_marker,
pinecone_skip_marker,
qdrant_skip_marker,
milvus_skip_marker,
)
from test.util import (
create_sample_image,
get_evadb_for_testing,
Expand Down Expand Up @@ -523,3 +528,32 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_pinecone(

drop_index_query = "DROP INDEX testpineconeindeximagedataset;"
execute_query_fetch_all(self.evadb, drop_index_query)

@milvus_skip_marker
def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus(
self,
):
for _ in range(2):
create_index_query = """CREATE INDEX testMilvusIndexImageDataset
ON testSimilarityImageDataset (DummyFeatureExtractor(data))
USING MILVUS;"""
execute_query_fetch_all(self.evadb, create_index_query)

select_query = """SELECT _row_id FROM testSimilarityImageDataset
ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data))
LIMIT 1;""".format(
self.img_path
)
explain_batch = execute_query_fetch_all(
self.evadb, f"EXPLAIN {select_query}"
)
self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0])

res_batch = execute_query_fetch_all(self.evadb, select_query)
self.assertEqual(
res_batch.frames["testsimilarityimagedataset._row_id"][0], 5
)

# Cleanup
drop_query = "DROP INDEX testMilvusIndexImageDataset"
execute_query_fetch_all(self.evadb, drop_query)
6 changes: 6 additions & 0 deletions test/markers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_ludwig_available,
is_pinecone_available,
is_qdrant_available,
is_milvus_available,
is_sklearn_available,
)

Expand All @@ -47,6 +48,11 @@
reason="Skipping since chromadb is not installed",
)

milvus_skip_marker = pytest.mark.skipif(
is_milvus_available() is False,
reason="Skipping since milvus and pymilvus is not installed",
)

windows_skip_marker = pytest.mark.skipif(
sys.platform == "win32", reason="Test case not supported on Windows"
)
Expand Down