Skip to content

Commit

Permalink
Merge pull request #17 from gaokun2/main
Browse files Browse the repository at this point in the history
python sdk for mochow.1.2
  • Loading branch information
fengjial committed May 23, 2024
2 parents cf8b23d + dbb407a commit 88064c6
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 21 deletions.
71 changes: 52 additions & 19 deletions example/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,23 @@
from pymochow.configuration import Configuration
from pymochow.auth.bce_credentials import BceCredentials
from pymochow.exception import ClientError, ServerError
from pymochow.model.schema import Schema, Field, SecondaryIndex, VectorIndex, HNSWParams, AutoBuildTiming
from pymochow.model.schema import Schema, Field, SecondaryIndex, VectorIndex, HNSWParams, PUCKParams, AutoBuildTiming
from pymochow.model.enum import FieldType, IndexType, MetricType, ServerErrCode
from pymochow.model.enum import TableState, IndexState
from pymochow.model.table import Partition, Row, AnnSearch, HNSWSearchParams
from pymochow.model.table import Partition, Row, AnnSearch, HNSWSearchParams, PUCKSearchParams


logging.basicConfig(filename='example.log', level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class TestMochow:
def __init__(self, config):
def __init__(self, config, index_type):
"""
init mochow client
"""
self._client = pymochow.MochowClient(config)
self._index_type = index_type

def clear(self):
db = None
Expand Down Expand Up @@ -76,17 +77,26 @@ def create_db_and_table(self):
fields.append(Field("author", FieldType.STRING))
fields.append(Field("page", FieldType.UINT32))
fields.append(Field("segment", FieldType.STRING))
fields.append(Field("vector", FieldType.FLOAT_VECTOR, not_null=True, dimension=3))
fields.append(Field("vector", FieldType.FLOAT_VECTOR, not_null=True, dimension=4))
indexes = []
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.HNSW,

if self._index_type == IndexType.HNSW:
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.HNSW,
field="vector", metric_type=MetricType.L2,
params=HNSWParams(m=32, efconstruction=200)))
elif self._index_type == IndexType.PUCK:
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.PUCK,
field="vector", metric_type=MetricType.L2,
params=PUCKParams(coarseClusterCount=5, fineClusterCount=5)))
else:
raise Exception("not support index type")

indexes.append(SecondaryIndex(index_name="book_name_idx", field="bookName"))

db.create_table(
table_name=table_name,
replication=3,
partition=Partition(partition_num=3),
partition=Partition(partition_num=1),
schema=Schema(fields=fields, indexes=indexes)
)

Expand All @@ -106,40 +116,52 @@ def upsert_data(self):

rows = [
Row(id='0001',
vector=[0.2123, 0.21, 0.213],
vector=[1, 0.21, 0.213, 0],
bookName='西游记',
author='吴承恩',
page=21,
segment='富贵功名,前缘分定,为人切莫欺心。'),
Row(id='0002',
vector=[0.2123, 0.22, 0.213],
vector=[2, 0.22, 0.213, 0],
bookName='西游记',
author='吴承恩',
page=22,
segment='正大光明,忠良善果弥深。些些狂妄天加谴,眼前不遇待时临。'),
Row(id='0003',
vector=[0.2123, 0.23, 0.213],
vector=[3, 0.23, 0.213, 0],
bookName='三国演义',
author='罗贯中',
page=23,
segment='细作探知这个消息,飞报吕布。'),
Row(id='0004',
vector=[0.2123, 0.24, 0.213],
vector=[4, 0.24, 0.213, 0],
bookName='三国演义',
author='罗贯中',
page=24,
segment='布大惊,与陈宫商议。宫曰:“闻刘玄德新领徐州,可往投之。”' \
'布从其言,竟投徐州来。有人报知玄德。'),
Row(id='0005',
vector=[0.2123, 0.25, 0.213],
vector=[5, 0.25, 0.213, 0],
bookName='三国演义',
author='罗贯中',
page=25,
segment='玄德曰:“布乃当今英勇之士,可出迎之。”' \
'糜竺曰:“吕布乃虎狼之徒,不可收留;收则伤人矣。'),
]

i = 6
while i <= 100:
rows.append(Row(id=str(i),
vector=[i, 0.2 + i * 0.01, 0.213, 0],
bookName='三国演义',
author='罗贯中',
page=25,
segment='玄德曰:“布乃当今英勇之士,可出迎之。”' \
'糜竺曰:“吕布乃虎狼之徒,不可收留;收则伤人矣。'))
i += 1

table.upsert(rows=rows)
time.sleep(1)
time.sleep(10)

def change_table_schema(self):
"""change table schema"""
Expand Down Expand Up @@ -182,9 +204,13 @@ def search_data(self):
index = table.describe_index("vector_idx")
if index.state == IndexState.NORMAL:
break

anns = AnnSearch(vector_field="vector", vector_floats=[0.3123, 0.43, 0.213],
params=HNSWSearchParams(ef=200, limit=10), filter="bookName='三国演义'")

if self._index_type == IndexType.HNSW:
anns = AnnSearch(vector_field="vector", vector_floats=[1, 0.21, 0.213, 0],
params=HNSWSearchParams(ef=200, limit=10), filter="bookName='三国演义'")
elif self._index_type == IndexType.PUCK:
anns = AnnSearch(vector_field="vector", vector_floats=[1, 0.21, 0.213, 0],
params=PUCKSearchParams(searchCoarseCount=5, limit=5), filter="bookName='三国演义'")
res = table.search(anns=anns)
logger.debug("res: {}".format(res))

Expand Down Expand Up @@ -212,9 +238,16 @@ def drop_and_create_vindex(self):
break

indexes = []
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.HNSW,
if self._index_type == IndexType.HNSW:
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.HNSW,
field="vector", metric_type=MetricType.L2,
params=HNSWParams(m=16, efconstruction=200), auto_build=False))
elif self._index_type == IndexType.PUCK:
indexes.append(VectorIndex(index_name="vector_idx", index_type=IndexType.PUCK,
field="vector", metric_type=MetricType.L2,
params=PUCKParams(coarseClusterCount=5, fineClusterCount=5), auto_build=False))
else:
raise Exception("not support index type")
table.create_indexes(indexes)
time.sleep(1)
table.modify_index(index_name="vector_idx", auto_build=True,
Expand All @@ -234,12 +267,12 @@ def delete_and_drop(self):

if __name__ == "__main__":
account = 'root'
api_key = 'your api key'
endpoint = 'your endpoint' #example:http://127.0.0.1:8511
api_key = '*********'
endpoint = 'http://*.*.*.*:*' #example:http://127.0.0.1:8511

config = Configuration(credentials=BceCredentials(account, api_key),
endpoint=endpoint)
test_vdb = TestMochow(config)
test_vdb = TestMochow(config, IndexType.HNSW)
test_vdb.clear()
test_vdb.create_db_and_table()
test_vdb.upsert_data()
Expand Down
11 changes: 10 additions & 1 deletion pymochow/model/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pymochow import client
from pymochow.http import http_methods
from pymochow.model.table import Table, Partition
from pymochow.model.schema import Schema, Field, VectorIndex, SecondaryIndex, HNSWParams
from pymochow.model.schema import Schema, Field, VectorIndex, SecondaryIndex, HNSWParams, PUCKParams
from pymochow.model.enum import IndexType, MetricType, TableState

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -277,6 +277,15 @@ def describe_table(self, table_name, config=None) -> Table:
field=index["field"],
metric_type=getattr(MetricType, index["metricType"], None),
auto_build=index["autoBuild"]))
elif index["indexType"] == IndexType.PUCK.value:
indexes.append(VectorIndex(
index_name=index["indexName"],
index_type=IndexType.PUCK,
field=index["field"],
metric_type=getattr(MetricType, index["metricType"], None),
params=PUCKParams(coarseClusterCount=index["params"]["coarseClusterCount"],
fineClusterCount=index["params"]["fineClusterCount"]),
auto_build=index["autoBuild"]))
elif index["indexType"] == IndexType.SECONDARY_INDEX.value:
indexes.append(SecondaryIndex(
index_name=index["indexName"],
Expand Down
1 change: 1 addition & 0 deletions pymochow/model/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class IndexType(Enum):
# vector index type
HNSW = "HNSW"
FLAT = "FLAT"
PUCK = "PUCK"

# scalar index type
SECONDARY_INDEX = "SECONDARY"
Expand Down
18 changes: 18 additions & 0 deletions pymochow/model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,24 @@ def to_dict(self):
return res


class PUCKParams:
    """
    Build parameters for a PUCK vector index.

    Args:
        coarseClusterCount: value kept on the instance and emitted under
            the "coarseClusterCount" key by ``to_dict``.
        fineClusterCount: value kept on the instance and emitted under
            the "fineClusterCount" key by ``to_dict``.
    """

    def __init__(self, coarseClusterCount: int, fineClusterCount: int) -> None:
        # The attributes keep the camelCase names used as dict keys below.
        self.coarseClusterCount, self.fineClusterCount = (
            coarseClusterCount,
            fineClusterCount,
        )

    def to_dict(self):
        """Return both parameters as a plain dict keyed by parameter name."""
        return dict(
            coarseClusterCount=self.coarseClusterCount,
            fineClusterCount=self.fineClusterCount,
        )


class VectorIndex(IndexField):
"""
Args:
Expand Down
27 changes: 26 additions & 1 deletion pymochow/model/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pymochow import utils
from pymochow import client
from pymochow.http import http_methods
from pymochow.model.schema import VectorIndex, SecondaryIndex, HNSWParams, DefaultAutoBuildPolicy
from pymochow.model.schema import VectorIndex, SecondaryIndex, HNSWParams, PUCKParams, DefaultAutoBuildPolicy
from pymochow.model.enum import PartitionType, ReadConsistency
from pymochow.model.enum import IndexType, IndexState, MetricType, AutoBuildPolicyType
from pymochow.exception import ClientError
Expand Down Expand Up @@ -445,6 +445,16 @@ def describe_index(self, index_name, config=None):
metric_type=getattr(MetricType, index["metricType"], None),
auto_build=index["autoBuild"],
state=getattr(IndexState, index["state"], None))
elif index["indexType"] == IndexType.PUCK.value:
return VectorIndex(
index_name=index["indexName"],
index_type=IndexType.PUCK,
field=index["field"],
metric_type=getattr(MetricType, index["metricType"], None),
params=PUCKParams(coarseClusterCount=index["params"]["coarseClusterCount"],
fineClusterCount=index["params"]["fineClusterCount"]),
auto_build=index["autoBuild"],
state=getattr(IndexState, index["state"], None))
elif index["indexType"] == IndexType.SECONDARY_INDEX.value:
return SecondaryIndex(
index_name=index["indexName"],
Expand Down Expand Up @@ -531,6 +541,21 @@ def to_dict(self):
res['pruning'] = self._pruning
return res

class PUCKSearchParams:
    """
    Search parameters for a query against a PUCK vector index.

    Args:
        searchCoarseCount: value emitted under the "searchCoarseCount" key.
        limit: value emitted under the "limit" key; defaults to 50.
    """

    def __init__(self, searchCoarseCount, limit=50) -> None:
        self._searchCoarseCount = searchCoarseCount
        self._limit = limit

    def to_dict(self):
        """Return the search parameters as a dict."""
        return {
            'searchCoarseCount': self._searchCoarseCount,
            'limit': self._limit,
        }
class FLATSearchParams:
"flat search params"

Expand Down

0 comments on commit 88064c6

Please sign in to comment.