Skip to content

Commit

Permalink
fix: pass max_elements when loading the index in hnswlib (#1426)
Browse files Browse the repository at this point in the history
Signed-off-by: AnneY <evangeline-lun@foxmail.com>
  • Loading branch information
AnneYang720 committed Apr 21, 2023
1 parent bf2e50c commit 0c73ad8
Show file tree
Hide file tree
Showing 22 changed files with 853 additions and 869 deletions.
4 changes: 3 additions & 1 deletion docarray/index/backends/hnswlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,9 @@ def _to_hashed_id(doc_id: Optional[str]) -> int:
def _load_index(self, col_name: str, col: '_ColumnInfo') -> hnswlib.Index:
"""Load an existing HNSW index from disk."""
index = self._create_index_class(col)
index.load_index(self._hnsw_locations[col_name])
index.load_index(
self._hnsw_locations[col_name], max_elements=col.config['max_elements']
)
return index

# HNSWLib helpers
Expand Down
258 changes: 129 additions & 129 deletions tests/index/base_classes/test_base_doc_store.py

Large diffs are not rendered by default.

52 changes: 26 additions & 26 deletions tests/index/base_classes/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,50 +63,50 @@ def python_type_to_db_type(self, x):


def test_defaults():
store = DummyDocIndex[SimpleDoc]()
assert store._db_config.other == 5
assert store._db_config.work_dir == '.'
assert store._runtime_config.default_column_config[str] == {
index = DummyDocIndex[SimpleDoc]()
assert index._db_config.other == 5
assert index._db_config.work_dir == '.'
assert index._runtime_config.default_column_config[str] == {
'dim': 128,
'space': 'l2',
}


def test_set_by_class():
# change all settings
store = DummyDocIndex[SimpleDoc](DBConfig(work_dir='hi', other=10))
assert store._db_config.other == 10
assert store._db_config.work_dir == 'hi'
store.configure(RuntimeConfig(default_column_config={}, default_ef=10))
assert store._runtime_config.default_column_config == {}
index = DummyDocIndex[SimpleDoc](DBConfig(work_dir='hi', other=10))
assert index._db_config.other == 10
assert index._db_config.work_dir == 'hi'
index.configure(RuntimeConfig(default_column_config={}, default_ef=10))
assert index._runtime_config.default_column_config == {}

# change only some settings
store = DummyDocIndex[SimpleDoc](DBConfig(work_dir='hi'))
assert store._db_config.other == 5
assert store._db_config.work_dir == 'hi'
store.configure(RuntimeConfig(default_column_config={}))
assert store._runtime_config.default_column_config == {}
index = DummyDocIndex[SimpleDoc](DBConfig(work_dir='hi'))
assert index._db_config.other == 5
assert index._db_config.work_dir == 'hi'
index.configure(RuntimeConfig(default_column_config={}))
assert index._runtime_config.default_column_config == {}


def test_set_by_kwargs():
# change all settings
store = DummyDocIndex[SimpleDoc](work_dir='hi', other=10)
assert store._db_config.other == 10
assert store._db_config.work_dir == 'hi'
store.configure(default_column_config={}, default_ef=10)
assert store._runtime_config.default_column_config == {}
index = DummyDocIndex[SimpleDoc](work_dir='hi', other=10)
assert index._db_config.other == 10
assert index._db_config.work_dir == 'hi'
index.configure(default_column_config={}, default_ef=10)
assert index._runtime_config.default_column_config == {}

# change only some settings
store = DummyDocIndex[SimpleDoc](work_dir='hi')
assert store._db_config.other == 5
assert store._db_config.work_dir == 'hi'
store.configure(default_column_config={})
assert store._runtime_config.default_column_config == {}
index = DummyDocIndex[SimpleDoc](work_dir='hi')
assert index._db_config.other == 5
assert index._db_config.work_dir == 'hi'
index.configure(default_column_config={})
assert index._runtime_config.default_column_config == {}


def test_default_column_config():
store = DummyDocIndex[SimpleDoc]()
assert store._runtime_config.default_column_config == {
index = DummyDocIndex[SimpleDoc]()
assert index._runtime_config.default_column_config == {
str: {
'dim': 128,
'space': 'l2',
Expand Down
30 changes: 15 additions & 15 deletions tests/index/elastic/v7/test_column_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,20 @@ class MyDoc(BaseDoc):
text: str
color: str = Field(col_type='keyword')

store = ElasticV7DocIndex[MyDoc]()
index = ElasticV7DocIndex[MyDoc]()
index_docs = [
MyDoc(id='0', text='hello world', color='red'),
MyDoc(id='1', text='never gonna give you up', color='blue'),
MyDoc(id='2', text='we are the world', color='green'),
]
store.index(index_docs)
index.index(index_docs)

query = 'world'
docs, _ = store.text_search(query, search_field='text')
docs, _ = index.text_search(query, search_field='text')
assert [doc.id for doc in docs] == ['0', '2']

filter_query = {'terms': {'color': ['red', 'blue']}}
docs = store.filter(filter_query)
docs = index.filter(filter_query)
assert [doc.id for doc in docs] == ['0', '1']


Expand All @@ -44,33 +44,33 @@ class MyDoc(BaseDoc):
}
)

store = ElasticV7DocIndex[MyDoc]()
index = ElasticV7DocIndex[MyDoc]()
doc = [
MyDoc(manager={'age': 25, 'name': {'first': 'Rachel', 'last': 'Green'}}),
MyDoc(manager={'age': 30, 'name': {'first': 'Monica', 'last': 'Geller'}}),
MyDoc(manager={'age': 35, 'name': {'first': 'Phoebe', 'last': 'Buffay'}}),
]
store.index(doc)
index.index(doc)
id_ = doc[0].id
assert store[id_].id == id_
assert store[id_].manager == doc[0].manager
assert index[id_].id == id_
assert index[id_].manager == doc[0].manager

filter_query = {'range': {'manager.age': {'gte': 30}}}
docs = store.filter(filter_query)
docs = index.filter(filter_query)
assert [doc.id for doc in docs] == [doc[1].id, doc[2].id]


def test_field_geo_point():
class MyDoc(BaseDoc):
location: dict = Field(col_type='geo_point')

store = ElasticV7DocIndex[MyDoc]()
index = ElasticV7DocIndex[MyDoc]()
doc = [
MyDoc(location={'lat': 40.12, 'lon': -72.34}),
MyDoc(location={'lat': 41.12, 'lon': -73.34}),
MyDoc(location={'lat': 42.12, 'lon': -74.34}),
]
store.index(doc)
index.index(doc)

query = {
'query': {
Expand All @@ -83,7 +83,7 @@ class MyDoc(BaseDoc):
},
}

docs, _ = store.execute_query(query)
docs, _ = index.execute_query(query)
assert [doc['id'] for doc in docs] == [doc[0].id, doc[1].id]


Expand All @@ -92,7 +92,7 @@ class MyDoc(BaseDoc):
expected_attendees: dict = Field(col_type='integer_range')
time_frame: dict = Field(col_type='date_range', format='yyyy-MM-dd')

store = ElasticV7DocIndex[MyDoc]()
index = ElasticV7DocIndex[MyDoc]()
doc = [
MyDoc(
expected_attendees={'gte': 10, 'lt': 20},
Expand All @@ -107,7 +107,7 @@ class MyDoc(BaseDoc):
time_frame={'gte': '2023-03-01', 'lt': '2023-04-01'},
),
]
store.index(doc)
index.index(doc)

query = {
'query': {
Expand All @@ -127,5 +127,5 @@ class MyDoc(BaseDoc):
}
},
}
docs, _ = store.execute_query(query)
docs, _ = index.execute_query(query)
assert [doc['id'] for doc in docs] == [doc[0].id, doc[1].id]
Loading

0 comments on commit 0c73ad8

Please sign in to comment.