From c37559cda00cea2e0178c9cd443abe2c7f90ae89 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 22 Feb 2024 21:06:20 -0800 Subject: [PATCH 1/3] fix(vectorstore): Correct HNSW parameter inclusion and test assertions - Fixed inclusion of HNSW parameters (M, EF_CONSTRUCTION, EF_RUNTIME) in Redis index creation. - Corrected test assertions to accurately reflect expected key count in Redis. - Updated check_index_exists to properly validate HNSW index configurations. --- .../vectorstore.py | 5 ++++- tests/test_memorystore_redis_vectorstore.py | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/langchain_google_memorystore_redis/vectorstore.py b/src/langchain_google_memorystore_redis/vectorstore.py index e40cac0..7419381 100644 --- a/src/langchain_google_memorystore_redis/vectorstore.py +++ b/src/langchain_google_memorystore_redis/vectorstore.py @@ -322,7 +322,10 @@ def init_index(client: redis.Redis, index_config: IndexConfig): f"FT.CREATE {index_config.name} ON HASH PREFIX 1 {RedisVectorStore.get_key_prefix(index_config.name)} " f"SCHEMA {index_config.field_name} VECTOR {index_config.type} " f"6 TYPE {index_config.data_type} DIM {index_config.vector_size} " - f"DISTANCE_METRIC {index_config.distance_metric}" + f"DISTANCE_METRIC {index_config.distance_metric} " + f"M {index_config.m} " + f"EF_CONSTRUCTION {index_config.ef_construction} " + f"EF_RUNTIME {index_config.ef_construction}" ) try: diff --git a/tests/test_memorystore_redis_vectorstore.py b/tests/test_memorystore_redis_vectorstore.py index 3a84272..3314575 100644 --- a/tests/test_memorystore_redis_vectorstore.py +++ b/tests/test_memorystore_redis_vectorstore.py @@ -134,8 +134,7 @@ def test_vector_store_add_texts(texts, metadatas, ids): # Verify no extra keys are present all_keys = [key.decode("utf-8") for key in client.keys(f"{index_name}*")] - # Currently RedisQuery stores the index schema as a key using the index_name - assert len(all_keys) == len(returned_ids) + 1, "Found unexpected keys in Redis" + assert len(all_keys) == len(returned_ids), "Found unexpected keys in Redis" # Clena up RedisVectorStore.drop_index(client=client, index_name=index_name) @@ -233,7 +232,7 @@ def test_vector_store_range_query(distance_strategy, distance_threshold): def check_index_exists( - client: redis.Redis, index_name: str, index_config: VectorIndexConfig + client: redis.Redis, index_name: str, index_config: HNSWConfig ) -> bool: try: index_info = client.ft(index_name).info() @@ -243,6 +242,17 @@ def check_index_exists( return ( index_info["index_name"] == index_name and index_info["index_definition"][1] == b"HASH" + and index_info["index_definition"][3][0].decode("utf-8") == index_config.name + and index_info["attributes"][0][1].decode("utf-8") == index_config.field_name + and index_info["attributes"][0][3].decode("utf-8") == index_config.field_name + and index_info["attributes"][0][5] == b'VECTOR' + and index_info["attributes"][0][7][3] == index_config.vector_size + and index_info["attributes"][0][7][5].decode("utf-8") == index_config.distance_metric + and index_info["attributes"][0][7][7].decode("utf-8") == index_config.data_type + and index_info["attributes"][0][7][9][1] == b'HNSW' + and index_info["attributes"][0][7][9][3] == index_config.m + and index_info["attributes"][0][7][9][5] == index_config.ef_construction + and index_info["attributes"][0][7][9][7] == index_config.ef_runtime ) From 12fa8bf19807869fe677893342be778198c5d343 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 22 Feb 2024 21:08:56 -0800 Subject: [PATCH 2/3] fixed formatting --- tests/test_memorystore_redis_vectorstore.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_memorystore_redis_vectorstore.py b/tests/test_memorystore_redis_vectorstore.py index 3314575..45f6f64 100644 --- a/tests/test_memorystore_redis_vectorstore.py +++ b/tests/test_memorystore_redis_vectorstore.py @@ -245,11 +245,12 @@ def check_index_exists( and index_info["index_definition"][3][0].decode("utf-8") == index_config.name and index_info["attributes"][0][1].decode("utf-8") == index_config.field_name and index_info["attributes"][0][3].decode("utf-8") == index_config.field_name - and index_info["attributes"][0][5] == b'VECTOR' + and index_info["attributes"][0][5] == b"VECTOR" and index_info["attributes"][0][7][3] == index_config.vector_size - and index_info["attributes"][0][7][5].decode("utf-8") == index_config.distance_metric + and index_info["attributes"][0][7][5].decode("utf-8") + == index_config.distance_metric and index_info["attributes"][0][7][7].decode("utf-8") == index_config.data_type - and index_info["attributes"][0][7][9][1] == b'HNSW' + and index_info["attributes"][0][7][9][1] == b"HNSW" and index_info["attributes"][0][7][9][3] == index_config.m and index_info["attributes"][0][7][9][5] == index_config.ef_construction and index_info["attributes"][0][7][9][7] == index_config.ef_runtime From fa7358535dd8d77e251bfb2a18ce85c782f62780 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 22 Feb 2024 22:52:00 -0800 Subject: [PATCH 3/3] fixed ill-formed ft.create command --- src/langchain_google_memorystore_redis/vectorstore.py | 5 +++-- tests/test_memorystore_redis_vectorstore.py | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/langchain_google_memorystore_redis/vectorstore.py b/src/langchain_google_memorystore_redis/vectorstore.py index 7419381..3103995 100644 --- a/src/langchain_google_memorystore_redis/vectorstore.py +++ b/src/langchain_google_memorystore_redis/vectorstore.py @@ -321,11 +321,12 @@ def init_index(client: redis.Redis, index_config: IndexConfig): command = ( f"FT.CREATE {index_config.name} ON HASH PREFIX 1 {RedisVectorStore.get_key_prefix(index_config.name)} " f"SCHEMA {index_config.field_name} VECTOR {index_config.type} " - f"6 TYPE {index_config.data_type} DIM {index_config.vector_size} " + f"14 TYPE {index_config.data_type} DIM {index_config.vector_size} " f"DISTANCE_METRIC {index_config.distance_metric} " + f"TYPE {index_config.data_type} " f"M {index_config.m} " f"EF_CONSTRUCTION {index_config.ef_construction} " - f"EF_RUNTIME {index_config.ef_construction}" + f"EF_RUNTIME {index_config.ef_runtime}" ) try: diff --git a/tests/test_memorystore_redis_vectorstore.py b/tests/test_memorystore_redis_vectorstore.py index 45f6f64..d86f691 100644 --- a/tests/test_memorystore_redis_vectorstore.py +++ b/tests/test_memorystore_redis_vectorstore.py @@ -36,7 +36,12 @@ def test_vector_store_init_index(): index_name = str(uuid.uuid4()) index_config = HNSWConfig( - name=index_name, distance_strategy=DistanceStrategy.COSINE, vector_size=128 + name=index_name, + distance_strategy=DistanceStrategy.COSINE, + vector_size=128, + m=1, + ef_construction=2, + ef_runtime=3, ) assert not check_index_exists(client, index_name, index_config)