Skip to content

Commit

Permalink
feat: create index if exists (#1082)
Browse files Browse the repository at this point in the history
  • Loading branch information
jiashenC committed Sep 10, 2023
1 parent fe43a57 commit 5a3efce
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 12 deletions.
36 changes: 36 additions & 0 deletions docs/source/reference/evaql/create.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,42 @@ To create a table, specify the schema of the table.
object_id INTEGER
);
CREATE INDEX
------------

The CREATE INDEX statement constructs an EvaDB-based index to accelerate semantic search.
The index can be created either directly on a column of a table or on the output of a function applied to a column of a table.

.. code:: sql

   CREATE INDEX [index_name]
   ON [table_name] ([column_name])
   USING [index_method]

   CREATE INDEX [index_name]
   ON [table_name] ([function_name]([column_name]))
   USING [index_method]

* [index_name] is the name of the constructed index.
* [table_name] is the name of the table on which the index is created.
* [column_name] is the name of one of the columns in the table. We currently only support creating an index on a single column of a table.
* [function_name] is an optional parameter that can be added if the index needs to be constructed on the results of a function.
* [index_method] is the vector store used to build the index, such as FAISS or QDRANT.

Examples
~~~~~~~~

.. code:: sql

   CREATE INDEX reddit_index
   ON reddit_dataset (data)
   USING FAISS

   CREATE INDEX func_reddit_index
   ON reddit_dataset (SiftFeatureExtractor(data))
   USING QDRANT

You can check out the :ref:`similarity search use case<image-search>` to see how the index is used automatically.

CREATE FUNCTION
---------------

Expand Down
8 changes: 6 additions & 2 deletions evadb/executor/create_index_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ def __init__(self, db: EvaDBDatabase, node: CreateIndexPlan):
def exec(self, *args, **kwargs):
if self.catalog().get_index_catalog_entry_by_name(self.node.name):
msg = f"Index {self.node.name} already exists."
logger.error(msg)
raise ExecutorError(msg)
if self.node.if_not_exists:
logger.warn(msg)
return
else:
logger.error(msg)
raise ExecutorError(msg)

self.index_path = self._get_index_save_path()
self.index = None
Expand Down
8 changes: 8 additions & 0 deletions evadb/optimizer/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,7 @@ class LogicalCreateIndex(Operator):
def __init__(
self,
name: str,
if_not_exists: bool,
table_ref: TableRef,
col_list: List[ColumnDefinition],
vector_store_type: VectorStoreType,
Expand All @@ -1078,6 +1079,7 @@ def __init__(
):
super().__init__(OperatorType.LOGICALCREATEINDEX, children)
self._name = name
self._if_not_exists = if_not_exists
self._table_ref = table_ref
self._col_list = col_list
self._vector_store_type = vector_store_type
Expand All @@ -1087,6 +1089,10 @@ def __init__(
def name(self):
return self._name

@property
def if_not_exists(self) -> bool:
    """Whether the CREATE INDEX statement carried an IF NOT EXISTS clause."""
    return self._if_not_exists

@property
def table_ref(self) -> TableRef:
    """Reference to the table on which the index is created."""
    return self._table_ref
Expand All @@ -1110,6 +1116,7 @@ def __eq__(self, other):
return (
is_subtree_equal
and self.name == other.name
and self.if_not_exists == other.if_not_exists
and self.table_ref == other.table_ref
and self.col_list == other.col_list
and self.vector_store_type == other.vector_store_type
Expand All @@ -1121,6 +1128,7 @@ def __hash__(self) -> int:
(
super().__hash__(),
self.name,
self.if_not_exists,
self.table_ref,
tuple(self.col_list),
self.vector_store_type,
Expand Down
1 change: 1 addition & 0 deletions evadb/optimizer/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,7 @@ def check(self, before: Operator, context: OptimizerContext):
def apply(self, before: LogicalCreateIndex, context: OptimizerContext):
after = CreateIndexPlan(
before.name,
before.if_not_exists,
before.table_ref,
before.col_list,
before.vector_store_type,
Expand Down
1 change: 1 addition & 0 deletions evadb/optimizer/statement_to_opr_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ def visit_explain(self, statement: ExplainStatement):
def visit_create_index(self, statement: CreateIndexStatement):
create_index_opr = LogicalCreateIndex(
statement.name,
statement.if_not_exists,
statement.table_ref,
statement.col_list,
statement.vector_store_type,
Expand Down
11 changes: 10 additions & 1 deletion evadb/parser/create_index_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,24 @@ class CreateIndexStatement(AbstractStatement):
def __init__(
    self,
    name: str,
    if_not_exists: bool,
    table_ref: TableRef,
    col_list: List[ColumnDefinition],
    vector_store_type: VectorStoreType,
    function: FunctionExpression = None,
):
    """Store the parsed pieces of a CREATE INDEX statement.

    Args:
        name: name of the index to create.
        if_not_exists: True when the statement was written with
            IF NOT EXISTS.
        table_ref: table on which the index is created.
        col_list: definitions of the indexed column(s).
        vector_store_type: vector store named in the USING clause.
        function: function applied to the column before indexing, or
            None when the index is built on the column directly.
    """
    super().__init__(StatementType.CREATE_INDEX)
    self._name = name
    self._if_not_exists = if_not_exists
    self._table_ref = table_ref
    self._col_list = col_list
    self._vector_store_type = vector_store_type
    self._function = function

def __str__(self) -> str:
print_str = "CREATE INDEX {} ON {} ({}{}) ".format(
print_str = "CREATE INDEX {} {} ON {} ({}{}) ".format(
self._name,
"IF NOT EXISTS" if self._if_not_exists else "",
self._table_ref,
"" if self._function else self._function,
tuple(self._col_list),
Expand All @@ -51,6 +54,10 @@ def __str__(self) -> str:
def name(self):
return self._name

@property
def if_not_exists(self) -> bool:
    """Whether the statement was written with IF NOT EXISTS."""
    return self._if_not_exists

@property
def table_ref(self) -> TableRef:
    """Reference to the table named in the ON clause of the statement."""
    return self._table_ref
Expand All @@ -72,6 +79,7 @@ def __eq__(self, other):
return False
return (
self._name == other.name
and self._if_not_exists == other.if_not_exists
and self._table_ref == other.table_ref
and self.col_list == other.col_list
and self._vector_store_type == other.vector_store_type
Expand All @@ -83,6 +91,7 @@ def __hash__(self) -> int:
(
super().__hash__(),
self._name,
self._if_not_exists,
self._table_ref,
tuple(self.col_list),
self._vector_store_type,
Expand Down
2 changes: 1 addition & 1 deletion evadb/parser/evadb.lark
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ colon_param_dict: LR_CURLY_BRACKET colon_param ("," colon_param)* RR_CURLY_BRACK

create_database_engine_clause: WITH ENGINE "=" string_literal "," PARAMETERS "=" colon_param_dict

create_index: CREATE INDEX uid ON table_name index_elem vector_store_type?
create_index: CREATE INDEX if_not_exists? uid ON table_name index_elem vector_store_type?

create_table: CREATE TABLE if_not_exists? table_name (create_definitions | (AS select_statement))

Expand Down
5 changes: 4 additions & 1 deletion evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def vector_store_type(self, tree):
# INDEX CREATION
def create_index(self, tree):
index_name = None
if_not_exists = False
table_name = None
vector_store_type = None
index_elem = None
Expand All @@ -253,6 +254,8 @@ def create_index(self, tree):
if isinstance(child, Tree):
if child.data == "uid":
index_name = self.visit(child)
if child.data == "if_not_exists":
if_not_exists = True
elif child.data == "table_name":
table_name = self.visit(child)
table_ref = TableRef(table_name)
Expand All @@ -276,7 +279,7 @@ def create_index(self, tree):
]

return CreateIndexStatement(
index_name, table_ref, col_list, vector_store_type, function
index_name, if_not_exists, table_ref, col_list, vector_store_type, function
)


Expand Down
7 changes: 7 additions & 0 deletions evadb/plan_nodes/create_index_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ class CreateIndexPlan(AbstractPlan):
def __init__(
self,
name: str,
if_not_exists: bool,
table_ref: TableRef,
col_list: List[ColumnDefinition],
vector_store_type: VectorStoreType,
function: FunctionExpression = None,
):
super().__init__(PlanOprType.CREATE_INDEX)
self._name = name
self._if_not_exists = if_not_exists
self._table_ref = table_ref
self._col_list = col_list
self._vector_store_type = vector_store_type
Expand All @@ -42,6 +44,10 @@ def __init__(
def name(self):
return self._name

@property
def if_not_exists(self) -> bool:
    """Whether an already-existing index of the same name is tolerated (IF NOT EXISTS)."""
    return self._if_not_exists

@property
def table_ref(self) -> TableRef:
    """Reference to the table on which the index is created."""
    return self._table_ref
Expand Down Expand Up @@ -76,6 +82,7 @@ def __hash__(self) -> int:
(
super().__hash__(),
self.name,
self.if_not_exists,
self.table_ref,
tuple(self.col_list),
self.vector_store_type,
Expand Down
40 changes: 34 additions & 6 deletions test/integration_tests/long/test_create_index_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pytest

from evadb.catalog.catalog_type import VectorStoreType
from evadb.executor.executor_utils import ExecutorError
from evadb.models.storage.batch import Batch
from evadb.server.command_handler import execute_query_fetch_all
from evadb.storage.storage_engine import StorageEngine
Expand Down Expand Up @@ -90,13 +91,46 @@ def setUpClass(cls):
)
storage_engine.write(input_tb_entry, input_batch_data)

def tearDown(self):
    """Drop the index created by the test that has just finished.

    unittest calls this hook on the test instance after every test
    method, so it is declared as a regular instance method rather than a
    classmethod. ``self.evadb`` is the same handle the tests themselves
    use.
    """
    query = "DROP INDEX testCreateIndexName;"
    execute_query_fetch_all(self.evadb, query)

@classmethod
def tearDownClass(cls):
    """Drop the two tables used by the tests once the whole class has run."""
    drop_statements = (
        "DROP TABLE testCreateIndexFeatTable;",
        "DROP TABLE testCreateIndexInputTable;",
    )
    # Executed in order; a failure on the first statement stops the cleanup.
    for statement in drop_statements:
        execute_query_fetch_all(cls.evadb, statement)

@macos_skip_marker
def test_index_already_exist(self):
    """Check duplicate CREATE INDEX handling with and without IF NOT EXISTS."""

    def stored_index_type():
        # Type recorded in the catalog for the index under test.
        entry = self.evadb.catalog().get_index_catalog_entry_by_name(
            "testCreateIndexName"
        )
        return entry.type

    create_faiss = "CREATE INDEX testCreateIndexName ON testCreateIndexFeatTable (feat) USING FAISS;"
    execute_query_fetch_all(self.evadb, create_faiss)
    self.assertEqual(stored_index_type(), VectorStoreType.FAISS)

    # Repeating the statement without IF NOT EXISTS must raise.
    with self.assertRaises(ExecutorError):
        execute_query_fetch_all(self.evadb, create_faiss)

    # With IF NOT EXISTS the statement must succeed without replacing the
    # existing index: the catalog entry keeps its FAISS type even though
    # QDRANT was requested.
    create_if_missing = "CREATE INDEX IF NOT EXISTS testCreateIndexName ON testCreateIndexFeatTable (feat) USING QDRANT;"
    execute_query_fetch_all(self.evadb, create_if_missing)
    self.assertEqual(stored_index_type(), VectorStoreType.FAISS)

@macos_skip_marker
def test_should_create_index_faiss(self):
query = "CREATE INDEX testCreateIndexName ON testCreateIndexFeatTable (feat) USING FAISS;"
Expand Down Expand Up @@ -133,9 +167,6 @@ def test_should_create_index_faiss(self):
self.assertEqual(distance[0][0], 0)
self.assertEqual(row_id[0][0], 1)

# Cleanup.
self.evadb.catalog().drop_index_catalog_entry("testCreateIndexName")

@macos_skip_marker
def test_should_create_index_with_function(self):
query = "CREATE INDEX testCreateIndexName ON testCreateIndexInputTable (DummyFeatureExtractor(input)) USING FAISS;"
Expand Down Expand Up @@ -169,6 +200,3 @@ def test_should_create_index_with_function(self):
distance, row_id = index.search(np.array([[0, 0, 0]]).astype(np.float32), 1)
self.assertEqual(distance[0][0], 0)
self.assertEqual(row_id[0][0], 1)

# Cleanup.
self.evadb.catalog().drop_index_catalog_entry("testCreateIndexName")
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def test_check_plan_equality(self):
MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
)
create_index_plan = LogicalCreateIndex(
MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
)
delete_plan = LogicalDelete(MagicMock())
insert_plan = LogicalInsert(
Expand Down
11 changes: 11 additions & 0 deletions test/unit_tests/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def test_create_index_statement(self):

expected_stmt = CreateIndexStatement(
"testindex",
False,
TableRef(TableInfo("MyVideo")),
[
ColumnDefinition("featCol", None, None, None),
Expand All @@ -115,6 +116,15 @@ def test_create_index_statement(self):
actual_stmt = evadb_stmt_list[0]
self.assertEqual(actual_stmt, expected_stmt)

# create if_not_exists
create_index_query = (
"CREATE INDEX IF NOT EXISTS testindex ON MyVideo (featCol) USING FAISS;"
)
evadb_stmt_list = parser.parse(create_index_query)
actual_stmt = evadb_stmt_list[0]
expected_stmt._if_not_exists = True
self.assertEqual(actual_stmt, expected_stmt)

# create index on Function expression
create_index_query = (
"CREATE INDEX testindex ON MyVideo (FeatureExtractor(featCol)) USING FAISS;"
Expand All @@ -130,6 +140,7 @@ def test_create_index_statement(self):
func_expr.append_child(TupleValueExpression("featCol"))
expected_stmt = CreateIndexStatement(
"testindex",
False,
TableRef(TableInfo("MyVideo")),
[
ColumnDefinition("featCol", None, None, None),
Expand Down

0 comments on commit 5a3efce

Please sign in to comment.