Skip to content

Commit

Permalink
feat: create materialized view infers column name (#768)
Browse files Browse the repository at this point in the history
Per issue #739 
CREATE MATERIALIZED VIEW is now able to infer column names from the query.
Specifying the column list is therefore optional for the user.

Before:
CREATE MATERIALIZED VIEW dummy_view **(id, label)** AS SELECT id,
DummyObjectDetector(data).label FROM MyVideo;

After:
CREATE MATERIALIZED VIEW dummy_view AS SELECT id,
DummyObjectDetector(data).label FROM MyVideo;

Co-authored-by: Yulai Cui <alex@Yulais-MacBook-Pro.local>
  • Loading branch information
yulaicui and Yulai Cui committed May 25, 2023
1 parent bbd07e1 commit 579390c
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 10 deletions.
2 changes: 1 addition & 1 deletion eva/parser/eva.lark
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ rename_table: RENAME TABLE table_name TO table_name
create_udf: CREATE UDF if_not_exists? udf_name INPUT create_definitions OUTPUT create_definitions TYPE udf_type IMPL udf_impl udf_metadata* | CREATE UDF if_not_exists? udf_name IMPL udf_impl udf_metadata* | CREATE UDF if_not_exists? udf_name TYPE udf_type udf_metadata*

// Create Materialized View
create_materialized_view: CREATE MATERIALIZED VIEW if_not_exists? table_name ("(" uid_list ")") AS select_statement
create_materialized_view: CREATE MATERIALIZED VIEW if_not_exists? table_name ("(" uid_list ")") AS select_statement | CREATE MATERIALIZED VIEW if_not_exists? table_name AS select_statement

// Details
udf_name: uid
Expand Down
15 changes: 12 additions & 3 deletions eva/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def create_materialized_view(self, tree):
view_info = None
if_not_exists = False
query = None
uid_list = []

for child in tree.children:
if isinstance(child, Tree):
Expand All @@ -248,9 +249,17 @@ def create_materialized_view(self, tree):

# setting all other column definition attributes as None,
# need to figure from query
col_list = [
ColumnDefinition(uid.col_name, None, None, None) for uid in uid_list
]
if uid_list == []:
assert (query is not None)
for uid in query.target_list:
uid_list.append(uid)

col_list = []
for uid in uid_list:
if hasattr(uid, "col_name"):
col_list.append(ColumnDefinition(uid.col_name, None, None, None))
elif hasattr(uid, "output"):
col_list.append(ColumnDefinition(uid.output, None, None, None))
return CreateMaterializedViewStatement(
view_info, col_list, if_not_exists, query
)
Expand Down
54 changes: 48 additions & 6 deletions test/integration_tests/test_mat_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,36 @@ def tearDownClass(cls):
execute_query_fetch_all("DROP TABLE UATRAC;")

def test_should_mat_view_with_dummy(self):
materialized_query = """CREATE MATERIALIZED VIEW dummy_view (id, label)
materialized_query = """CREATE MATERIALIZED VIEW dummy_view0 (id, label)
AS SELECT id, DummyObjectDetector(data).label FROM MyVideo;
"""
execute_query_fetch_all(materialized_query)

select_query = "SELECT id, label FROM dummy_view;"
select_query = "SELECT id, label FROM dummy_view0;"
actual_batch = execute_query_fetch_all(select_query)
actual_batch.sort()

labels = DummyObjectDetector().labels
expected = [
{"dummy_view.id": i, "dummy_view.label": [labels[1 + i % 2]]}
{"dummy_view0.id": i, "dummy_view0.label": [labels[1 + i % 2]]}
for i in range(NUM_FRAMES)
]
expected_batch = Batch(frames=pd.DataFrame(expected))
self.assertEqual(actual_batch, expected_batch)

def test_should_infer_mat_view_column_names_with_dummy(self):
materialized_query = """CREATE MATERIALIZED VIEW dummy_view1
AS SELECT id, DummyObjectDetector(data).label FROM MyVideo;
"""
execute_query_fetch_all(materialized_query)

select_query = "SELECT id, label FROM dummy_view1;"
actual_batch = execute_query_fetch_all(select_query)
actual_batch.sort()

labels = DummyObjectDetector().labels
expected = [
{"dummy_view1.id": i, "dummy_view1.label": [labels[1 + i % 2]]}
for i in range(NUM_FRAMES)
]
expected_batch = Batch(frames=pd.DataFrame(expected))
Expand Down Expand Up @@ -131,18 +149,42 @@ def test_should_mat_view_with_fastrcnn_lateral_join(self):
)
query = (
"CREATE MATERIALIZED VIEW IF NOT EXISTS "
f"uadtrac_fastRCNN_new (id, label, bbox) AS {select_query};"
f"uadtrac_fastRCNN_new0 (id, label, bbox) AS {select_query};"
)
execute_query_fetch_all(query)

select_view_query = "SELECT id, label, bbox FROM uadtrac_fastRCNN_new0"
actual_batch = execute_query_fetch_all(select_view_query)
actual_batch.sort()

self.assertEqual(len(actual_batch), 5)
# non-trivial test case
res = actual_batch.frames
for idx in res.index:
self.assertTrue("car" in res["uadtrac_fastrcnn_new0.label"][idx])

execute_query_fetch_all("DROP TABLE IF EXISTS uadtrac_fastRCNN;")

@pytest.mark.torchtest
def test_should_infer_mat_view_column_names_with_fastrcnn_lateral_join(self):
select_query = (
"SELECT id, label, bbox FROM UATRAC JOIN LATERAL "
"Yolo(data) AS T(label, bbox, score) WHERE id < 5;"
)
query = (
"CREATE MATERIALIZED VIEW IF NOT EXISTS "
f"uadtrac_fastRCNN_new1 AS {select_query};"
)
execute_query_fetch_all(query)

select_view_query = "SELECT id, label, bbox FROM uadtrac_fastRCNN_new"
select_view_query = "SELECT id, label, bbox FROM uadtrac_fastRCNN_new1"
actual_batch = execute_query_fetch_all(select_view_query)
actual_batch.sort()

self.assertEqual(len(actual_batch), 5)
# non-trivial test case
res = actual_batch.frames
for idx in res.index:
self.assertTrue("car" in res["uadtrac_fastrcnn_new.label"][idx])
self.assertTrue("car" in res["uadtrac_fastrcnn_new1.label"][idx])

execute_query_fetch_all("DROP TABLE IF EXISTS uadtrac_fastRCNN;")

0 comments on commit 579390c

Please sign in to comment.