Skip to content

Commit

Permalink
Enhancing INSERT Command to Support Inserting Multiple Values (#1421)
Browse files Browse the repository at this point in the history
# Problem Statement
The INSERT command wasn't inserting multiple tuples into a table. It
inserted only the first tuple and silently ignored all the others.

![image](https://github.com/georgia-tech-db/evadb/assets/42912887/ed8104af-cedb-4453-88ec-1e4d7827fa02)

# Solution
Modified the backend of the INSERT command after identifying two things:
1. The place where every tuple except the first one is dropped. That
happened inside `parser/lark_visitor/_insert_statements.py`.
2. The place where the INSERT command is actually executed,
`executor/insert_executor.py`, and whether it supports inserting
multiple tuples (it does).

After finding those two things, I simply captured all the tuples coming
from the tree created by Lark and passed them to the executor through
the planner. I also tried to make sure that this change does not cause
any issues inside the planner.

I also modified the hash functions of the InsertTableStatement,
InsertPlan and LogicalInsert classes, because the `value_list` member
became two-dimensional (a list of lists) after my change and could no
longer be hashed directly. I converted each element of `value_list` to a
tuple, after which the hash functions worked again.

# Output after enhancing INSERT command

![image](https://github.com/georgia-tech-db/evadb/assets/42912887/c15cac9b-a06f-4835-9b34-b9720efaa432)

---------

Co-authored-by: Anmol Agarwal <aagarwal622@gatech.edu>
Co-authored-by: americast <sayan.sinha@cc.gatech.edu>
  • Loading branch information
3 people committed Dec 1, 2023
1 parent bbfa483 commit c2457b2
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 12 deletions.
9 changes: 5 additions & 4 deletions evadb/executor/insert_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ def exec(self, *args, **kwargs):
table_catalog_entry.table_type == TableType.STRUCTURED_DATA
), "INSERT only implemented for structured data"

values_to_insert = [val_node.value for val_node in self.node.value_list]
tuple_to_insert = tuple(values_to_insert)
tuples_to_insert = [
tuple(i.value for i in val_node) for val_node in self.node.value_list
]
columns_to_insert = [col_node.name for col_node in self.node.column_list]

# Adding all values to Batch for insert
dataframe = pd.DataFrame([tuple_to_insert], columns=columns_to_insert)
dataframe = pd.DataFrame(tuples_to_insert, columns=columns_to_insert)
batch = Batch(dataframe)

storage_engine = StorageEngine.factory(self.db, table_catalog_entry)
Expand Down Expand Up @@ -75,5 +76,5 @@ def exec(self, *args, **kwargs):
execute_query_fetch_all(self.db, create_index_query)

yield Batch(
pd.DataFrame([f"Number of rows loaded: {str(len(values_to_insert))}"])
pd.DataFrame([f"Number of rows loaded: {str(len(tuples_to_insert))}"])
)
2 changes: 1 addition & 1 deletion evadb/optimizer/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def __hash__(self) -> int:
(
super().__hash__(),
self.table,
tuple(self.value_list),
tuple(tuple(i) for i in self.value_list),
tuple(self.column_list),
)
)
Expand Down
2 changes: 1 addition & 1 deletion evadb/parser/insert_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,6 @@ def __hash__(self) -> int:
super().__hash__(),
self.table_ref,
tuple(self.column_list),
tuple(self.value_list),
tuple(tuple(val) for val in self.value_list),
)
)
4 changes: 1 addition & 3 deletions evadb/parser/lark_visitor/_insert_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def insert_statement(self, tree):
elif child.data == "uid_list":
column_list = self.visit(child)
elif child.data == "insert_statement_value":
insrt_value = self.visit(child)
# Support only (value1, value2, .... value n)
value_list = insrt_value[0]
value_list = self.visit(child)

insert_stmt = InsertTableStatement(table_ref, column_list, value_list)
return insert_stmt
Expand Down
2 changes: 1 addition & 1 deletion evadb/plan_nodes/insert_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ def __hash__(self) -> int:
super().__hash__(),
self.table_ref,
tuple(self.column_list),
tuple(self.value_list),
tuple(tuple(val) for val in self.value_list),
)
)
106 changes: 106 additions & 0 deletions test/integration_tests/short/test_insert_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,21 @@ def setUp(self):
"""
execute_query_fetch_all(self.evadb, query)

query = """CREATE TABLE IF NOT EXISTS books
(
name TEXT(100),
author TEXT(100),
year INTEGER
);
"""
execute_query_fetch_all(self.evadb, query)

# Per-test cleanup hook: undoes what the tests and setUp leave behind.
def tearDown(self):
# NOTE(review): presumably stops the Ray runtime if one was started — confirm
# against the helper's definition.
shutdown_ray()
# Remove the "dummy.avi" file (presumably created in setUp — confirm).
file_remove("dummy.avi")

# Drop the `books` table that setUp creates, so rows inserted by one test
# are not visible to the next test's `SELECT * FROM books;`.
execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS books;")

# integration test
@unittest.skip("Not supported in current version")
def test_should_load_video_in_table(self):
Expand Down Expand Up @@ -111,3 +122,98 @@ def test_should_insert_tuples_in_table(self):
query = """SELECT name FROM CSVTable WHERE name LIKE '.*(sad|happy)';"""
batch = execute_query_fetch_all(self.evadb, query)
self.assertEqual(len(batch._frames), 2)

def test_insert_one_tuple_in_table(self):
    """Insert a single row into `books` and check each column reads back.

    Runs one single-tuple INSERT, selects everything from the table and
    compares the `name`, `author` and `year` columns against the inserted
    values.
    """
    # The SQL literal is intentionally flush-left; its contents are part of
    # the query text sent to the engine.
    insert_sql = """
INSERT INTO books (name, author, year) VALUES (
'Harry Potter', 'JK Rowling', 1997
);
"""
    execute_query_fetch_all(self.evadb, insert_sql)
    result = execute_query_fetch_all(self.evadb, "SELECT * FROM books;")
    logger.info(result)

    # Columns are verified in order: name, author, year.
    expected_columns = (
        ("books.name", ["Harry Potter"]),
        ("books.author", ["JK Rowling"]),
        ("books.year", [1997]),
    )
    for column, values in expected_columns:
        actual = result.frames[column].array
        # assert_array_equal raises on mismatch and returns None otherwise.
        self.assertIsNone(
            np.testing.assert_array_equal(actual, np.array(values))
        )

def test_insert_multiple_tuples_in_table(self):
    """Insert three rows with one INSERT and check all of them read back.

    Runs a single multi-tuple INSERT, selects everything from `books` and
    compares the `name`, `author` and `year` columns against the inserted
    values, in insertion order.
    """
    # The SQL literal is intentionally flush-left; its contents are part of
    # the query text sent to the engine.
    insert_sql = """
INSERT INTO books (name, author, year) VALUES
('Fantastic Beasts Collection', 'JK Rowling', 2001),
('Magic Tree House Collection', 'Mary Pope Osborne', 1992),
('Sherlock Holmes', 'Arthur Conan Doyle', 1887);
"""
    execute_query_fetch_all(self.evadb, insert_sql)
    result = execute_query_fetch_all(self.evadb, "SELECT * FROM books;")
    logger.info(result)

    # Columns are verified in order: name, author, year.
    expected_columns = (
        (
            "books.name",
            [
                "Fantastic Beasts Collection",
                "Magic Tree House Collection",
                "Sherlock Holmes",
            ],
        ),
        (
            "books.author",
            [
                "JK Rowling",
                "Mary Pope Osborne",
                "Arthur Conan Doyle",
            ],
        ),
        (
            "books.year",
            [
                2001,
                1992,
                1887,
            ],
        ),
    )
    for column, values in expected_columns:
        actual = result.frames[column].array
        # assert_array_equal raises on mismatch and returns None otherwise.
        self.assertIsNone(
            np.testing.assert_array_equal(actual, np.array(values))
        )
6 changes: 4 additions & 2 deletions test/unit_tests/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,8 +827,10 @@ def test_insert_statement(self):
TupleValueExpression("Frame_Path"),
],
[
ConstantValueExpression(1),
ConstantValueExpression("/mnt/frames/1.png", ColumnType.TEXT),
[
ConstantValueExpression(1),
ConstantValueExpression("/mnt/frames/1.png", ColumnType.TEXT),
]
],
)
evadb_statement_list = parser.parse(insert_query)
Expand Down

0 comments on commit c2457b2

Please sign in to comment.