Enhancing INSERT Command to Support Inserting Multiple Values (#1421)

# Problem Statement The INSERT command wasn't inserting multiple values into a table. It was only inserting the first tuple and ignoring everything else. ![image](https://github.com/georgia-tech-db/evadb/assets/42912887/ed8104af-cedb-4453-88ec-1e4d7827fa02) # Solution Modified the backend of the INSERT command by finding out two things: 1. The place where all tuples except the first one are dropped. That happened inside `parser/lark_visitor/_insert_statements.py`. 2. The place where the INSERT command is actually executed, `executor/insert_executor.py`, and whether it supports inserting multiple tuples (it does). After finding those two things, I simply captured all the tuples coming from the tree created by Lark, and passed them to the executor through the planner. I tried to make sure that this change causes no issues inside the planner. I also modified the hash functions of the InsertTableStatement, InsertPlan and LogicalInsert classes, because the value_list member became 2-dimensional after my change and could no longer be hashed. So I converted each element of value_list to a tuple, after which the hash functions worked again. # Output after enhancing INSERT command ![image](https://github.com/georgia-tech-db/evadb/assets/42912887/c15cac9b-a06f-4835-9b34-b9720efaa432) --------- Co-authored-by: Anmol Agarwal <aagarwal622@gatech.edu> Co-authored-by: americast <sayan.sinha@cc.gatech.edu>
georgia-tech-db · Dec 1, 2023 · c2457b2 · c2457b2
1 parent bbfa483
commit c2457b2
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 12 deletions.
diff --git a/evadb/executor/insert_executor.py b/evadb/executor/insert_executor.py
@@ -42,12 +42,13 @@ def exec(self, *args, **kwargs):
             table_catalog_entry.table_type == TableType.STRUCTURED_DATA
         ), "INSERT only implemented for structured data"
 
-        values_to_insert = [val_node.value for val_node in self.node.value_list]
-        tuple_to_insert = tuple(values_to_insert)
+        tuples_to_insert = [
+            tuple(i.value for i in val_node) for val_node in self.node.value_list
+        ]
         columns_to_insert = [col_node.name for col_node in self.node.column_list]
 
         # Adding all values to Batch for insert
-        dataframe = pd.DataFrame([tuple_to_insert], columns=columns_to_insert)
+        dataframe = pd.DataFrame(tuples_to_insert, columns=columns_to_insert)
         batch = Batch(dataframe)
 
         storage_engine = StorageEngine.factory(self.db, table_catalog_entry)
@@ -75,5 +76,5 @@ def exec(self, *args, **kwargs):
                 execute_query_fetch_all(self.db, create_index_query)
 
         yield Batch(
-            pd.DataFrame([f"Number of rows loaded: {str(len(values_to_insert))}"])
+            pd.DataFrame([f"Number of rows loaded: {str(len(tuples_to_insert))}"])
         )
diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py
@@ -491,7 +491,7 @@ def __hash__(self) -> int:
             (
                 super().__hash__(),
                 self.table,
-                tuple(self.value_list),
+                tuple(tuple(i) for i in self.value_list),
                 tuple(self.column_list),
             )
         )

diff --git a/evadb/parser/insert_statement.py b/evadb/parser/insert_statement.py
@@ -90,6 +90,6 @@ def __hash__(self) -> int:
                 super().__hash__(),
                 self.table_ref,
                 tuple(self.column_list),
-                tuple(self.value_list),
+                tuple(tuple(val) for val in self.value_list),
             )
         )
diff --git a/evadb/parser/lark_visitor/_insert_statements.py b/evadb/parser/lark_visitor/_insert_statements.py
@@ -39,9 +39,7 @@ def insert_statement(self, tree):
                 elif child.data == "uid_list":
                     column_list = self.visit(child)
                 elif child.data == "insert_statement_value":
-                    insrt_value = self.visit(child)
-                    # Support only (value1, value2, .... value n)
-                    value_list = insrt_value[0]
+                    value_list = self.visit(child)
 
         insert_stmt = InsertTableStatement(table_ref, column_list, value_list)
         return insert_stmt

diff --git a/evadb/plan_nodes/insert_plan.py b/evadb/plan_nodes/insert_plan.py
@@ -55,6 +55,6 @@ def __hash__(self) -> int:
                 super().__hash__(),
                 self.table_ref,
                 tuple(self.column_list),
-                tuple(self.value_list),
+                tuple(tuple(val) for val in self.value_list),
             )
         )
diff --git a/test/integration_tests/short/test_insert_executor.py b/test/integration_tests/short/test_insert_executor.py
@@ -43,10 +43,21 @@ def setUp(self):
         """
         execute_query_fetch_all(self.evadb, query)
 
+        query = """CREATE TABLE IF NOT EXISTS books
+            (
+                name    TEXT(100),
+                author  TEXT(100),
+                year    INTEGER
+            );
+        """
+        execute_query_fetch_all(self.evadb, query)
+
     def tearDown(self):
         shutdown_ray()
         file_remove("dummy.avi")
 
+        execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS books;")
+
     # integration test
     @unittest.skip("Not supported in current version")
     def test_should_load_video_in_table(self):
@@ -111,3 +122,98 @@ def test_should_insert_tuples_in_table(self):
         query = """SELECT name FROM CSVTable WHERE name LIKE '.*(sad|happy)';"""
         batch = execute_query_fetch_all(self.evadb, query)
         self.assertEqual(len(batch._frames), 2)
+
+    def test_insert_one_tuple_in_table(self):
+        query = """
+            INSERT INTO books (name, author, year) VALUES (
+                'Harry Potter', 'JK Rowling', 1997
+            );
+        """
+        execute_query_fetch_all(self.evadb, query)
+        query = "SELECT * FROM books;"
+        batch = execute_query_fetch_all(self.evadb, query)
+        logger.info(batch)
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.name"].array,
+                np.array(
+                    [
+                        "Harry Potter",
+                    ]
+                ),
+            )
+        )
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.author"].array,
+                np.array(
+                    [
+                        "JK Rowling",
+                    ]
+                ),
+            )
+        )
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.year"].array,
+                np.array(
+                    [
+                        1997,
+                    ]
+                ),
+            )
+        )
+
+    def test_insert_multiple_tuples_in_table(self):
+        query = """
+            INSERT INTO books (name, author, year) VALUES
+            ('Fantastic Beasts Collection', 'JK Rowling', 2001),
+            ('Magic Tree House Collection', 'Mary Pope Osborne', 1992),
+            ('Sherlock Holmes', 'Arthur Conan Doyle', 1887);
+        """
+        execute_query_fetch_all(self.evadb, query)
+        query = "SELECT * FROM books;"
+        batch = execute_query_fetch_all(self.evadb, query)
+        logger.info(batch)
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.name"].array,
+                np.array(
+                    [
+                        "Fantastic Beasts Collection",
+                        "Magic Tree House Collection",
+                        "Sherlock Holmes",
+                    ]
+                ),
+            )
+        )
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.author"].array,
+                np.array(
+                    [
+                        "JK Rowling",
+                        "Mary Pope Osborne",
+                        "Arthur Conan Doyle",
+                    ]
+                ),
+            )
+        )
+
+        self.assertIsNone(
+            np.testing.assert_array_equal(
+                batch.frames["books.year"].array,
+                np.array(
+                    [
+                        2001,
+                        1992,
+                        1887,
+                    ]
+                ),
+            )
+        )
diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py
@@ -827,8 +827,10 @@ def test_insert_statement(self):
                 TupleValueExpression("Frame_Path"),
             ],
             [
-                ConstantValueExpression(1),
-                ConstantValueExpression("/mnt/frames/1.png", ColumnType.TEXT),
+                [
+                    ConstantValueExpression(1),
+                    ConstantValueExpression("/mnt/frames/1.png", ColumnType.TEXT),
+                ]
             ],
         )
         evadb_statement_list = parser.parse(insert_query)