fix: third-party test (#1124)
1. Added `sqlite` test cases to PR testing.
2. Fixed the `mysql` case-sensitivity issue.
3. Fixed the `sqlalchemy` URI for each database (see the sketch below).
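
For item 3, a minimal sketch of the driver-qualified SQLAlchemy URL formats the handlers switch to below; the credentials, hosts, and ports are placeholders, not values from this repository:

# A sketch of the driver-qualified URIs adopted in this commit.
# user/pass/localhost and the ports are illustrative placeholders.
from sqlalchemy.engine import make_url

uris = {
    "mariadb": "mariadb+mariadbconnector://user:pass@localhost:3306/evadb",
    "mysql": "mysql+mysqlconnector://user:pass@localhost:3306/evadb",
    "postgres": "postgresql+psycopg2://user:pass@localhost:5432/evadb",
}

for name, uri in uris.items():
    # make_url() parses the URL without importing the DBAPI driver, so this
    # runs even when the connector packages are not installed.
    print(name, "->", make_url(uri).drivername)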

---------

Co-authored-by: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
jiashenC and gaurav274 committed Sep 15, 2023
1 parent 5858f8d commit 29efc16
Showing 7 changed files with 66 additions and 13 deletions.
14 changes: 14 additions & 0 deletions .circleci/config.yml
@@ -56,6 +56,20 @@ workflows:
               ignore:
                 - master
                 - staging
+
+      ################################
+      #### SHORT THIRDPARTY TESTS: PR
+      ################################
+      ################################
+      - Linux:
+          name: Short Third Party Test | v3.10 | Linux
+          mode: SHORT THIRDPARTY TEST
+          filters:
+            branches:
+              ignore:
+                - master
+                - staging
+
       ################################
       #### LONG INTEGRATION TESTS:
       #### Staging
19 changes: 17 additions & 2 deletions evadb/storage/native_storage_engine.py
@@ -88,6 +88,7 @@ def _dict_to_sql_row(dict_row: dict, columns: List[ColumnCatalogEntry]):

 def _deserialize_sql_row(sql_row: tuple, columns: List[ColumnCatalogEntry]):
     # Deserialize numpy data
+
     dict_row = {}
     for idx, col in enumerate(columns):
         # hack, we skip deserializing if sql_row[col.name] is not of type bytes
@@ -173,9 +174,23 @@ def read(self, table: TableCatalogEntry) -> Iterator[Batch]:
         table_to_read = Table(table.name, metadata, autoload_with=engine)
         result = session.execute(table_to_read.select()).fetchall()
         data_batch = []
-        # todo check if the column order is consistent
+
+        # Ensure that the order of columns in the select is same as in table.columns
+        # Also verify if the column names are consistent
+        if result:
+            cols = result[0]._fields
+            index_dict = {
+                element.lower(): index for index, element in enumerate(cols)
+            }
+            try:
+                ordered_columns = sorted(
+                    table.columns, key=lambda x: index_dict[x.name.lower()]
+                )
+            except KeyError as e:
+                raise Exception(f"Column mismatch with error {e}")
+
         for row in result:
-            data_batch.append(_deserialize_sql_row(row, table.columns))
+            data_batch.append(_deserialize_sql_row(row, ordered_columns))

         if data_batch:
             yield Batch(pd.DataFrame(data_batch))
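
A toy, self-contained illustration of the reordering logic above; the column names and values are hypothetical, and real catalog entries expose a .name attribute rather than being plain strings:

# Stand-ins for result[0]._fields and [col.name for col in table.columns];
# the names differ only in case, which is the mysql issue being fixed.
result_fields = ("ID", "Name", "Age")
catalog_columns = ["name", "age", "id"]

# Map lowercase result-column names to their position in each row tuple.
index_dict = {field.lower(): idx for idx, field in enumerate(result_fields)}

# Sort catalog columns into row order; a KeyError means the catalog and the
# underlying table disagree on some column name.
try:
    ordered = sorted(catalog_columns, key=lambda c: index_dict[c.lower()])
except KeyError as e:
    raise Exception(f"Column mismatch with error {e}")

print(ordered)  # ['id', 'name', 'age']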
8 changes: 5 additions & 3 deletions evadb/third_party/databases/interface.py
@@ -16,6 +16,8 @@
 import os
 from contextlib import contextmanager

+from evadb.executor.executor_utils import ExecutorError
+

 def _get_database_handler(engine: str, **kwargs):
     """
@@ -48,10 +50,10 @@ def _get_database_handler(engine: str, **kwargs):
 def get_database_handler(engine: str, **kwargs):
     handler = _get_database_handler(engine, **kwargs)
     try:
-        handler.connect()
+        resp = handler.connect()
+        if not resp.status:
+            raise ExecutorError(f"Cannot establish connection due to {resp.error}")
         yield handler
-    except Exception as e:
-        raise Exception(f"Error connecting to the database: {str(e)}")
     finally:
         handler.disconnect()
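
A hedged usage sketch of the tightened context manager; the engine name and connection kwargs are illustrative assumptions, not a documented signature:

# get_database_handler yields inside try/finally, so the handler is always
# disconnected, and a failed connect now surfaces as ExecutorError instead
# of being wrapped in a generic Exception. All argument values are made up.
with get_database_handler(
    "postgres",
    user="user",
    password="pass",
    host="localhost",
    port="5432",
    database="evadb",
) as handler:
    status = handler.check_connection()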

6 changes: 4 additions & 2 deletions evadb/third_party/databases/mariadb/mariadb_handler.py
@@ -71,7 +71,7 @@ def disconnect(self):
         self.connection.close()

     def get_sqlalchmey_uri(self) -> str:
-        return f"mysql+pymysql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+        return f"mariadb+mariadbconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"

     def check_connection(self) -> DBHandlerStatus:
         """
@@ -128,7 +128,9 @@ def _fetch_results_as_df(self, cursor):
         """
         try:
             res = cursor.fetchall()
-            res_df = pd.DataFrame(res, columns=[desc[0] for desc in cursor.description])
+            res_df = pd.DataFrame(
+                res, columns=[desc[0].lower() for desc in cursor.description]
+            )
             return res_df
         except mariadb.ProgrammingError as e:
             if str(e) == "no results to fetch":
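
The same lowercasing lands in the MySQL and Postgres handlers below. A small runnable sketch of the normalization, with made-up stand-ins for the cursor objects:

# Per DB-API 2.0, cursor.description is a sequence of 7-item tuples whose
# first element is the column name. Lowercasing the names keeps DataFrame
# columns consistent regardless of how the backend cases identifiers.
import pandas as pd

description = [("ID",) + (None,) * 6, ("Name",) + (None,) * 6]
rows = [(1, "alice"), (2, "bob")]  # stand-in for cursor.fetchall()

df = pd.DataFrame(rows, columns=[desc[0].lower() for desc in description])
print(df.columns.tolist())  # ['id', 'name']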
6 changes: 4 additions & 2 deletions evadb/third_party/databases/mysql/mysql_handler.py
@@ -50,7 +50,7 @@ def disconnect(self):
         self.connection.close()

     def get_sqlalchmey_uri(self) -> str:
-        return f"mysql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+        return f"mysql+mysqlconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"

     def check_connection(self) -> DBHandlerStatus:
         if self.connection:
@@ -96,7 +96,9 @@ def _fetch_results_as_df(self, cursor):
             res = cursor.fetchall()
             if not res:
                 return pd.DataFrame({"status": ["success"]})
-            res_df = pd.DataFrame(res, columns=[desc[0] for desc in cursor.description])
+            res_df = pd.DataFrame(
+                res, columns=[desc[0].lower() for desc in cursor.description]
+            )
             return res_df
         except mysql.connector.ProgrammingError as e:
             if str(e) == "no results to fetch":
6 changes: 4 additions & 2 deletions evadb/third_party/databases/postgres/postgres_handler.py
@@ -65,7 +65,7 @@ def disconnect(self):
         self.connection.close()

     def get_sqlalchmey_uri(self) -> str:
-        return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+        return f"postgresql+psycopg2://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"

     def check_connection(self) -> DBHandlerStatus:
         """
@@ -127,7 +127,9 @@ def _fetch_results_as_df(self, cursor):
         """
         try:
             res = cursor.fetchall()
-            res_df = pd.DataFrame(res, columns=[desc[0] for desc in cursor.description])
+            res_df = pd.DataFrame(
+                res, columns=[desc[0].lower() for desc in cursor.description]
+            )
             return res_df
         except psycopg2.ProgrammingError as e:
             if str(e) == "no results to fetch":
20 changes: 18 additions & 2 deletions script/test/test.sh
@@ -75,6 +75,12 @@ short_integration_test() {
     print_error_code $code "SHORT INTEGRATION TEST"
 }

+short_third_party_test(){
+    PYTHONPATH=./ python -m pytest -p no:cov test/third_party_tests/test_native_executor.py::NativeExecutorTest::test_should_run_query_in_sqlite -m "not benchmark"
+    code=$?
+    print_error_code $code "SHORT THIRDPARTY TEST"
+}
+
 long_integration_test() {
     PYTHONPATH=./ python -m pytest test/integration_tests/long/ -p no:cov -m "not benchmark"
     code=$?
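
The new function selects a single test by its pytest node ID (path::Class::method). An equivalent programmatic invocation, shown only as a sketch; CI runs the shell form above:

# pytest.main accepts the same arguments as the pytest CLI and returns the
# exit code that test.sh captures via $?.
import pytest

exit_code = pytest.main([
    "-p", "no:cov",          # disable the coverage plugin
    "-m", "not benchmark",   # skip benchmark-marked tests
    "test/third_party_tests/test_native_executor.py"
    "::NativeExecutorTest::test_should_run_query_in_sqlite",
])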
@@ -88,13 +94,14 @@ notebook_test() {
 }

 full_test() {
-    PYTHONPATH=./ pytest test/ --durations=20 --cov-report term-missing:skip-covered --cov-config=.coveragerc --cov-context=test --cov=evadb/ --capture=sys --tb=short -v -rsf --log-level=WARNING -m "not benchmark" --ignore=test/third_party_tests/ --ignore=test/app_tests/
+    PYTHONPATH=./ pytest test/ test/third_party_tests/test_native_executor.py::NativeExecutorTest::test_should_run_query_in_sqlite --durations=20 --cov-report term-missing:skip-covered --cov-config=.coveragerc --cov-context=test --cov=evadb/ --capture=sys --tb=short -v -rsf --log-level=WARNING -m "not benchmark" --ignore=test/third_party_tests/ --ignore=test/app_tests/
     code=$?

     print_error_code $code "FULL TEST"
 }

 no_coverage_full_test() {
-    PYTHONPATH=./ python -m pytest -p no:cov test/ -m "not benchmark" --ignore=test/third_party_tests/ --ignore=test/app_tests/
+    PYTHONPATH=./ python -m pytest -p no:cov test/ test/third_party_tests/test_native_executor.py::NativeExecutorTest::test_should_run_query_in_sqlite -m "not benchmark" --ignore=test/third_party_tests/ --ignore=test/app_tests/
     code=$?
     print_error_code $code "FULL TEST"
 }
@@ -173,6 +180,15 @@
     short_integration_test
 fi

+##################################################
+## SHORT THIRDPARTY TESTS
+##################################################
+
+if [[ "$MODE" = "SHORT THIRDPARTY TEST" ]];
+then
+    short_third_party_test
+fi
+
 ##################################################
 ## LONG INTEGRATION TESTS
 ##################################################