test: adding support for pytest-xdist (#628)

* support for xdist * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * skip reuse server test in Windows * checkpoint * install decord from fork * fix * minor fix * checkpoint * ci: add support for macos * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * test eva decord * adds eva-decord * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint --------- Co-authored-by: Suryatej Reddy Vyalla <suryatej16102@iiitd.ac.in> Co-authored-by: Gaurav <gaurav21776@gmail.com>
georgia-tech-db · Apr 10, 2023 · 612b590 · 612b590
1 parent a4f3968
commit 612b590
Show file tree

Hide file tree

Showing 9 changed files with 175 additions and 54 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -2,61 +2,68 @@ version: 2.1
 
 orbs:
   win: circleci/windows@2.2.0
+  macos: circleci/macos@2.3.4
 
 workflows:
   main:
     jobs:
-        - test:
+        #### UNIT TESTS
+        - Linux:
             name: "Test | v3.7 | Linux"
             v: "3.7"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.8 | Linux"
             v: "3.8"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.9 | Linux"
             v: "3.9"
             mode: "TEST"
-        - test:
+        - Linux:
             name: "Test | v3.10 | Linux"
             v: "3.10"
             mode: "TEST"
         ### NOTEBOOKS
-        - test:
+        - Linux:
             name: "Notebook | v3.7 | Linux"
             v: "3.7"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.8 | Linux"
             v: "3.8"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.9 | Linux"
             v: "3.9"
             mode: "NOTEBOOK"
-        - test:
+        - Linux:
             name: "Notebook | v3.10 | Linux"
             v: "3.10"
             mode: "NOTEBOOK"
         ### LINTER
-        - test:
+        - Linux:
             name: "Linter | Linux"
             v: "3.10"
             mode: "LINTER"
         ### RAY
-        - test:
+        - Linux:
             name: "Test | Ray | v3.10 | Linux"
             v: "3.10"
             mode: "RAY"
         - Pip
-        - Windows
-        #- test:
-        #    name: "Linux -  v3.11"  # missing Torchvision
+        - Windows:
+            name: "Windows | v3.10"
+        # eva-decord does not work on Intel Macs
+        #- MacOS:
+        #    name: "MacOS | v3.10"
+        # missing Torchvision
+        #- Linux:
+        #    name: "Linux -  v3.11"  
         #    v: "3.11"
 
 jobs:
-  test:
+  Linux:
     parameters:
       v:
         type: string
@@ -76,6 +83,7 @@ jobs:
           name: Install EVA package from GitHub repo with all dependencies
           command: |
             "python<< parameters.v >>" -m venv test_evadb
+            pip install --upgrade pip
             source test_evadb/bin/activate
             pip install ".[dev]"
 
@@ -97,35 +105,56 @@ jobs:
             bash script/test/test.sh -m "<< parameters.mode >>"
 
   Windows:
-    executor: win/default
-    parameters:
-      v:
-        type: string
-        default: "3.10"
-    steps:
-      - checkout
+      executor: win/default
+      steps:
+        - checkout
+        - run: 
+            name: Install EVA package from GitHub repo and run tests
+            command: |
+              choco install python --version=3.10.8 -y
+              python --version
+              pip --version
+              pip install virtualenv
+              virtualenv test_evadb
+              test_evadb\Scripts\activate
+              pip install ".[dev]"
+              bash script\test\test.sh
 
-      - run:
-          name: Test windows
-          command: |
-            Set-StrictMode -Version Latest
-            $ErrorActionPreference = 'Continue'
-            pip install virtualenv
-            virtualenv test_evadb
-            test_evadb\Scripts\activate
-            pip install ".[dev]"
-            bash script\test\test.sh
+  # macOS job: installs Python 3.10.8 through pyenv, then installs EVA from the
+  # checked-out repo and runs the test script.
+  MacOS:
+      macos:
+        xcode: "14.2.0"
+      steps:
+        - run:
+            name: Setup Python
+            command: |
+              brew update
+              brew install pyenv git
+              pyenv install 3.10.8
+              pyenv global 3.10.8
+              # Each `run` step starts a fresh shell, so record the pyenv
+              # initialisation in $BASH_ENV (sourced at the start of every
+              # step) instead of only evaluating it in this step.
+              echo 'eval "$(pyenv init -)"' >> "$BASH_ENV"
+              source "$BASH_ENV"
+              python --version
+              pip --version
+        - checkout
+        - run:
+            name: Install EVA package from GitHub repo and run tests
+            command: |
+              python -m venv test_evadb
+              source test_evadb/bin/activate
+              pip install --upgrade pip
+              # Verbose output kept to help diagnose eva-decord wheel selection.
+              pip debug --verbose
+              pip install eva-decord -vvv
+              pip install ".[dev]"
+              bash script/test/test.sh
 
   Pip:
     resource_class: large
     docker:
       - image: "cimg/python:3.10"
     steps:
-
       - checkout
-
       - run:
-          name: Install EVA package from pip and start server
+          name: Install EVA package from PIP and start server
           command: |
             pip install --upgrade pip
             pip install evadb

diff --git a/eva/catalog/sql_config.py b/eva/catalog/sql_config.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 
 from sqlalchemy import create_engine, event
 from sqlalchemy.orm import scoped_session, sessionmaker
@@ -20,17 +21,6 @@
 
 IDENTIFIER_COLUMN = "_row_id"
 
-# import os
-# def prefix_worker_id(uri: str):
-#    try:
-#        worker_id = os.environ["PYTEST_XDIST_WORKER"]
-#        base = "eva_catalog.db"
-#        uri = uri.replace(base, str(worker_id) + "_" + base)
-#    except KeyError:
-#        # Single threaded mode
-#        pass
-#    return uri
-
 
 class SQLConfig:
     """Singleton class for configuring connection to the database.
@@ -58,10 +48,21 @@ def __init__(self):
         Retrieves the database uri for connection from ConfigurationManager.
         """
         uri = ConfigurationManager().get_value("core", "catalog_database_uri")
-        # parallelize using xdist
-        # worker_uri = prefix_worker_id(str(uri))
+
+        # to parallelize tests using pytest-xdist
+        def prefix_worker_id_to_uri(uri: str):
+            """Make the catalog database file name specific to the xdist worker.
+
+            Args:
+                uri (str): catalog database URI read from the configuration.
+
+            Returns:
+                str: ``uri`` with ``eva_catalog.db`` replaced by
+                ``test_<worker id>_eva_catalog.db`` when PYTEST_XDIST_WORKER
+                holds a non-empty worker id; otherwise ``uri`` unchanged.
+            """
+            worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+            if not worker_id:
+                # Unset or empty: not running under a pytest-xdist worker.
+                # (An empty id used to yield "test__eva_catalog.db".)
+                return uri
+            base = "eva_catalog.db"
+            # eva_catalog.db -> test_gw1_eva_catalog.db
+            return uri.replace(base, "test_" + worker_id + "_" + base)
+
+        self.worker_uri = prefix_worker_id_to_uri(str(uri))
         # set echo=True to log SQL
-        self.engine = create_engine(uri)
+        self.engine = create_engine(self.worker_uri, isolation_level="SERIALIZABLE")
 
         if self.engine.url.get_backend_name() == "sqlite":
             # enforce foreign key constraint and wal logging for sqlite

diff --git a/eva/configuration/configuration_manager.py b/eva/configuration/configuration_manager.py
@@ -12,6 +12,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+from pathlib import Path
 from typing import Any
 
 import yaml
@@ -34,11 +36,30 @@ def __new__(cls):
 
         return cls._instance
 
+    @classmethod
+    def suffix_pytest_xdist_worker_id_to_dir(cls, path: Path):
+        """Append the pytest-xdist worker id to ``path`` as a sub-directory.
+
+        Args:
+            path (Path): base directory.
+
+        Returns:
+            Path: ``path / <worker id>`` when the PYTEST_XDIST_WORKER
+            environment variable is set, otherwise ``path`` unchanged.
+        """
+        xdist_worker = os.environ.get("PYTEST_XDIST_WORKER")
+        if xdist_worker is None:
+            # Variable not set: nothing to append.
+            return path
+        return path / str(xdist_worker)
+
     @classmethod
     def _create_if_not_exists(cls):
         if not cls._yml_path.exists():
+            initial_eva_config_dir = Path(EVA_DEFAULT_DIR)
+
+            # Parallelize tests using pytest-xdist. This is active only when
+            # the PYTEST_XDIST_WORKER environment variable is set.
+            # It changes the config dir from EVA_DEFAULT_DIR to
+            # EVA_DEFAULT_DIR / <worker id> (e.g. EVA_DEFAULT_DIR / gw1).
+            updated_eva_config_dir = cls.suffix_pytest_xdist_worker_id_to_dir(
+                initial_eva_config_dir
+            )
+            cls._yml_path = updated_eva_config_dir / EVA_CONFIG_FILE
             bootstrap_environment(
-                eva_config_dir=EVA_DEFAULT_DIR,
+                eva_config_dir=updated_eva_config_dir,
                 eva_installation_dir=EVA_INSTALLATION_DIR,
             )
 

diff --git a/eva/utils/generic_utils.py b/eva/utils/generic_utils.py
@@ -106,6 +106,17 @@ def is_gpu_available() -> bool:
         return False
 
 
+def prefix_worker_id_to_path(path: str):
+    """Redirect the datasets directory to a per-worker one under pytest-xdist.
+
+    Args:
+        path (str): configured datasets directory.
+
+    Returns:
+        ``build/<worker id>_eva_datasets`` when PYTEST_XDIST_WORKER holds a
+        non-empty worker id; otherwise ``path`` unchanged.
+    """
+    worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+    if not worker_id:
+        # Single threaded mode (variable unset or empty). An empty id used to
+        # relocate the datasets to "build/_eva_datasets".
+        return path
+    base = "eva_datasets"
+    # NOTE(review): the incoming ``path`` is discarded here; the per-worker
+    # location is always relative "build/" -- confirm this is intended.
+    return "build/" + worker_id + "_" + base
+
+
 def get_gpu_count() -> int:
     """
     Check number of GPUs through Torch.
@@ -134,6 +145,7 @@ def generate_file_path(name: str = "") -> Path:
         logger.error("Missing dataset location key in eva.yml")
         raise KeyError("Missing datasets_dir key in eva.yml")
 
+    dataset_location = prefix_worker_id_to_path(dataset_location)
     dataset_location = Path(dataset_location)
     dataset_location.mkdir(parents=True, exist_ok=True)
 

diff --git a/script/test/test.sh b/script/test/test.sh
@@ -92,7 +92,7 @@ then
     fi
 # Windows -- no need for coverage report
 else
-    PYTHONPATH=./ pytest -p no:cov test/ -m "not benchmark"
+    PYTHONPATH=./ python -m pytest -p no:cov test/ -m "not benchmark"
     test_code=$?
     if [ "$test_code" != "0" ];
     then

diff --git a/test/catalog/services/test_index_catalog_service.py b/test/catalog/services/test_index_catalog_service.py
@@ -18,6 +18,7 @@
 from mock import MagicMock, patch
 from sqlalchemy.orm.exc import NoResultFound
 
+from eva.catalog.catalog_manager import CatalogManager
 from eva.catalog.catalog_type import ColumnType, IndexType
 from eva.catalog.models.column_catalog import ColumnCatalogEntry
 from eva.catalog.services.index_catalog_service import IndexCatalogService
@@ -98,6 +99,7 @@ def test_index_catalog_exception(self, mock_index_catalog):
         self.assertEqual(service.get_all_entries(), [])
 
     def test_index_get_all_entries(self):
+        CatalogManager().reset()
         INDEX_NAME = "name"
         INDEX_URL = "file1"
         INDEX_TYPE = IndexType.HNSW

diff --git a/test/udfs/ndarray/test_open.py b/test/udfs/ndarray/test_open.py
@@ -59,5 +59,5 @@ def test_open_same_path_should_use_cache(self):
             mock_cv2.imread.assert_not_called()
 
     def test_open_path_should_raise_error(self):
-        with self.assertRaises(AssertionError):
+        with self.assertRaises((AssertionError, FileNotFoundError)):
             self.open_instance(pd.DataFrame(["incorrect_path"]))
diff --git a/test/utils/test_generic_utils.py b/test/utils/test_generic_utils.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import unittest
-from pathlib import Path
 from test.markers import windows_skip_marker
 
 from mock import MagicMock, patch
@@ -92,11 +91,10 @@ def test_should_return_a_random_full_path(self, mock_conf):
         mock_conf_inst = MagicMock()
         mock_conf.return_value = mock_conf_inst
         mock_conf_inst.get_value.return_value = "eva_datasets"
-        expected = Path("eva_datasets").resolve()
         actual = generate_file_path("test")
         self.assertTrue(actual.is_absolute())
         # Root directory must be the same, filename is random
-        self.assertTrue(expected.match(str(actual.parent)))
+        self.assertTrue("eva_datasets" in str(actual.parent))
 
         mock_conf_inst.get_value.return_value = None
         self.assertRaises(KeyError, generate_file_path)
diff --git a/test/utils/test_xdist.py b/test/utils/test_xdist.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+# Copyright 2018-2022 EVA
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+from pathlib import Path
+
+from eva.catalog.sql_config import SQLConfig
+from eva.configuration.configuration_manager import ConfigurationManager
+from eva.utils.generic_utils import prefix_worker_id_to_path
+
+
+class XdistTests(unittest.TestCase):
+    """Checks the helpers that derive per-worker locations from the
+    PYTEST_XDIST_WORKER environment variable."""
+
+    def setUp(self):
+        # Remember the real value so these tests do not leak a fake worker id
+        # into tests that run later in the same process.
+        self._saved_worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+
+    def tearDown(self):
+        if self._saved_worker_id is None:
+            os.environ.pop("PYTEST_XDIST_WORKER", None)
+        else:
+            os.environ["PYTEST_XDIST_WORKER"] = self._saved_worker_id
+
+    def test_prefix_worker_id_to_uri_in_sql_config(self):
+        os.environ["PYTEST_XDIST_WORKER"] = "gw1"
+        sql_config = SQLConfig()
+        self.assertIn("gw1", sql_config.worker_uri)
+
+        os.environ["PYTEST_XDIST_WORKER"] = ""
+        sql_config = SQLConfig()
+        self.assertNotIn("gw1", sql_config.worker_uri)
+
+    def test_suffix_pytest_xdist_worker_id_to_dir(self):
+        os.environ["PYTEST_XDIST_WORKER"] = "gw1"
+        foo_path = Path("foo")
+        configuration_manager = ConfigurationManager()
+        updated_path = configuration_manager.suffix_pytest_xdist_worker_id_to_dir(
+            foo_path
+        )
+        self.assertIn("gw1", str(updated_path))
+
+        os.environ["PYTEST_XDIST_WORKER"] = ""
+        updated_path = configuration_manager.suffix_pytest_xdist_worker_id_to_dir(
+            foo_path
+        )
+        self.assertNotIn("gw1", str(updated_path))
+
+    def test_prefix_worker_id_to_path_in_generic_utils(self):
+        os.environ["PYTEST_XDIST_WORKER"] = "gw1"
+        foo_path = Path("foo")
+        updated_path = prefix_worker_id_to_path(foo_path)
+        self.assertIn("gw1", str(updated_path))
+
+        os.environ["PYTEST_XDIST_WORKER"] = ""
+        updated_path = prefix_worker_id_to_path(foo_path)
+        self.assertNotIn("gw1", str(updated_path))