automl · ravinkohli · Jun 2, 2021 · May 18, 2021 · May 18, 2021 · May 18, 2021
diff --git a/.codecov.yml b/.codecov.yml
@@ -0,0 +1,42 @@
+# See https://github.com/codecov/support/wiki/Codecov-Yaml
+codecov:
+  notify:
+    require_ci_to_pass: yes
+
+coverage:
+  precision: 2  # 2 = xx.xx%, 0 = xx%
+  round: nearest # how coverage is rounded: down/up/nearest
+  range: 10...90 # custom range of coverage colors from red -> yellow -> green
+  status:
+    # https://codecov.readme.io/v1.0/docs/commit-status
+    project:
+      default:
+        against: auto
+        target: 70% # specify the target coverage for each commit status
+        threshold: 50% # allow this little decrease on project
+        # https://github.com/codecov/support/wiki/Filtering-Branches
+        # branches: master
+        if_ci_failed: error
+    # https://github.com/codecov/support/wiki/Patch-Status
+    patch:
+      default:
+        against: auto
+        target: 30% # specify the target "X%" coverage to hit
+        threshold: 50% # allow this much decrease on patch
+    changes: false
+
+parsers:
+  gcov:
+    branch_detection:
+      conditional: true
+      loop: true
+      macro: false
+      method: false
+  javascript:
+    enable_partials: false
+
+comment:
+  layout: header, diff
+  require_changes: false
+  behavior: default  # update if exists else create new
+  branches: "*"
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,26 @@
+# .coveragerc to control coverage.py
+[run]
+branch = True
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
+
+ignore_errors = True
+
+[html]
+directory = coverage_html_report
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -9,7 +9,10 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8]
-      fail-fast:  false
+        include:
+          - python-version: 3.8
+            code-cov: true
+      fail-fast: false
       max-parallel: 2
 
     steps:
@@ -29,7 +32,7 @@ jobs:
         echo "::set-output name=BEFORE::$(git status --porcelain -b)"
     - name: Run tests
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi
+        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml --cov-config=.coveragerc'; fi
         python -m pytest --forked --durations=20 --timeout=600 --timeout-method=signal -v $codecov test
     - name: Check for files left behind by test
       if: ${{ always() }}

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
@@ -34,6 +34,7 @@
     STRING_TO_OUTPUT_TYPES,
     STRING_TO_TASK_TYPES,
 )
+from autoPyTorch.data.base_validator import BaseInputValidator
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
@@ -203,6 +204,8 @@ def __init__(
             self._multiprocessing_context = 'fork'
             self._dask_client = SingleThreadedClient()
 
+        self.InputValidator: Optional[BaseInputValidator] = None
+
         self.search_space_updates = search_space_updates
         if search_space_updates is not None:
             if not isinstance(self.search_space_updates,
@@ -273,8 +276,8 @@ def get_search_space(self, dataset: BaseDataset = None) -> ConfigurationSpace:
                                            include=self.include_components,
                                            exclude=self.exclude_components,
                                            search_space_updates=self.search_space_updates)
-        raise Exception("No search space initialised and no dataset passed. "
-                        "Can't create default search space without the dataset")
+        raise ValueError("No search space initialised and no dataset passed. "
+                         "Can't create default search space without the dataset")
 
     def _get_logger(self, name: str) -> PicklableClientLogger:
         """

diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py
@@ -129,7 +129,10 @@ def __init__(
         if len(self.train_tensors) == 2 and self.train_tensors[1] is not None:
             self.output_type: str = type_of_target(self.train_tensors[1])
 
-            if STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS:
+            if (
+                self.output_type in STRING_TO_OUTPUT_TYPES
+                and STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS
+            ):
                 self.output_shape = len(np.unique(self.train_tensors[1]))
             else:
                 self.output_shape = self.train_tensors[1].shape[-1] if self.train_tensors[1].ndim > 1 else 1

diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py
@@ -162,7 +162,10 @@ def stratified_k_fold_cross_validation(random_state: np.random.RandomState,
                                            indices: np.ndarray,
                                            **kwargs: Any
                                            ) -> List[Tuple[np.ndarray, np.ndarray]]:
-        cv = StratifiedKFold(n_splits=num_splits, random_state=random_state)
+        # random_state only takes effect when shuffling; scikit-learn warns/raises if it is set with shuffle=False
+        shuffle = kwargs.get('shuffle', True)
+        cv = StratifiedKFold(n_splits=num_splits, shuffle=shuffle,
+                             random_state=random_state if shuffle else None)
         splits = list(cv.split(indices, kwargs["stratify"]))
         return splits
 

diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py
@@ -24,18 +24,6 @@
 )
 
 
-class Value2Index(object):
-    def __init__(self, values: list):
-        assert all(not (pd.isna(v)) for v in values)
-        self.values = {v: i for i, v in enumerate(values)}
-
-    def __getitem__(self, item: Any) -> int:
-        if pd.isna(item):
-            return 0
-        else:
-            return self.values[item] + 1
-
-
 class TabularDataset(BaseDataset):
     """
         Base class for datasets used in AutoPyTorch

diff --git a/autoPyTorch/search_space/__init__.py b/autoPyTorch/search_space/__init__.py
diff --git a/autoPyTorch/search_space/search_space.py b/autoPyTorch/search_space/search_space.py
diff --git a/setup.py b/setup.py
@@ -48,7 +48,10 @@
             "codecov",
             "pep8",
             "mypy",
-            "openml"
+            "openml",
+            "emcee",
+            "scikit-optimize",
+            "pyDOE",
         ],
         "examples": [
             "matplotlib",

diff --git a/test/conftest.py b/test/conftest.py
@@ -1,3 +1,4 @@
+import logging.handlers
 import os
 import re
 import shutil
@@ -299,6 +300,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'metrics_during_training': True,
         'split_id': 0,
         'backend': backend,
+        'logger_port': logging.handlers.DEFAULT_TCP_LOGGING_PORT,
     }
     backend.save_datamanager(datamanager)
     return fit_dictionary