Add darglint, xdoctest, and clean up docs (#72)

* Add darglint * Add xdoctest * Fix broken tests * Change tests to use relative path best practices
georgian-io-archive · Jun 28, 2019 · c2f1079 · c2f1079
1 parent 9e4eb94
commit c2f1079
Show file tree

Hide file tree

Showing 37 changed files with 1,074 additions and 365 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -6,7 +6,6 @@ omit = foreshadow/tests/*
 [report]
 exclude_lines =
     pragma: no cover
-    def __repr__
     raise AssertionError
     raise NotImplementedError
     if __name__ == .__main__.:

diff --git a/.flake8 b/.flake8
@@ -1,13 +1,13 @@
 [flake8]
-exclude = .git,__pycache__,doc/*
+exclude = .git,__pycache__,doc/*,build,dist
 ignore = 
 	W503, # Line break occurred before a binary operator (opposite of W504) 
 	D107 # Missing docstring in __init__
 	D301 # Use r""" if any backslashes in a docstring
 per-file-ignores =
 	foreshadow/transformers/externals.py:F401
-	foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
-	examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
-	# Ignore docs for tests and examples
+	foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414 I101 I102 I103 I201 I202 I203 I301 I302 I401 I402 I501 S001 S002
+	examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414 I101 I102 I103 I201 I202 I203 I301 I302 I401 I402 I501 S001 S002
+	# Ignore doclinter for tests and examples
 
 # Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
diff --git a/doc/conf.py b/doc/conf.py
@@ -27,7 +27,9 @@
 def get_version():
     import toml
 
-    with open("../pyproject.toml", "r") as fopen:
+    toml_path = os.path.join(os.path.dirname(__file__), "..", "pyproject.toml")
+
+    with open(toml_path, "r") as fopen:
         pyproject = toml.load(fopen)
 
     return pyproject["tool"]["poetry"]["version"]

diff --git a/examples/adult_1.py b/examples/adult_1.py
@@ -1,4 +1,5 @@
 import json
+import os
 
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
@@ -9,7 +10,8 @@
 
 
 RANDOM_SEED = 42
-adult = pd.read_csv("adult.csv").iloc[:1000]
+adult_path = os.path.join(os.path.dirname(__file__), "adult.csv")
+adult = pd.read_csv(adult_path).iloc[:1000]
 
 print(adult.head())
 features = adult.drop(columns="class")

diff --git a/examples/adult_2.py b/examples/adult_2.py
@@ -1,4 +1,5 @@
 import json
+import os
 
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
@@ -9,8 +10,8 @@
 
 
 RANDOM_SEED = 42
-
-adult = pd.read_csv("adult.csv").iloc[:1000]
+adult_path = os.path.join(os.path.dirname(__file__), "adult.csv")
+adult = pd.read_csv(adult_path).iloc[:1000]
 
 print(adult.head())
 features = adult.drop(columns="class")

diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py
@@ -45,11 +45,24 @@ def problem_type(self):
         """Type of machine learning problem.
 
         Either `regression` or `classification`.
+
+        Returns:
+            self._problem_type
+
         """
         return self._problem_type
 
     @problem_type.setter
     def problem_type(self, pt):
+        """Set the problem type.
+
+        Args:
+            pt: problem type
+
+        Raises:
+            ValueError: pt is not a valid problem type
+
+        """
         pt_options = ["classification", "regression"]
         if pt is not None and pt not in pt_options:
             raise ValueError("problem type must be in {}".format(pt_options))
@@ -60,23 +73,50 @@ def auto(self):
         """Type of automl package.
 
         Either `tpot` or `autosklearn`.
+
+        Returns:
+            self._auto, the type of automl package
+
         """
         return self._auto
 
     @auto.setter
     def auto(self, ae):
+        """Set type of automl package.
+
+        Args:
+            ae: automl package
+
+        Raises:
+            ValueError: ae is not one of the supported automl packages
+
+        """
         ae_options = ["tpot", "autosklearn"]
         if ae is not None and ae not in ae_options:
             raise ValueError("auto must be in {}".format(ae_options))
         self._auto = ae
 
     @property
     def estimator_kwargs(self):
-        """Get dictionary of kwargs to pass to AutoML package."""
+        """Get dictionary of kwargs to pass to AutoML package.
+
+        Returns:
+            estimator kwargs
+
+        """
         return self._estimator_kwargs
 
     @estimator_kwargs.setter
     def estimator_kwargs(self, ek):
+        """Set estimator kwargs.
+
+        Args:
+            ek: kwargs
+
+        Raises:
+            ValueError: ek not a valid kwargs dict.
+
+        """
         if ek is not None and ek is not {}:
             if not isinstance(ek, dict) or not all(
                 isinstance(k, str) for k in ek.keys()
@@ -93,6 +133,10 @@ def _get_optimal_estimator_class(self):
         """Pick the optimal estimator class.
 
         Defaults to a working estimator if autosklearn is not installed.
+
+        Returns:
+            optimal estimator class.
+
         """
         auto_ = self._pick_estimator() if self.auto is None else self.auto
 
@@ -135,21 +179,38 @@ def _get_optimal_estimator_class(self):
             )
 
     def _determine_problem_type(self, y):
-        """Determine problem type using simple heuristic."""
+        """Determine problem type using simple heuristic.
+
+        Args:
+            y: input labels
+
+        Returns:
+            problem type inferred from y.
+
+        """
         return (
             "classification"
             if np.unique(y.values.ravel()).size == 2
             else "regression"
         )
 
     def _pick_estimator(self):
-        """Pick auto estimator based on benchmarked results."""
+        """Pick auto estimator based on benchmarked results.
+
+        Returns:
+            estimator
+
+        """
         return "tpot" if self.problem_type == "regression" else "autosklearn"
 
     def _pre_configure_estimator_kwargs(self):
         """Configure auto estimators to perform similarly (time scale).
 
         Also remove preprocessors if necessary.
+
+        Returns:
+            estimator kwargs
+
         """
         if self.auto == "tpot" and "config_dict" not in self.estimator_kwargs:
             self.estimator_kwargs["config_dict"] = get_tpot_config(
@@ -176,7 +237,15 @@ def _pre_configure_estimator_kwargs(self):
         return self.estimator_kwargs
 
     def _setup_estimator(self, y):
-        """Construct and return the auto estimator instance."""
+        """Construct and return the auto estimator instance.
+
+        Args:
+            y: input labels
+
+        Returns:
+            autoestimator instance
+
+        """
         self.problem_type = (
             self._determine_problem_type(y)
             if self.problem_type is None
@@ -194,9 +263,9 @@ def fit(self, X, y):
         Uses the selected AutoML estimator.
 
         Args:
-            data_df (pandas.DataFrame or numpy.ndarray or list): The input
+            X (pandas.DataFrame or numpy.ndarray or list): The input
                 feature(s)
-            y_df (pandas.DataFrame or numpy.ndarray or list): The response
+            y (pandas.DataFrame or numpy.ndarray or list): The response
                 feature(s)
 
         Returns:
@@ -214,7 +283,7 @@ def predict(self, X):
         """Use the trained estimator to predict the response.
 
         Args:
-            data_df (pandas.DataFrame or numpy.ndarray or list): The input
+            X (pandas.DataFrame or numpy.ndarray or list): The input
                 feature(s)
 
         Returns:
@@ -228,7 +297,7 @@ def predict_proba(self, X):  # pragma: no cover
         """Use the trained estimator to predict the responses probabilities.
 
         Args:
-            data_df (pandas.DataFrame or numpy.ndarray or list): The input
+            X (pandas.DataFrame or numpy.ndarray or list): The input
                 feature(s)
 
         Returns:
@@ -248,6 +317,7 @@ def score(self, X, y, sample_weight=None):
             X (pandas.DataFrame or numpy.ndarray or list): The input feature(s)
             y (pandas.DataFrame or numpy.ndarray or list): The response
                 feature(s)
+            sample_weight: sample weighting. Not implemented.
 
         Returns:
             float: A computed prediction fitness score

diff --git a/foreshadow/estimators/config.py b/foreshadow/estimators/config.py
@@ -12,6 +12,13 @@ def get_tpot_config(type_, include_preprocessors=False):
     Args:
         include_preprocessors (bool, optional): whether or not to include
             feature engineering steps.
+        type_: problem type used to select the config (e.g. classification
+            or regression)
+
+    Returns:
+        default config from TPOT
+
+    Raises:
+        ValueError: type_ is not one of the available config keys
 
     """
     configs = {
@@ -28,7 +35,9 @@ def get_tpot_config(type_, include_preprocessors=False):
         "cluster",
     ]
     if type_ not in configs.keys():
-        raise ValueError("type_ must be either classification or regression")
+        raise ValueError(
+            "type_: '{0}' not in : '{1}'".format(type_, configs.keys())
+        )
     return (
         {
             k: v

diff --git a/foreshadow/estimators/meta.py b/foreshadow/estimators/meta.py
@@ -24,10 +24,13 @@ def fit(self, X, y=None):
         """Fit the AutoEstimator instance using a selected AutoML estimator.
 
         Args:
-            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
-                The input feature(s)
-            y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
-                The response feature(s)
+            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
+                input feature(s)
+            y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
+                response feature(s)
+
+        Returns:
+            self
 
         """
         X = check_df(X)
@@ -54,8 +57,8 @@ def predict_proba(self, X):
         """Use the trained estimator to predict the response probabilities.
 
         Args:
-            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
-                The input feature(s)
+            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
+                input feature(s)
 
         Returns:
             :obj:`pandas.DataFrame`: The probability associated with each \
@@ -71,10 +74,10 @@ def score(self, X, y):
         Note: sample weights are not supported
 
         Args:
-            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
-                The input feature(s)
-            y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
-                The response feature(s)
+            X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
+                input feature(s)
+            y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
+                response feature(s)
 
         Returns:
             float: A computed prediction fitness score

diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py
@@ -18,6 +18,9 @@
 class Foreshadow(BaseEstimator):
     """An end-to-end pipeline to preprocess and tune a machine learning model.
 
+    Example:
+        >>> shadow = Foreshadow()
+
     Args:
         X_preprocessor \
             (:obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`, \
@@ -67,6 +70,7 @@ def X_preprocessor(self):
 
         :type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`
 
+        .. # noqa: I201
         """
         return self._X_preprocessor
 
@@ -93,6 +97,7 @@ def y_preprocessor(self):
 
         :type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`
 
+        .. # noqa: I201
         """
         return self._y_preprocessor
 
@@ -120,6 +125,7 @@ def estimator(self):
 
         :type: :obj:`sklearn.base.BaseEstimator`
 
+        .. # noqa: I201
         """
         return self._estimator
 
@@ -148,6 +154,7 @@ def optimizer(self):
 
         :setter: Verifies Optimizer class, defaults to None
 
+        .. # noqa: I201
         """
         return self._optimizer