Add docstring style enforcer and update documentation (#59)

* Add docstring linter that follows pydocstyle * Manually fix documentation to adhere to style * Spell check PR template
georgian-io-archive · May 23, 2019 · 78e836d · 78e836d
1 parent c7db7f4
commit 78e836d
Show file tree

Hide file tree

Showing 34 changed files with 725 additions and 426 deletions.
diff --git a/.flake8 b/.flake8
@@ -1,6 +1,13 @@
 [flake8]
-exclude = .git,__pycache__
+exclude = .git,__pycache__,doc/*
+ignore = 
+	W503, # Line break occurred before a binary operator (opposite of W504)
+	D107 # Missing docstring in __init__
+	D301 # Use r""" if any backslashes in a docstring
 per-file-ignores =
 	foreshadow/transformers/externals.py:F401
+	foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
+	examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
+	# Ignore docs for tests and examples
 
-# Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
+# Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -1,5 +1,5 @@
 <!--
-Thanks you for taking the time to submit a pull request! Please take a look at some guideliens before submitting a pull request: https://github.com/georgianpartners/foreshadow/blob/development/doc/contrib.rst
+Thank you for taking the time to submit a pull request! Please take a look at some guidelines before submitting a pull request: https://github.com/georgianpartners/foreshadow/blob/development/doc/contrib.rst
 -->
 
 ### Related Issue
@@ -12,5 +12,5 @@ is merged. See https://github.com/blog/1506-closing-issues-via-pull-requests
 
 ### Description
 <!--
-Please add a narrative description of your the changes made and the rationale behind them. If making a enhancement include the motivation and use cases addressed.
+Please add a narrative description of the changes made and the rationale behind them. If making an enhancement, include the motivation and use cases addressed.
 -->
diff --git a/foreshadow/__init__.py b/foreshadow/__init__.py
@@ -1,3 +1,5 @@
+"""An end-to-end AutoML package to streamline the datascience workflow."""
+
 from foreshadow.foreshadow import Foreshadow
 from foreshadow.preprocessor import Preprocessor
 

diff --git a/foreshadow/estimators/__init__.py b/foreshadow/estimators/__init__.py
@@ -1,3 +1,5 @@
+"""Estimators provided by foreshadow."""
+
 from foreshadow.estimators.auto import AutoEstimator
 from foreshadow.estimators.meta import MetaEstimator
 

diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py
@@ -1,6 +1,4 @@
-"""
-AutoEstimator and its selection
-"""
+"""AutoEstimator."""
 
 import warnings
 
@@ -12,20 +10,20 @@
 
 
 class AutoEstimator(BaseEstimator):
-    """An automatic machine learning solution wrapper selects the appropriate
-    solution for a given problem.
+    """A wrapped estimator that selects the solution for a given problem.
 
     By default each automatic machine learning solution runs for 1 minute but
-    that can be changed  through passed kwargs. Autosklearn is not required for
+    that can be changed through passed kwargs. Autosklearn is not required for
     this to work but if installed it can be used alongside TPOT.
 
     Args:
         problem_type (str): The problem type, 'regression' or 'classification'
         auto (str): The automatic estimator, 'tpot' or 'autosklearn'
-        include_preprocessors (bool): Whether include preprocessors in automl
+        include_preprocessors (bool): Whether to include preprocessors in
             AutoML pipelines
         estimator_kwargs (dict): A dictionary of args to pass to the specified
             auto estimator (both problem_type and auto must be specified)
+
     """
 
     def __init__(
@@ -44,8 +42,9 @@ def __init__(
 
     @property
     def problem_type(self):
-        """Type of machine learning problem. Either 'regression' or
-        'classification'
+        """Type of machine learning problem.
+
+        Either `regression` or `classification`.
         """
         return self._problem_type
 
@@ -58,7 +57,10 @@ def problem_type(self, pt):
 
     @property
     def auto(self):
-        """Type of automl package. Either 'tpot' or 'autosklearn'"""
+        """Type of automl package.
+
+        Either `tpot` or `autosklearn`.
+        """
         return self._auto
 
     @auto.setter
@@ -70,7 +72,7 @@ def auto(self, ae):
 
     @property
     def estimator_kwargs(self):
-        """Dictionary of kwargs to pass to automl package"""
+        """Get dictionary of kwargs to pass to AutoML package."""
         return self._estimator_kwargs
 
     @estimator_kwargs.setter
@@ -88,8 +90,9 @@ def estimator_kwargs(self, ek):
             self._estimator_kwargs = {}
 
     def _get_optimal_estimator_class(self):
-        """Picks the optimal estimator class and defaults to a working
-        estimator if autosklearn is not installed
+        """Pick the optimal estimator class.
+
+        Defaults to a working estimator if autosklearn is not installed.
         """
         auto_ = self._pick_estimator() if self.auto is None else self.auto
 
@@ -132,20 +135,21 @@ def _get_optimal_estimator_class(self):
             )
 
     def _determine_problem_type(self, y):
-        """Simple heuristic to determine problem type"""
+        """Determine problem type using simple heuristic."""
         return (
             "classification"
             if np.unique(y.values.ravel()).size == 2
             else "regression"
         )
 
     def _pick_estimator(self):
-        """Pick auto estimator based on benchmarked results"""
+        """Pick auto estimator based on benchmarked results."""
         return "tpot" if self.problem_type == "regression" else "autosklearn"
 
     def _pre_configure_estimator_kwargs(self):
-        """Configure auto estimators to perform similarly (time scale) and
-        remove preprocessors if necessary
+        """Configure auto estimators to perform similarly (time scale).
+
+        Also remove preprocessors if necessary.
         """
         if self.auto == "tpot" and "config_dict" not in self.estimator_kwargs:
             self.estimator_kwargs["config_dict"] = get_tpot_config(
@@ -172,7 +176,7 @@ def _pre_configure_estimator_kwargs(self):
         return self.estimator_kwargs
 
     def _setup_estimator(self, y):
-        """Construct and return the auto estimator instance"""
+        """Construct and return the auto estimator instance."""
         self.problem_type = (
             self._determine_problem_type(y)
             if self.problem_type is None
@@ -185,8 +189,9 @@ def _setup_estimator(self, y):
         return self.estimator_class(**self.estimator_kwargs)
 
     def fit(self, X, y):
-        """Fits the AutoEstimator instance using a selected automatic machine
-        learning estimator
+        """Fit the AutoEstimator instance.
+
+        Uses the selected AutoML estimator.
 
         Args:
             X (pandas.DataFrame or numpy.ndarray or list): The input
@@ -196,6 +201,7 @@ def fit(self, X, y):
 
         Returns:
             The selected estimator
+
         """
         X = check_df(X)
         y = check_df(y)
@@ -205,37 +211,36 @@ def fit(self, X, y):
         return self.estimator
 
     def predict(self, X):
-        """Uses the trained estimator to predict the response for an input
-        dataset
+        """Use the trained estimator to predict the response.
 
         Args:
             X (pandas.DataFrame or numpy.ndarray or list): The input
                 feature(s)
 
         Returns:
             pandas.DataFrame: The response feature(s)
+
         """
         X = check_df(X)
         return self.estimator.predict(X)
 
     def predict_proba(self, X):  # pragma: no cover
-        """Uses the trained estimator to predict the probabilities of responses
-        for an input dataset
+        """Use the trained estimator to predict the response probabilities.
 
         Args:
             X (pandas.DataFrame or numpy.ndarray or list): The input
                 feature(s)
 
         Returns:
-            pandas.DataFrame: The probability associated with each response
+            pandas.DataFrame: The probability associated with each response \
                 feature
+
         """
         X = check_df(X)
         return self.estimator.predict_proba(X)
 
     def score(self, X, y, sample_weight=None):
-        """Uses the trained estimator to compute the evaluation score defined
-        by the estimator
+        """Use the trained estimator to compute the evaluation score.
 
         Note: sample weights are not supported
 
@@ -246,6 +251,7 @@ def score(self, X, y, sample_weight=None):
 
         Returns:
             float: A computed prediction fitness score
+
         """
         X = check_df(X)
         y = check_df(y)

diff --git a/foreshadow/estimators/config.py b/foreshadow/estimators/config.py
@@ -1,20 +1,22 @@
-"""Configuration file for auto estimators"""
+"""Configuration file for AutoEstimators."""
 
-from tpot.config.classifier import (
-    classifier_config_dict as tpot_classifier_config,
-)
-from tpot.config.regressor import (
-    regressor_config_dict as tpot_regressor_config,
-)
+from tpot.config.classifier import classifier_config_dict
+from tpot.config.regressor import regressor_config_dict
 
 
 def get_tpot_config(type_, include_preprocessors=False):
-    """Gets default config from TPOT and drops all feature engineering
-    tools
+    """Get default configurations from TPOT.
+
+    Drops feature engineering steps by default.
+
+    Args:
+        include_preprocessors (bool, optional): whether or not to include
+            feature engineering steps.
+
     """
     configs = {
-        "classification": tpot_classifier_config,
-        "regression": tpot_regressor_config,
+        "classification": classifier_config_dict,
+        "regression": regressor_config_dict,
     }
 
     drop_partials = [

diff --git a/foreshadow/estimators/meta.py b/foreshadow/estimators/meta.py
@@ -1,36 +1,34 @@
-"""
-Wrapped Estimator
-"""
+"""Wrapped Estimator."""
 
 from sklearn.base import BaseEstimator
 
 from foreshadow.utils import check_df
 
 
 class MetaEstimator(BaseEstimator):
-    """A wrapper for estimators that allows data preprocessing on the response
-    variable(s) using Preprocessor
+    """Wrapper that allows data preprocessing on the response variable(s).
 
     Args:
         estimator: An instance of a subclass of
             :obj:`sklearn.base.BaseEstimator`
         preprocessor: An instance of
             :obj:`foreshadow.preprocessor.Preprocessor`
+
     """
 
     def __init__(self, estimator, preprocessor):
         self.estimator = estimator
         self.preprocessor = preprocessor
 
     def fit(self, X, y=None):
-        """Fits the AutoEstimator instance using a selected automatic machine
-        learning estimator
+        """Fit the MetaEstimator instance.
 
         Args:
             X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
                 The input feature(s)
             y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
                 The response feature(s)
+
         """
         X = check_df(X)
         y = check_df(y)
@@ -39,37 +37,36 @@ def fit(self, X, y=None):
         return self
 
     def predict(self, X):
-        """Uses the trained estimator to predict the response for an input
-        dataset
+        """Use the trained estimator to predict the response.
 
         Args:
             X (pandas.DataFrame or :obj:`numpy.ndarray` or list): The input
                 feature(s)
 
         Returns:
             :obj:`pandas.DataFrame`: The response feature(s) (transformed)
+
         """
         X = check_df(X)
         return self.preprocessor.inverse_transform(self.estimator.predict(X))
 
     def predict_proba(self, X):
-        """Uses the trained estimator to predict the probabilities of responses
-        for an input dataset
+        """Use the trained estimator to predict the response probabilities.
 
         Args:
             X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
                 The input feature(s)
 
         Returns:
-            :obj:`pandas.DataFrame`: The probability associated with each
+            :obj:`pandas.DataFrame`: The probability associated with each \
                 feature
+
         """
         X = check_df(X)
         return self.estimator.predict_proba(X)
 
     def score(self, X, y):
-        """Uses the trained estimator to compute the evaluation score defined
-        by the estimator
+        """Use the trained estimator to compute the evaluation score.
 
         Note: sample weights are not supported
 
@@ -81,6 +78,7 @@ def score(self, X, y):
 
         Returns:
             float: A computed prediction fitness score
+
         """
         X = check_df(X)
         y = check_df(y)