Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
Add docstring style enforcer and update documentation (#59)
Browse files Browse the repository at this point in the history
* Add docstring linter that follows pydocstyle
* Manually fix documentation to adhere to style
* Spell check PR template
  • Loading branch information
adithyabsk committed May 23, 2019
1 parent c7db7f4 commit 78e836d
Show file tree
Hide file tree
Showing 34 changed files with 725 additions and 426 deletions.
11 changes: 9 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
[flake8]
exclude = .git,__pycache__
exclude = .git,__pycache__,doc/*
ignore =
W503, # Line break occurred after a binary operator (opposite of W504)
D107 # Missing docstring in __init__
D301 # Use r""" if any backslashes in a docstring
per-file-ignores =
foreshadow/transformers/externals.py:F401
foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
# Ignore docs for tests and examples

# Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
# Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
4 changes: 2 additions & 2 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<!--
Thanks you for taking the time to submit a pull request! Please take a look at some guideliens before submitting a pull request: https://github.com/georgianpartners/foreshadow/blob/development/doc/contrib.rst
Thank you for taking the time to submit a pull request! Please take a look at some guidelines before submitting a pull request: https://github.com/georgianpartners/foreshadow/blob/development/doc/contrib.rst
-->

### Related Issue
Expand All @@ -12,5 +12,5 @@ is merged. See https://github.com/blog/1506-closing-issues-via-pull-requests

### Description
<!--
Please add a narrative description of your the changes made and the rationale behind them. If making a enhancement include the motivation and use cases addressed.
Please add a narrative description of the changes made and the rationale behind them. If making an enhancement, include the motivation and use cases addressed.
-->
2 changes: 2 additions & 0 deletions foreshadow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""An end-to-end AutoML package to streamline the datascience workflow."""

from foreshadow.foreshadow import Foreshadow
from foreshadow.preprocessor import Preprocessor

Expand Down
2 changes: 2 additions & 0 deletions foreshadow/estimators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Estimators provided by foreshadow."""

from foreshadow.estimators.auto import AutoEstimator
from foreshadow.estimators.meta import MetaEstimator

Expand Down
60 changes: 33 additions & 27 deletions foreshadow/estimators/auto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""
AutoEstimator and its selection
"""
"""AutoEstimator."""

import warnings

Expand All @@ -12,20 +10,20 @@


class AutoEstimator(BaseEstimator):
"""An automatic machine learning solution wrapper selects the appropriate
solution for a given problem.
"""A wrapped estimator that selects the solution for a given problem.
By default each automatic machine learning solution runs for 1 minute but
that can be changed through passed kwargs. Autosklearn is not required for
that can be changed through passed kwargs. Autosklearn is not required for
this to work but if installed it can be used alongside TPOT.
Args:
problem_type (str): The problem type, 'regression' or 'classification'
auto (str): The automatic estimator, 'tpot' or 'autosklearn'
include_preprocessors (bool): Whether include preprocessors in automl
include_preprocessors (bool): Whether to include preprocessors in AutoML
pipelines
estimator_kwargs (dict): A dictionary of args to pass to the specified
auto estimator (both problem_type and auto must be specified)
"""

def __init__(
Expand All @@ -44,8 +42,9 @@ def __init__(

@property
def problem_type(self):
"""Type of machine learning problem. Either 'regression' or
'classification'
"""Type of machine learning problem.
Either `regression` or `classification`.
"""
return self._problem_type

Expand All @@ -58,7 +57,10 @@ def problem_type(self, pt):

@property
def auto(self):
"""Type of automl package. Either 'tpot' or 'autosklearn'"""
"""Type of AutoML package.
Either `tpot` or `autosklearn`.
"""
return self._auto

@auto.setter
Expand All @@ -70,7 +72,7 @@ def auto(self, ae):

@property
def estimator_kwargs(self):
"""Dictionary of kwargs to pass to automl package"""
"""Get dictionary of kwargs to pass to AutoML package."""
return self._estimator_kwargs

@estimator_kwargs.setter
Expand All @@ -88,8 +90,9 @@ def estimator_kwargs(self, ek):
self._estimator_kwargs = {}

def _get_optimal_estimator_class(self):
"""Picks the optimal estimator class and defaults to a working
estimator if autosklearn is not installed
"""Pick the optimal estimator class.
Defaults to a working estimator if autosklearn is not installed.
"""
auto_ = self._pick_estimator() if self.auto is None else self.auto

Expand Down Expand Up @@ -132,20 +135,21 @@ def _get_optimal_estimator_class(self):
)

def _determine_problem_type(self, y):
"""Simple heuristic to determine problem type"""
"""Determine problem type using simple heuristic."""
return (
"classification"
if np.unique(y.values.ravel()).size == 2
else "regression"
)

def _pick_estimator(self):
"""Pick auto estimator based on benchmarked results"""
"""Pick auto estimator based on benchmarked results."""
return "tpot" if self.problem_type == "regression" else "autosklearn"

def _pre_configure_estimator_kwargs(self):
"""Configure auto estimators to perform similarly (time scale) and
remove preprocessors if necessary
"""Configure auto estimators to perform similarly (time scale).
Also remove preprocessors if necessary.
"""
if self.auto == "tpot" and "config_dict" not in self.estimator_kwargs:
self.estimator_kwargs["config_dict"] = get_tpot_config(
Expand All @@ -172,7 +176,7 @@ def _pre_configure_estimator_kwargs(self):
return self.estimator_kwargs

def _setup_estimator(self, y):
"""Construct and return the auto estimator instance"""
"""Construct and return the auto estimator instance."""
self.problem_type = (
self._determine_problem_type(y)
if self.problem_type is None
Expand All @@ -185,8 +189,9 @@ def _setup_estimator(self, y):
return self.estimator_class(**self.estimator_kwargs)

def fit(self, X, y):
"""Fits the AutoEstimator instance using a selected automatic machine
learning estimator
"""Fit the AutoEstimator instance.
Uses the selected AutoML estimator.
Args:
X (pandas.DataFrame or numpy.ndarray or list): The input
Expand All @@ -196,6 +201,7 @@ def fit(self, X, y):
Returns:
The selected estimator
"""
X = check_df(X)
y = check_df(y)
Expand All @@ -205,37 +211,36 @@ def fit(self, X, y):
return self.estimator

def predict(self, X):
"""Uses the trained estimator to predict the response for an input
dataset
"""Use the trained estimator to predict the response.
Args:
X (pandas.DataFrame or numpy.ndarray or list): The input
feature(s)
Returns:
pandas.DataFrame: The response feature(s)
"""
X = check_df(X)
return self.estimator.predict(X)

def predict_proba(self, X): # pragma: no cover
"""Uses the trained estimator to predict the probabilities of responses
for an input dataset
"""Use the trained estimator to predict the response probabilities.
Args:
X (pandas.DataFrame or numpy.ndarray or list): The input
feature(s)
Returns:
pandas.DataFrame: The probability associated with each response
pandas.DataFrame: The probability associated with each response \
feature
"""
X = check_df(X)
return self.estimator.predict_proba(X)

def score(self, X, y, sample_weight=None):
"""Uses the trained estimator to compute the evaluation score defined
by the estimator
"""Use the trained estimator to compute the evaluation score.
Note: sample weights are not supported
Expand All @@ -246,6 +251,7 @@ def score(self, X, y, sample_weight=None):
Returns:
float: A computed prediction fitness score
"""
X = check_df(X)
y = check_df(y)
Expand Down
24 changes: 13 additions & 11 deletions foreshadow/estimators/config.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
"""Configuration file for auto estimators"""
"""Configuration file for AutoEstimators."""

from tpot.config.classifier import (
classifier_config_dict as tpot_classifier_config,
)
from tpot.config.regressor import (
regressor_config_dict as tpot_regressor_config,
)
from tpot.config.classifier import classifier_config_dict
from tpot.config.regressor import regressor_config_dict


def get_tpot_config(type_, include_preprocessors=False):
"""Gets default config from TPOT and drops all feature engineering
tools
"""Get default configurations from TPOT.
Drops feature engineering steps by default.
Args:
include_preprocessors (bool, optional): whether or not to include
feature engineering steps.
"""
configs = {
"classification": tpot_classifier_config,
"regression": tpot_regressor_config,
"classification": classifier_config_dict,
"regression": regressor_config_dict,
}

drop_partials = [
Expand Down
26 changes: 12 additions & 14 deletions foreshadow/estimators/meta.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
"""
Wrapped Estimator
"""
"""Wrapped Estimator."""

from sklearn.base import BaseEstimator

from foreshadow.utils import check_df


class MetaEstimator(BaseEstimator):
"""A wrapper for estimators that allows data preprocessing on the response
variable(s) using Preprocessor
"""Wrapper that allows data preprocessing on the response variable(s).
Args:
estimator: An instance of a subclass of
:obj:`sklearn.base.BaseEstimator`
preprocessor: An instance of
:obj:`foreshadow.preprocessor.Preprocessor`
"""

def __init__(self, estimator, preprocessor):
self.estimator = estimator
self.preprocessor = preprocessor

def fit(self, X, y=None):
"""Fits the AutoEstimator instance using a selected automatic machine
learning estimator
"""Fit the MetaEstimator instance.
Args:
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The input feature(s)
y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The response feature(s)
"""
X = check_df(X)
y = check_df(y)
Expand All @@ -39,37 +37,36 @@ def fit(self, X, y=None):
return self

def predict(self, X):
"""Uses the trained estimator to predict the response for an input
dataset
"""Use the trained estimator to predict the response.
Args:
X (pandas.DataFrame or :obj:`numpy.ndarray` or list): The input
feature(s)
Returns:
:obj:`pandas.DataFrame`: The response feature(s) (transformed)
"""
X = check_df(X)
return self.preprocessor.inverse_transform(self.estimator.predict(X))

def predict_proba(self, X):
"""Uses the trained estimator to predict the probabilities of responses
for an input dataset
"""Use the trained estimator to predict the response probabilities.
Args:
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The input feature(s)
Returns:
:obj:`pandas.DataFrame`: The probability associated with each
:obj:`pandas.DataFrame`: The probability associated with each \
feature
"""
X = check_df(X)
return self.estimator.predict_proba(X)

def score(self, X, y):
"""Uses the trained estimator to compute the evaluation score defined
by the estimator
"""Use the trained estimator to compute the evaluation score.
Note: sample weights are not supported
Expand All @@ -81,6 +78,7 @@ def score(self, X, y):
Returns:
float: A computed prediction fitness score
"""
X = check_df(X)
y = check_df(y)
Expand Down

0 comments on commit 78e836d

Please sign in to comment.