Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
Add darglint, xdoctest, and clean up docs (#72)
Browse files Browse the repository at this point in the history
* Add darglint
* Add xdoctest
* Fix broken tests
* Change tests to use relative path best practices
  • Loading branch information
adithyabsk committed Jun 28, 2019
1 parent 9e4eb94 commit c2f1079
Show file tree
Hide file tree
Showing 37 changed files with 1,074 additions and 365 deletions.
1 change: 0 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ omit = foreshadow/tests/*
[report]
exclude_lines =
pragma: no cover
def __repr__
raise AssertionError
raise NotImplementedError
if __name__ == .__main__.:
Expand Down
8 changes: 4 additions & 4 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[flake8]
exclude = .git,__pycache__,doc/*
exclude = .git,__pycache__,doc/*,build,dist
ignore =
W503, # Line break occurred before a binary operator (opposite of W504)
D107 # Missing docstring in __init__
D301 # Use r""" if any backslashes in a docstring
per-file-ignores =
foreshadow/transformers/externals.py:F401
foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414
# Ignore docs for tests and examples
foreshadow/tests/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414 I101 I102 I103 I201 I202 I203 I301 I302 I401 I402 I501 S001 S002
examples/*:D100 D101 D102 D103 D104 D105 D106 D107 D200 D201 D202 D203 D204 D205 D206 D207 D208 D209 D210 D211 D212 D213 D214 D215 D300 D301 D302 D400 D401 D401 D402 D403 D404 D405 D406 D407 D408 D409 D410 D411 D412 D413 D414 I101 I102 I103 I201 I202 I203 I301 I302 I401 I402 I501 S001 S002
# Ignore doclinter for tests and examples

# Migrate to pyproject.toml when: https://gitlab.com/pycqa/flake8/issues/428
4 changes: 3 additions & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
def get_version():
import toml

with open("../pyproject.toml", "r") as fopen:
toml_path = os.path.join(os.path.dirname(__file__), "..", "pyproject.toml")

with open(toml_path, "r") as fopen:
pyproject = toml.load(fopen)

return pyproject["tool"]["poetry"]["version"]
Expand Down
4 changes: 3 additions & 1 deletion examples/adult_1.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os

import pandas as pd
from sklearn.linear_model import LogisticRegression
Expand All @@ -9,7 +10,8 @@


RANDOM_SEED = 42
adult = pd.read_csv("adult.csv").iloc[:1000]
adult_path = os.path.join(os.path.dirname(__file__), "adult.csv")
adult = pd.read_csv(adult_path).iloc[:1000]

print(adult.head())
features = adult.drop(columns="class")
Expand Down
5 changes: 3 additions & 2 deletions examples/adult_2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os

import pandas as pd
from sklearn.linear_model import LogisticRegression
Expand All @@ -9,8 +10,8 @@


RANDOM_SEED = 42

adult = pd.read_csv("adult.csv").iloc[:1000]
adult_path = os.path.join(os.path.dirname(__file__), "adult.csv")
adult = pd.read_csv(adult_path).iloc[:1000]

print(adult.head())
features = adult.drop(columns="class")
Expand Down
86 changes: 78 additions & 8 deletions foreshadow/estimators/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,24 @@ def problem_type(self):
"""Type of machine learning problem.
Either `regression` or `classification`.
Returns:
self._problem_type
"""
return self._problem_type

@problem_type.setter
def problem_type(self, pt):
"""Set the problem type.
Args:
pt: problem type
Raises:
ValueError: pt is not a valid problem type
"""
pt_options = ["classification", "regression"]
if pt is not None and pt not in pt_options:
raise ValueError("problem type must be in {}".format(pt_options))
Expand All @@ -60,23 +73,50 @@ def auto(self):
"""Type of automl package.
Either `tpot` or `autosklearn`.
Returns:
self._auto, the type of automl package
"""
return self._auto

@auto.setter
def auto(self, ae):
"""Set type of automl package.
Args:
ae: automl package, either None or one of `tpot` / `autosklearn`
Raises:
ValueError: ae is neither None nor a supported automl package
"""
ae_options = ["tpot", "autosklearn"]
if ae is not None and ae not in ae_options:
raise ValueError("auto must be in {}".format(ae_options))
self._auto = ae

@property
def estimator_kwargs(self):
"""Get dictionary of kwargs to pass to AutoML package."""
"""Get dictionary of kwargs to pass to AutoML package.
Returns:
estimator kwargs
"""
return self._estimator_kwargs

@estimator_kwargs.setter
def estimator_kwargs(self, ek):
"""Set estimator kwargs.
Args:
ek: kwargs
Raises:
ValueError: ek not a valid kwargs dict.
"""
if ek is not None and ek is not {}:
if not isinstance(ek, dict) or not all(
isinstance(k, str) for k in ek.keys()
Expand All @@ -93,6 +133,10 @@ def _get_optimal_estimator_class(self):
"""Pick the optimal estimator class.
Defaults to a working estimator if autosklearn is not installed.
Returns:
optimal estimator class.
"""
auto_ = self._pick_estimator() if self.auto is None else self.auto

Expand Down Expand Up @@ -135,21 +179,38 @@ def _get_optimal_estimator_class(self):
)

def _determine_problem_type(self, y):
"""Determine problem type using simple heuristic."""
"""Determine problem type using simple heuristic.
Args:
y: input labels
Returns:
problem type inferred from y.
"""
return (
"classification"
if np.unique(y.values.ravel()).size == 2
else "regression"
)

def _pick_estimator(self):
"""Pick auto estimator based on benchmarked results."""
"""Pick auto estimator based on benchmarked results.
Returns:
str: name of the auto estimator, `tpot` for regression problems and `autosklearn` otherwise
"""
return "tpot" if self.problem_type == "regression" else "autosklearn"

def _pre_configure_estimator_kwargs(self):
"""Configure auto estimators to perform similarly (time scale).
Also remove preprocessors if necessary.
Returns:
estimator kwargs
"""
if self.auto == "tpot" and "config_dict" not in self.estimator_kwargs:
self.estimator_kwargs["config_dict"] = get_tpot_config(
Expand All @@ -176,7 +237,15 @@ def _pre_configure_estimator_kwargs(self):
return self.estimator_kwargs

def _setup_estimator(self, y):
"""Construct and return the auto estimator instance."""
"""Construct and return the auto estimator instance.
Args:
y: input labels
Returns:
autoestimator instance
"""
self.problem_type = (
self._determine_problem_type(y)
if self.problem_type is None
Expand All @@ -194,9 +263,9 @@ def fit(self, X, y):
Uses the selected AutoML estimator.
Args:
data_df (pandas.DataFrame or numpy.ndarray or list): The input
X (pandas.DataFrame or numpy.ndarray or list): The input
feature(s)
y_df (pandas.DataFrame or numpy.ndarray or list): The response
y (pandas.DataFrame or numpy.ndarray or list): The response
feature(s)
Returns:
Expand All @@ -214,7 +283,7 @@ def predict(self, X):
"""Use the trained estimator to predict the response.
Args:
data_df (pandas.DataFrame or numpy.ndarray or list): The input
X (pandas.DataFrame or numpy.ndarray or list): The input
feature(s)
Returns:
Expand All @@ -228,7 +297,7 @@ def predict_proba(self, X): # pragma: no cover
"""Use the trained estimator to predict the responses probabilities.
Args:
data_df (pandas.DataFrame or numpy.ndarray or list): The input
X (pandas.DataFrame or numpy.ndarray or list): The input
feature(s)
Returns:
Expand All @@ -248,6 +317,7 @@ def score(self, X, y, sample_weight=None):
X (pandas.DataFrame or numpy.ndarray or list): The input feature(s)
y (pandas.DataFrame or numpy.ndarray or list): The response
feature(s)
sample_weight: sample weighting. Not implemented.
Returns:
float: A computed prediction fitness score
Expand Down
11 changes: 10 additions & 1 deletion foreshadow/estimators/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ def get_tpot_config(type_, include_preprocessors=False):
Args:
include_preprocessors (bool, optional): whether or not to include
feature engineering steps.
type_ (str): type of problem, either classification or regression
Returns:
default config from TPOT
Raises:
ValueError: type_ is not one of the supported config types
"""
configs = {
Expand All @@ -28,7 +35,9 @@ def get_tpot_config(type_, include_preprocessors=False):
"cluster",
]
if type_ not in configs.keys():
raise ValueError("type_ must be either classification or regression")
raise ValueError(
"type_: '{0}' not in : '{1}'".format(type_, configs.keys())
)
return (
{
k: v
Expand Down
23 changes: 13 additions & 10 deletions foreshadow/estimators/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ def fit(self, X, y=None):
"""Fit the AutoEstimator instance using a selected AutoML estimator.
Args:
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The input feature(s)
y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The response feature(s)
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
input feature(s)
y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
response feature(s)
Returns:
self
"""
X = check_df(X)
Expand All @@ -54,8 +57,8 @@ def predict_proba(self, X):
"""Use the trained estimator to predict the response probabilities.
Args:
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The input feature(s)
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
input feature(s)
Returns:
:obj:`pandas.DataFrame`: The probability associated with each \
Expand All @@ -71,10 +74,10 @@ def score(self, X, y):
Note: sample weights are not supported
Args:
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The input feature(s)
y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list):
The response feature(s)
X (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
input feature(s)
y (:obj:`pandas.DataFrame` or :obj:`numpy.ndarray` or list): The
response feature(s)
Returns:
float: A computed prediction fitness score
Expand Down
7 changes: 7 additions & 0 deletions foreshadow/foreshadow.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
class Foreshadow(BaseEstimator):
"""An end-to-end pipeline to preprocess and tune a machine learning model.
Example:
>>> shadow = Foreshadow()
Args:
X_preprocessor \
(:obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`, \
Expand Down Expand Up @@ -67,6 +70,7 @@ def X_preprocessor(self):
:type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`
.. # noqa: I201
"""
return self._X_preprocessor

Expand All @@ -93,6 +97,7 @@ def y_preprocessor(self):
:type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`
.. # noqa: I201
"""
return self._y_preprocessor

Expand Down Expand Up @@ -120,6 +125,7 @@ def estimator(self):
:type: :obj:`sklearn.base.BaseEstimator`
.. # noqa: I201
"""
return self._estimator

Expand Down Expand Up @@ -148,6 +154,7 @@ def optimizer(self):
:setter: Verifies Optimizer class, defaults to None
.. # noqa: I201
"""
return self._optimizer

Expand Down

0 comments on commit c2f1079

Please sign in to comment.