Docstring formatting and linting using pydocstyle and darglint #2670

Merged
merged 81 commits on Sep 14, 2021
69e7b11
init
angela97lin Aug 20, 2021
82f358c
add auto
angela97lin Aug 20, 2021
61f2207
merge
angela97lin Aug 26, 2021
fa838da
Merge branch 'main' into 878_docstring_formatting
angela97lin Aug 26, 2021
542eb74
do some more cleanup
angela97lin Aug 26, 2021
f74c174
Merge branch '878_docstring_formatting' of github.com:alteryx/evalml …
angela97lin Aug 26, 2021
28e67a2
fix some data checks
angela97lin Aug 27, 2021
3752f8b
start to add lines
angela97lin Aug 27, 2021
4ba3d25
dashes under sections
angela97lin Aug 27, 2021
0cec01d
sphinx dont tree warnings as errors, need to remove later but using t…
angela97lin Aug 27, 2021
0801840
try update make bat
angela97lin Aug 27, 2021
8c646c8
try to fix class imbal dc as example
angela97lin Aug 27, 2021
c6a0e54
empty commit
angela97lin Aug 29, 2021
2bdbb99
change to parameters
angela97lin Aug 30, 2021
677385c
fix data checks
angela97lin Aug 30, 2021
4674ea6
cleanup more
angela97lin Aug 30, 2021
3ad3847
fixing more
angela97lin Aug 30, 2021
cf45bb7
add ignore test directory flag to lint command
angela97lin Aug 30, 2021
7bc5d94
more cleanup
angela97lin Aug 30, 2021
6849d3a
merge main
angela97lin Aug 30, 2021
7049183
adding more fixes
angela97lin Aug 30, 2021
3a06632
Merge branch 'main' into 878_docstring_formatting
angela97lin Aug 30, 2021
aee0e45
revert tests and clean more
angela97lin Aug 30, 2021
8afc300
Merge branch '878_docstring_formatting' of github.com:alteryx/evalml …
angela97lin Aug 30, 2021
913dc99
merge
angela97lin Aug 31, 2021
70572a0
clean up objectives
angela97lin Aug 31, 2021
f7bfb53
release notes update
angela97lin Aug 31, 2021
ba685e3
fix tests
angela97lin Aug 31, 2021
4d302d0
fix dependencies
angela97lin Aug 31, 2021
c452033
cleanup
angela97lin Aug 31, 2021
0b37cfa
more cleanup
angela97lin Aug 31, 2021
29bd507
yet more cleanup
angela97lin Aug 31, 2021
43f55c3
a final ounce of cleanup
angela97lin Aug 31, 2021
5e0da3f
try darglint
angela97lin Sep 8, 2021
1aa06ad
merge master
angela97lin Sep 8, 2021
c36071c
some fixing linting
angela97lin Sep 8, 2021
b980a8e
more fixing
angela97lin Sep 9, 2021
f8255f7
some more cleanup
angela97lin Sep 9, 2021
f3491f9
more cleanup
angela97lin Sep 9, 2021
f8652ba
merge main
angela97lin Sep 9, 2021
f41f9e8
merge main
angela97lin Sep 9, 2021
5b15cd6
more cleanup
angela97lin Sep 9, 2021
fd43934
clean up
angela97lin Sep 9, 2021
b956831
mid merge, need to address new comments
angela97lin Sep 9, 2021
2c0c74a
linting
angela97lin Sep 10, 2021
e4bd693
try rtd
angela97lin Sep 10, 2021
b58e3ed
try
angela97lin Sep 10, 2021
76b7879
try without bullets
angela97lin Sep 10, 2021
07a4f93
revert graphs.py to see what changes
angela97lin Sep 10, 2021
5f48e12
add back a little
angela97lin Sep 10, 2021
a5d4b12
add back a little more
angela97lin Sep 10, 2021
f0fd9aa
Merge branch 'main' into 878_docstring_formatting
angela97lin Sep 10, 2021
1111632
add back more, ignoring indentation errors likely causing RtD failures
angela97lin Sep 10, 2021
bea2dcb
Merge branch '878_docstring_formatting' of github.com:alteryx/evalml …
angela97lin Sep 10, 2021
970f246
test kwargs
angela97lin Sep 10, 2021
0c67410
add another kwargs
angela97lin Sep 10, 2021
10ab286
attempt to fix kwargs
angela97lin Sep 10, 2021
622342b
lost attempt again at fixing kwargs
angela97lin Sep 10, 2021
eb94c89
remove to confirm issue
angela97lin Sep 10, 2021
62bfa26
revert
angela97lin Sep 10, 2021
59114c9
Merge branch 'main' into 878_docstring_formatting
angela97lin Sep 10, 2021
067595d
revert
angela97lin Sep 11, 2021
f66fcc5
???
angela97lin Sep 11, 2021
11cd30e
change indentation
angela97lin Sep 11, 2021
3c889a8
try
angela97lin Sep 11, 2021
6720618
try again
angela97lin Sep 11, 2021
39ca658
test square brackets
angela97lin Sep 11, 2021
9b4e2cc
test underscore
angela97lin Sep 11, 2021
2a166d8
remove raises
angela97lin Sep 11, 2021
9a98ba8
backslash
angela97lin Sep 11, 2021
32d5cc1
try removal
angela97lin Sep 11, 2021
4fc6db7
test
angela97lin Sep 11, 2021
eea28fc
clean up setup
angela97lin Sep 12, 2021
d45408c
Merge branch 'main' into 878_docstring_formatting
angela97lin Sep 12, 2021
57941fc
cleanup and revert accidental merge diffs
angela97lin Sep 13, 2021
4fbaa63
Merge branch '878_docstring_formatting' of github.com:alteryx/evalml …
angela97lin Sep 13, 2021
da14413
Merge branch 'main' into 878_docstring_formatting
chukarsten Sep 13, 2021
a818536
clean up from comments
angela97lin Sep 14, 2021
1da0d4d
Merge branch 'main' into 878_docstring_formatting
angela97lin Sep 14, 2021
f0bc3db
add default to contributing
angela97lin Sep 14, 2021
8f714bf
Merge branch '878_docstring_formatting' of github.com:alteryx/evalml …
angela97lin Sep 14, 2021
5 changes: 4 additions & 1 deletion Makefile
@@ -8,8 +8,11 @@ clean:

.PHONY: lint
lint:
flake8 evalml && isort --check-only evalml && python docs/notebook_version_standardizer.py check-versions
isort --check-only evalml
python docs/notebook_version_standardizer.py check-versions
black evalml -t py39 --check
pydocstyle evalml/ --convention=google --add-ignore=D107 --add-select=D400 --match-dir='^(?!(tests)).*'
flake8 evalml

.PHONY: lint-fix
lint-fix:
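The `--match-dir='^(?!(tests)).*'` flag passed to pydocstyle in the lint target uses a negative lookahead so test directories are skipped. A quick sketch of how that pattern behaves, using Python's `re` module (which is how pydocstyle interprets the option); the directory names below are made up for illustration:

```python
import re

# pydocstyle's --match-dir pattern from the lint target: lint any
# directory except one whose name starts with "tests".
pattern = re.compile(r"^(?!(tests)).*")

for dirname in ["pipelines", "automl", "tests", "utils"]:
    matched = bool(pattern.match(dirname))
    print(dirname, "->", "linted" if matched else "skipped")
```

Only `tests` fails the lookahead, so everything else remains subject to docstring linting.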
7 changes: 6 additions & 1 deletion contributing.md
@@ -119,7 +119,12 @@ One of the package maintainers will then review your PR!
* Make PRs as small as possible! Consider breaking your large changes into separate PRs. This will make code review easier, quicker, less bug-prone and more effective.
* In the name of every branch you create, include the associated issue number if applicable.
* If new changes are added to the branch you're basing your changes off of, consider using `git rebase -i base_branch` rather than merging the base branch, to keep history clean.
* Always include a docstring for public methods and classes. Consider including docstrings for private methods too. Our docstring convention is [`sphinx.ext.napoleon`](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html).
* Always include a docstring for public methods and classes. Consider including docstrings for private methods too. We use the [Google docstring convention](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings), and use the [`sphinx.ext.napoleon`](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) extension to parse our docstrings.
* Although not explicitly enforced by the Google convention, keep the following stylistic conventions for docstrings in mind:
- First letter of each argument description should be capitalized.
- Docstring sentences should end in periods. This includes descriptions for each argument.
- Types should be written in lower-case. For example, use "bool" instead of "Bool".
- Always add the default value in the description of the argument, if applicable. For example, "Defaults to 1."
* Use [PascalCase (upper camel case)](https://en.wikipedia.org/wiki/Camel_case#Variations_and_synonyms) for class names, and [snake_case](https://en.wikipedia.org/wiki/Snake_case) for method and class member names.
* To distinguish private methods and class attributes from public ones, those which are private should be prefixed with an underscore
* Any code which doesn't need to be public should be private. Use `@staticmethod` and `@classmethod` where applicable, to indicate no side effects.
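Putting the conventions above together, a minimal Google-style docstring might look like the sketch below; the function itself is a made-up example for illustration, not EvalML code:

```python
def scale_values(values, factor=1.0, inplace=False):
    """Scale a list of numeric values by a constant factor.

    Args:
        values (list): The numeric values to scale.
        factor (float): Multiplier applied to each value. Defaults to 1.0.
        inplace (bool): Whether to mutate the input list. Defaults to False.

    Returns:
        list: The scaled values.
    """
    result = values if inplace else list(values)
    for i, value in enumerate(result):
        result[i] = value * factor
    return result
```

Note the capitalized argument descriptions, the trailing periods, the lower-case types, and the explicit "Defaults to ..." sentences, matching the stylistic conventions listed above.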
3 changes: 2 additions & 1 deletion dev-requirements.txt
@@ -4,4 +4,5 @@
flake8==3.7.0
black==21.5b1
isort==5.0.0

pydocstyle==6.1.1
darglint==1.8.0
10 changes: 3 additions & 7 deletions docs/source/conf.py
@@ -228,8 +228,7 @@


class AccessorLevelDocumenter(Documenter):
"""
Documenter subclass for objects on accessor level (methods, attributes).
"""Documenter subclass for objects on accessor level (methods, attributes).

Referenced pandas-sphinx-theme (https://github.com/pandas-dev/pandas-sphinx-theme)
and sphinx-doc (https://github.com/sphinx-doc/sphinx/blob/8c7faed6fcbc6b7d40f497698cb80fc10aee1ab3/sphinx/ext/autodoc/__init__.py#L846)
@@ -243,10 +242,7 @@ def resolve_name(self, modname, parents, path, base):


class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter):
"""
This documenter lets us removes .__call__ from the method signature for
callable accessors like Series.plot
"""
"""This documenter lets us removes .__call__ from the method signature for callable accessors like Series.plot."""

objtype = "accessorcallable"
directivetype = "method"
@@ -269,7 +265,7 @@ class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter):


class PatchedPythonDomain(PythonDomain):
"""To disable cross-reference warning: https://github.com/sphinx-doc/sphinx/issues/3866"""
"""To disable cross-reference warning: https://github.com/sphinx-doc/sphinx/issues/3866."""
def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode):
if 'refspecific' in node:
del node['refspecific']
2 changes: 2 additions & 0 deletions docs/source/release_notes.rst
@@ -5,6 +5,8 @@ Release Notes
* Fixes
* Fixed bug where warnings during ``make_pipeline`` were not being raised to the user :pr:`2765`
* Changes
* Added docstring linting packages ``pydocstyle`` and ``darglint`` to the ``make lint`` command :pr:`2670`
* Documentation Changes
* Testing Changes

.. warning::
8 changes: 4 additions & 4 deletions docs/source/user_guide/components.ipynb
@@ -162,7 +162,7 @@
" def __init__(self, pct_null_threshold=1.0, random_seed=0, **kwargs):\n",
" \"\"\"Initalizes an transformer to drop features whose percentage of NaN values exceeds a specified threshold.\n",
"\n",
" Arguments:\n",
" Args:\n",
" pct_null_threshold(float): The percentage of NaN values in an input feature to drop.\n",
" Must be a value between [0, 1] inclusive. If equal to 0.0, will drop columns with any null values.\n",
" If equal to 1.0, will drop columns with all null values. Defaults to 0.95.\n",
@@ -180,7 +180,7 @@
" def fit(self, X, y=None):\n",
" \"\"\"Fits DropNullColumns component to data\n",
"\n",
" Arguments:\n",
" Args:\n",
" X (pd.DataFrame): The input training data of shape [n_samples, n_features]\n",
" y (pd.Series, optional): The target training data of length [n_samples]\n",
"\n",
@@ -200,7 +200,7 @@
" def transform(self, X, y=None):\n",
" \"\"\"Transforms data X by dropping columns that exceed the threshold of null values.\n",
"\n",
" Arguments:\n",
" Args:\n",
" X (pd.DataFrame): Data to transform\n",
" y (pd.Series, optional): Ignored.\n",
"\n",
@@ -272,7 +272,7 @@
" def __init__(self, strategy=\"mean\", random_seed=0, **kwargs):\n",
" \"\"\"Baseline regressor that uses a simple strategy to make predictions.\n",
"\n",
" Arguments:\n",
" Args:\n",
" strategy (str): Method used to predict. Valid options are \"mean\", \"median\". Defaults to \"mean\".\n",
" random_seed (int): Seed for the random number generator. Defaults to 0.\n",
"\n",
2 changes: 1 addition & 1 deletion docs/source/user_guide/data_checks.ipynb
@@ -628,7 +628,7 @@
" def __init__(self, problem_type, objective):\n",
" \"\"\"\n",
" A collection of basic data checks.\n",
" Arguments:\n",
" Args:\n",
" problem_type (str): The problem type that is being validated. Can be regression, binary, or multiclass.\n",
" \"\"\"\n",
" if handle_problem_types(problem_type) == ProblemTypes.REGRESSION:\n",
6 changes: 3 additions & 3 deletions docs/source/user_guide/objectives.ipynb
@@ -142,7 +142,7 @@
" fraud_payout_percentage=1.0, amount_col='amount'):\n",
" \"\"\"Create instance of FraudCost\n",
"\n",
" Arguments:\n",
" Args:\n",
" retry_percentage (float): What percentage of customers that will retry a transaction if it\n",
" is declined. Between 0 and 1. Defaults to .5\n",
"\n",
@@ -162,7 +162,7 @@
" def decision_function(self, ypred_proba, threshold=0.0, X=None):\n",
" \"\"\"Determine if a transaction is fraud given predicted probabilities, threshold, and dataframe with transaction amount\n",
"\n",
" Arguments:\n",
" Args:\n",
" ypred_proba (pd.Series): Predicted probablities\n",
" X (pd.DataFrame): Dataframe containing transaction amount\n",
" threshold (float): Dollar threshold to determine if transaction is fraud\n",
@@ -182,7 +182,7 @@
" def objective_function(self, y_true, y_predicted, X):\n",
" \"\"\"Calculate amount lost to fraud per transaction given predictions, true values, and dataframe with transaction amount\n",
"\n",
" Arguments:\n",
" Args:\n",
" y_predicted (pd.Series): predicted fraud labels\n",
" y_true (pd.Series): true fraud labels\n",
" X (pd.DataFrame): dataframe with transaction amounts\n",
4 changes: 2 additions & 2 deletions docs/source/user_guide/pipelines.ipynb
@@ -398,7 +398,7 @@
" def __init__(self, pct_null_threshold=1.0, random_seed=0, **kwargs):\n",
" \"\"\"Initalizes an transformer to drop features whose percentage of NaN values exceeds a specified threshold.\n",
"\n",
" Arguments:\n",
" Args:\n",
" pct_null_threshold(float): The percentage of NaN values in an input feature to drop.\n",
" Must be a value between [0, 1] inclusive. If equal to 0.0, will drop columns with any null values.\n",
" If equal to 1.0, will drop columns with all null values. Defaults to 0.95.\n",
@@ -426,7 +426,7 @@
"\n",
" def transform(self, X, y=None):\n",
" \"\"\"Transforms data X by dropping columns that exceed the threshold of null values.\n",
" Arguments:\n",
" Args:\n",
" X (pd.DataFrame): Data to transform\n",
" y (pd.Series, optional): Targets\n",
" Returns:\n",
1 change: 1 addition & 0 deletions evalml/__init__.py
@@ -1,3 +1,4 @@
"""EvalML."""
import warnings

# hack to prevent warnings from skopt
4 changes: 4 additions & 0 deletions evalml/__main__.py
@@ -1,15 +1,19 @@
"""CLI commands."""

import click

from evalml.utils.cli_utils import print_info


@click.group()
def cli():
"""CLI command with no arguments. Does nothing."""
pass


@click.command()
def info():
"""CLI command with `info` argument. Prints info about the system, evalml, and dependencies of evalml."""
print_info()


1 change: 1 addition & 0 deletions evalml/automl/__init__.py
@@ -1,3 +1,4 @@
"""AutoMLSearch and related modules."""
from .automl_search import AutoMLSearch, search_iterative, search
from .utils import (
get_default_primary_search_objective,
1 change: 1 addition & 0 deletions evalml/automl/automl_algorithm/__init__.py
@@ -1,3 +1,4 @@
"""AutoML algorithms that power EvalML."""
from .automl_algorithm import AutoMLAlgorithm, AutoMLAlgorithmException
from .iterative_algorithm import IterativeAlgorithm
from .default_algorithm import DefaultAlgorithm
19 changes: 11 additions & 8 deletions evalml/automl/automl_algorithm/automl_algorithm.py
@@ -1,24 +1,24 @@
"""Base class for the AutoML algorithms which power EvalML."""
from abc import ABC, abstractmethod

from evalml.exceptions import PipelineNotFoundError
from evalml.tuners import SKOptTuner


class AutoMLAlgorithmException(Exception):
"""Exception raised when an error is encountered during the computation of the automl algorithm"""
"""Exception raised when an error is encountered during the computation of the automl algorithm."""

pass


class AutoMLAlgorithm(ABC):
"""
Base class for the AutoML algorithms which power EvalML.
"""Base class for the AutoML algorithms which power EvalML.

This class represents an automated machine learning (AutoML) algorithm. It encapsulates the decision-making logic behind an automl search, by both deciding which pipelines to evaluate next and by deciding what set of parameters to configure the pipeline with.

To use this interface, you must define a next_batch method which returns the next group of pipelines to evaluate on the training data. That method may access state and results recorded from the previous batches, although that information is not tracked in a general way in this base class. Overriding add_result is a convenient way to record pipeline evaluation info if necessary.

Arguments:
Args:
allowed_pipelines (list(class)): A list of PipelineBase subclasses indicating the pipelines allowed in the search. The default of None indicates all pipelines for this problem type are allowed.
custom_hyperparameters (dict): Custom hyperparameter ranges specified for pipelines to iterate over.
max_iterations (int): The maximum number of iterations to be evaluated.
@@ -51,19 +51,22 @@ def __init__(

@abstractmethod
def next_batch(self):
"""Get the next batch of pipelines to evaluate
"""Get the next batch of pipelines to evaluate.

Returns:
list(PipelineBase): a list of instances of PipelineBase subclasses, ready to be trained and evaluated.
list[PipelineBase]: A list of instances of PipelineBase subclasses, ready to be trained and evaluated.
"""

def add_result(self, score_to_minimize, pipeline, trained_pipeline_results):
"""Register results from evaluating a pipeline
"""Register results from evaluating a pipeline.

Arguments:
Args:
score_to_minimize (float): The score obtained by this pipeline on the primary objective, converted so that lower values indicate better pipelines.
pipeline (PipelineBase): The trained pipeline object which was used to compute the score.
trained_pipeline_results (dict): Results from training a pipeline.

Raises:
PipelineNotFoundError: If pipeline is not allowed in search.
"""
if pipeline.name not in self._tuners:
raise PipelineNotFoundError(
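The new ``Raises`` section added to ``add_result`` above is exactly the kind of thing darglint verifies: it cross-checks the names in ``Args`` and the exceptions in ``Raises`` against the function signature and body. A small illustration of a docstring darglint would accept; the function and names here are invented for demonstration and are not EvalML code:

```python
def register_score(scores, name, value):
    """Record a score for a named pipeline.

    Args:
        scores (dict): Mapping of pipeline name to score.
        name (str): Name of the pipeline being recorded.
        value (float): Score to record.

    Returns:
        dict: The updated mapping.

    Raises:
        KeyError: If the pipeline name is already registered.
    """
    if name in scores:
        # The raised exception type matches the Raises section above,
        # which is what darglint checks for.
        raise KeyError(f"{name} already registered")
    scores[name] = value
    return scores
```

If the ``Raises`` section were missing, or listed an exception the body never raises, darglint would flag the docstring as inconsistent.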
37 changes: 18 additions & 19 deletions evalml/automl/automl_algorithm/default_algorithm.py
@@ -1,3 +1,4 @@
"""An automl algorithm that consists of two modes: fast and long, where fast is a subset of long."""
import inspect

import numpy as np
@@ -48,6 +49,21 @@ class DefaultAlgorithm(AutoMLAlgorithm):
8. Repeat these indefinitely until stopping criterion is met:
a. For each of the previous top 3 estimators, sample 10 parameters from the tuner. Run all 30 in one batch
b. Run ensembling

Args:
X (pd.DataFrame): Training data.
y (pd.Series): Target data.
problem_type (ProblemType): Problem type associated with training data.
sampler_name (BaseSampler): Sampler to use for preprocessing.
tuner_class (class): A subclass of Tuner, to be used to find parameters for each pipeline. The default of None indicates the SKOptTuner will be used.
random_seed (int): Seed for the random number generator. Defaults to 0.
pipeline_params (dict or None): Pipeline-level parameters that should be passed to the proposed pipelines. Defaults to None.
custom_hyperparameters (dict or None): Custom hyperparameter ranges specified for pipelines to iterate over. Defaults to None.
n_jobs (int or None): Non-negative integer describing level of parallelism used for pipelines. Defaults to -1.
text_in_ensembling (boolean): If True and ensembling is True, then n_jobs will be set to 1 to avoid downstream sklearn stacking issues related to nltk. Defaults to None.
top_n (int): top n number of pipelines to use for long mode.
num_long_explore_pipelines (int): number of pipelines to explore for each top n pipeline at the start of long mode.
num_long_pipelines_per_batch (int): number of pipelines per batch for each top n pipeline through long mode.
"""

def __init__(
@@ -66,23 +82,6 @@ def __init__(
num_long_explore_pipelines=50,
num_long_pipelines_per_batch=10,
):
"""
Arguments:
X (pd.DataFrame): Training data
y (pd.Series): Target data
problem_type (ProblemType): Problem type associated with training data
sampler_name (BaseSampler): Sampler to use for preprocessing
tuner_class (class): A subclass of Tuner, to be used to find parameters for each pipeline. The default of None indicates the SKOptTuner will be used.
random_seed (int): Seed for the random number generator. Defaults to 0.
pipeline_params (dict or None): Pipeline-level parameters that should be passed to the proposed pipelines. Defaults to None.
custom_hyperparameters (dict or None): Custom hyperparameter ranges specified for pipelines to iterate over. Defaults to None.
n_jobs (int or None): Non-negative integer describing level of parallelism used for pipelines. Defaults to -1.
text_in_ensembling (boolean): If True and ensembling is True, then n_jobs will be set to 1 to avoid downstream sklearn stacking issues related to nltk. Defaults to None.
top_n (int): top n number of pipelines to use for long mode.
num_long_explore_pipelines (int): number of pipelines to explore for each top n pipeline at the start of long mode.
num_long_pipelines_per_batch (int): number of pipelines per batch for each top n pipeline through long mode.
"""

super().__init__(
allowed_pipelines=[],
custom_hyperparameters=custom_hyperparameters,
@@ -286,7 +285,7 @@ def _create_long_exploration(self, n):
return self._create_n_pipelines(pipelines, self.num_long_explore_pipelines)

def next_batch(self):
"""Get the next batch of pipelines to evaluate
"""Get the next batch of pipelines to evaluate.

Returns:
list(PipelineBase): a list of instances of PipelineBase subclasses, ready to be trained and evaluated.
@@ -315,7 +314,7 @@
def add_result(self, score_to_minimize, pipeline, trained_pipeline_results):
"""Register results from evaluating a pipeline. In batch number 2, the selected column names from the feature selector are taken to be used in a column selector. Information regarding the best pipeline is updated here as well.

Arguments:
Args:
score_to_minimize (float): The score obtained by this pipeline on the primary objective, converted so that lower values indicate better pipelines.
pipeline (PipelineBase): The trained pipeline object which was used to compute the score.
trained_pipeline_results (dict): Results from training a pipeline.