Allowing get_objective to be able to get any objective. (#1132)
* Allowing get_objective to be able to get any objective.

* Updating docs to use new objective names.

* Adding release notes for PR 1132.

* Fixing coverage in test_objectives.py by using new all objectives lists.

* Fixing import order in test_objectives.

* Editing docstrings, adding helpful error when return_instance is True but instance can't be created.

* Adding more breaking changes to PR 1132 in release_notes.

* Adding test to check that get_objectives throws a type error when None is passed in.

* Only storing lowercase names in _all_objectives_dict. Making printing functions better.

* Adding fixtures for binary, multiclass, and regression objectives allowed in automl.

* Adding coverage for print_all_objective_names.

* Updating unit tests for test graphs to use lowercase objective names.

* Fixing get_objective call in graph_binary_objective.

* Making misc edits - tidying comments and release notes.
freddyaboulton committed Sep 4, 2020
1 parent 41b711a commit 221bceb
Showing 21 changed files with 297 additions and 146 deletions.
4 changes: 2 additions & 2 deletions docs/source/demos/cost_benefit_matrix.ipynb
@@ -116,12 +116,12 @@
"outputs": [],
"source": [
"from evalml import AutoMLSearch\n",
"automl = AutoMLSearch(problem_type='binary', objective='log_loss_binary')\n",
"automl = AutoMLSearch(problem_type='binary', objective='log loss binary')\n",
"automl.search(X, y)\n",
"\n",
"ll_pipeline = automl.best_pipeline\n",
"ll_pipeline.fit(X, y)\n",
"ll_pipeline.score(X, y, ['log_loss_binary'])"
"ll_pipeline.score(X, y, ['log loss binary'])"
]
},
{
12 changes: 12 additions & 0 deletions docs/source/release_notes.rst
@@ -4,6 +4,8 @@ Release Notes
**Future Releases**
* Enhancements
* Added `output_format` field to explain predictions functions :pr:`1107`
* Modified `get_objective` and `get_objectives` to be able to return any objective in `evalml.objectives` :pr:`1132`
* Added a `return_instance` boolean parameter to `get_objective` :pr:`1132`
* Fixes
* Fixed XGBoost column names for partial dependence methods :pr:`1104`
* Removed dead code validating column type from `TextFeaturizer` :pr:`1122`
@@ -16,6 +18,16 @@ Release Notes
* Added test confirming `TextFeaturizer` never outputs null values :pr:`1122`
* Changed Python version of `Update Dependencies` action to 3.8.x :pr:`1137`

.. warning::

**Breaking Changes**
* `get_objective` will now return a class definition rather than an instance by default :pr:`1132`
* Deleted `OPTIONS` dictionary in `evalml.objectives.utils.py` :pr:`1132`
* If specifying an objective by string, the string must now match the objective's name field, case-insensitive :pr:`1132`
* Passing "Cost Benefit Matrix", "Fraud Cost", "Lead Scoring", "Mean Squared Log Error",
"Recall", "Recall Macro", "Recall Micro", "Recall Weighted", or "Root Mean Squared Log Error" to `AutoMLSearch` will now result in a `ValueError`
rather than an `ObjectiveNotFoundError` :pr:`1132`
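
The breaking changes above can be illustrated with a minimal sketch, assuming an evalml install at this commit (the assertions are illustrative, not part of the PR):

from evalml.objectives import LogLossBinary, get_objective

# Objective lookup is now case-insensitive against the objective's `name` field.
objective_class = get_objective("log loss binary")
assert objective_class is LogLossBinary  # a class definition, not an instance

# Ask for an instance explicitly; it is initialized with default arguments.
objective_instance = get_objective("Log Loss Binary", return_instance=True)
assert isinstance(objective_instance, LogLossBinary)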


**v0.13.1 Aug. 25, 2020**
* Enhancements
6 changes: 3 additions & 3 deletions docs/source/user_guide/model_understanding.ipynb
@@ -45,7 +45,7 @@
"\n",
"pipeline = RFBinaryClassificationPipeline({})\n",
"pipeline.fit(X, y)\n",
"print(pipeline.score(X, y, objectives=['log_loss_binary']))"
"print(pipeline.score(X, y, objectives=['log loss binary']))"
]
},
{
@@ -98,7 +98,7 @@
"outputs": [],
"source": [
"from evalml.model_understanding.graphs import calculate_permutation_importance\n",
"calculate_permutation_importance(pipeline, X, y, 'log_loss_binary')"
"calculate_permutation_importance(pipeline, X, y, 'log loss binary')"
]
},
{
@@ -108,7 +108,7 @@
"outputs": [],
"source": [
"from evalml.model_understanding.graphs import graph_permutation_importance\n",
"graph_permutation_importance(pipeline, X, y, 'log_loss_binary')"
"graph_permutation_importance(pipeline, X, y, 'log loss binary')"
]
},
{
2 changes: 1 addition & 1 deletion docs/source/user_guide/pipelines.ipynb
@@ -92,7 +92,7 @@
"pipeline = CustomMulticlassClassificationPipeline({})\n",
"pipeline.fit(X, y)\n",
"print(pipeline.predict(X))\n",
"print(pipeline.score(X, y, objectives=['log_loss_multi']))"
"print(pipeline.score(X, y, objectives=['log loss multiclass']))"
]
},
{
52 changes: 45 additions & 7 deletions evalml/automl/automl_search.py
@@ -24,7 +24,23 @@
PipelineNotFoundError,
PipelineScoreError
)
from evalml.objectives import get_objective, get_objectives
from evalml.objectives import (
CostBenefitMatrix,
FraudCost,
LeadScoring,
MeanSquaredLogError,
Recall,
RecallMacro,
RecallMicro,
RecallWeighted,
RootMeanSquaredLogError,
get_objective,
get_objectives
)
from evalml.objectives.utils import (
_all_objectives_dict,
_print_objectives_in_table
)
from evalml.pipelines import (
BinaryClassificationPipeline,
MeanBaselineRegressionPipeline,
@@ -36,6 +52,7 @@
from evalml.problem_types import ProblemTypes, handle_problem_types
from evalml.tuners import SKOptTuner
from evalml.utils import convert_to_seconds, get_random_state
from evalml.utils.gen_utils import classproperty
from evalml.utils.logger import (
get_logger,
log_subtitle,
@@ -55,9 +72,9 @@ class AutoMLSearch:
# Necessary for "Plotting" documentation, since Sphinx does not work well with instance attributes.
plot = PipelineSearchPlots

_DEFAULT_OBJECTIVES = {'binary': 'log_loss_binary',
'multiclass': 'log_loss_multi',
'regression': 'r2'}
_DEFAULT_OBJECTIVES = {'binary': 'Log Loss Binary',
'multiclass': 'Log Loss Multiclass',
'regression': 'R2'}

def __init__(self,
problem_type=None,
@@ -147,19 +164,21 @@ def __init__(self,
self.optimize_thresholds = optimize_thresholds
if objective == 'auto':
objective = self._DEFAULT_OBJECTIVES[self.problem_type.value]
self.objective = get_objective(objective)
objective = get_objective(objective, return_instance=False)
self.objective = self._validate_objective(objective)
if self.data_split is not None and not issubclass(self.data_split.__class__, BaseCrossValidator):
raise ValueError("Not a valid data splitter")
if self.problem_type != self.objective.problem_type:
raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value))
if additional_objectives is None:
additional_objectives = get_objectives(self.problem_type)
additional_objectives = [obj for obj in get_objectives(self.problem_type) if obj not in self._objectives_not_allowed_in_automl]
# if our main objective is part of default set of objectives for problem_type, remove it
existing_main_objective = next((obj for obj in additional_objectives if obj.name == self.objective.name), None)
if existing_main_objective is not None:
additional_objectives.remove(existing_main_objective)
else:
additional_objectives = [get_objective(o) for o in additional_objectives]
additional_objectives = [self._validate_objective(obj) for obj in additional_objectives]
self.additional_objectives = additional_objectives

if max_time is None or isinstance(max_time, (int, float)):
@@ -212,13 +231,32 @@ def __init__(self,

self._validate_problem_type()

@classproperty
def _objectives_not_allowed_in_automl(self):
return {CostBenefitMatrix, FraudCost, LeadScoring,
MeanSquaredLogError, Recall, RecallMacro, RecallMicro, RecallWeighted, RootMeanSquaredLogError}

@classmethod
def print_objective_names_allowed_in_automl(cls):
names = [name for name, value in _all_objectives_dict().items() if value not in cls._objectives_not_allowed_in_automl]
_print_objectives_in_table(names)

def _validate_objective(self, objective):
if isinstance(objective, type):
if objective in self._objectives_not_allowed_in_automl:
raise ValueError(f"{objective.name} is not allowed in AutoML! "
"Use evalml.automl.AutoMLSearch.print_objective_names_allowed_in_automl() "
"to get all objective names allowed in automl.")
return objective()
return objective

@property
def data_check_results(self):
return self._data_check_results

def __str__(self):
def _print_list(obj_list):
lines = ['\t{}'.format(o.name) for o in obj_list]
lines = sorted(['\t{}'.format(o.name) for o in obj_list])
return '\n'.join(lines)

def _get_funct_name(function):
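
Before moving on, a usage sketch of the objective validation added above, assuming an evalml install at this commit; no data is needed because the check happens in the constructor:

from evalml import AutoMLSearch

# Print a table of every objective name AutoMLSearch will accept.
AutoMLSearch.print_objective_names_allowed_in_automl()

# 'recall' resolves to the Recall class, which _validate_objective rejects,
# so construction fails with a ValueError rather than an ObjectiveNotFoundError.
try:
    AutoMLSearch(problem_type='binary', objective='recall')
except ValueError as error:
    print(error)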
6 changes: 3 additions & 3 deletions evalml/model_understanding/graphs.py
@@ -269,7 +269,7 @@ def calculate_permutation_importance(pipeline, X, y, objective, n_repeats=5, n_j
Returns:
Mean feature importance scores over 5 shuffles.
"""
objective = get_objective(objective)
objective = get_objective(objective, return_instance=True)
if objective.problem_type != pipeline.problem_type:
raise ValueError(f"Given objective '{objective.name}' cannot be used with '{pipeline.name}'")

@@ -351,7 +351,7 @@ def binary_objective_vs_threshold(pipeline, X, y, objective, steps=100):
pd.DataFrame: DataFrame with thresholds and the corresponding objective score calculated at each threshold
"""
objective = get_objective(objective)
objective = get_objective(objective, return_instance=True)
if objective.problem_type != ProblemTypes.BINARY:
raise ValueError("`binary_objective_vs_threshold` can only be calculated for binary classification objectives")
if objective.score_needs_proba:
Expand Down Expand Up @@ -386,7 +386,7 @@ def graph_binary_objective_vs_threshold(pipeline, X, y, objective, steps=100):
if jupyter_check():
import_or_raise("ipywidgets", warning=True)

objective = get_objective(objective)
objective = get_objective(objective, return_instance=True)
df = binary_objective_vs_threshold(pipeline, X, y, objective, steps)
title = f'{objective.name} Scores vs. Thresholds'
layout = _go.Layout(title={'text': title},
2 changes: 1 addition & 1 deletion evalml/objectives/__init__.py
@@ -43,4 +43,4 @@
RecallMicro,
RecallWeighted
)
from .utils import get_objective, get_objectives
from .utils import get_objective, get_objectives, print_all_objective_names
115 changes: 72 additions & 43 deletions evalml/objectives/utils.py
@@ -1,63 +1,90 @@
from . import standard_metrics
from texttable import Texttable

from .objective_base import ObjectiveBase

from evalml.exceptions import ObjectiveNotFoundError
from evalml.problem_types import handle_problem_types
from evalml.utils.gen_utils import _get_subclasses


def _all_objectives_dict():
all_objectives = _get_subclasses(ObjectiveBase)
objectives_dict = {}
for objective in all_objectives:
if 'evalml.objectives' not in objective.__module__:
continue
objectives_dict[objective.name.lower()] = objective
return objectives_dict


def _print_objectives_in_table(names):
"""Print the list of objective names in a table.
Returns:
None
"""
def iterate_in_batches(sequence, batch_size):
return [sequence[pos:pos + batch_size] for pos in range(0, len(sequence), batch_size)]
batch_size = 4
table = Texttable()
table.set_deco(Texttable.BORDER | Texttable.HLINES | Texttable.VLINES)
for row in iterate_in_batches(sorted(names), batch_size):
if len(row) < batch_size:
row += [""] * (batch_size - len(row))
table.add_row(row)
print(table.draw())


OPTIONS = {
"accuracy_binary": standard_metrics.AccuracyBinary(),
"accuracy_multi": standard_metrics.AccuracyMulticlass(),
"balanced_accuracy_binary": standard_metrics.BalancedAccuracyBinary(),
"balanced_accuracy_multi": standard_metrics.BalancedAccuracyMulticlass(),
"f1": standard_metrics.F1(),
'f1_micro': standard_metrics.F1Micro(),
'f1_macro': standard_metrics.F1Macro(),
'f1_weighted': standard_metrics.F1Weighted(),
"precision": standard_metrics.Precision(),
"precision_micro": standard_metrics.PrecisionMicro(),
"precision_macro": standard_metrics.PrecisionMacro(),
"precision_weighted": standard_metrics.PrecisionWeighted(),
"auc": standard_metrics.AUC(),
"auc_micro": standard_metrics.AUCMicro(),
"auc_macro": standard_metrics.AUCMacro(),
"auc_weighted": standard_metrics.AUCWeighted(),
"log_loss_binary": standard_metrics.LogLossBinary(),
"log_loss_multi": standard_metrics.LogLossMulticlass(),
"mcc_binary": standard_metrics.MCCBinary(),
"mcc_multi": standard_metrics.MCCMulticlass(),
"rmse": standard_metrics.RootMeanSquaredError(),
"r2": standard_metrics.R2(),
"mae": standard_metrics.MAE(),
"mse": standard_metrics.MSE(),
"median_ae": standard_metrics.MedianAE(),
"max_error": standard_metrics.MaxError(),
"exp_var": standard_metrics.ExpVariance()
}


def get_objective(objective):
def print_all_objective_names():
"""Get all valid objective names in a table.
Returns:
None
"""
all_objectives_dict = _all_objectives_dict()
all_names = list(all_objectives_dict.keys())
_print_objectives_in_table(all_names)


def get_objective(objective, return_instance=False, **kwargs):
"""Returns the Objective object of the given objective name
Args:
objective (str): name of the objective
Arguments:
objective (str or ObjectiveBase): name or instance of the objective class.
return_instance (bool): Whether to return an instance of the objective. This only applies if objective
is of type str. Note that the instance will be initialized with default arguments.
**kwargs (Any): Any keyword arguments to pass into the objective. Only used when return_instance=True.
Returns:
Objective
ObjectiveBase if the parameter objective is of type ObjectiveBase. If objective is instead a valid
objective name, function will return the class corresponding to that name. If return_instance is True,
an instance of that objective will be returned.
"""
if objective is None:
raise TypeError("Objective parameter cannot be NoneType")
if isinstance(objective, ObjectiveBase):
return objective
all_objectives_dict = _all_objectives_dict()
if not isinstance(objective, str):
raise TypeError("If parameter objective is not a string, it must be an instance of ObjectiveBase!")
if objective.lower() not in all_objectives_dict:
raise ObjectiveNotFoundError(f"{objective} is not a valid Objective! "
"Use evalml.objectives.print_all_objective_names() "
"to get a list of all valid objective names.")

objective_class = all_objectives_dict[objective.lower()]

if return_instance:
try:
return objective_class(**kwargs)
except TypeError as e:
raise TypeError(f"In get_objective, cannot pass in return_instance=True for {objective} because {str(e)}")

try:
objective = objective.lower()
return OPTIONS[objective]
except (AttributeError, KeyError):
raise ObjectiveNotFoundError("Could not find the specified objective.")
return objective_class


def get_objectives(problem_type):
"""Returns all objectives associated with the given problem type
"""Returns all objective classes associated with the given problem type.
Args:
problem_type (str/ProblemTypes): type of problem
@@ -66,4 +93,6 @@ def get_objectives(problem_type):
List of Objectives
"""
problem_type = handle_problem_types(problem_type)
return [obj for obj in OPTIONS.values() if obj.problem_type == problem_type]
all_objectives_dict = _all_objectives_dict()
objectives = [obj for obj in all_objectives_dict.values() if obj.problem_type == problem_type]
return objectives
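
A short usage sketch of the reworked utilities, assuming an evalml install at this commit; the CostBenefitMatrix call presumes that objective still requires constructor arguments, so the TypeError branch is an assumption rather than documented behavior:

from evalml.objectives import get_objective, get_objectives, print_all_objective_names

# Print every valid objective name in a table built from _all_objectives_dict().
print_all_objective_names()

# get_objectives now returns class definitions rather than instances.
binary_objectives = get_objectives("binary")
print([objective.name for objective in binary_objectives])

# return_instance=True forwards **kwargs to the constructor; an objective that
# requires arguments raises a TypeError explaining why the instance could not be created.
try:
    get_objective("cost benefit matrix", return_instance=True)
except TypeError as error:
    print(error)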
2 changes: 1 addition & 1 deletion evalml/pipelines/binary_classification_pipeline.py
@@ -25,7 +25,7 @@ def _predict(self, X, objective=None):
X_t = self._transform(X)

if objective is not None:
objective = get_objective(objective)
objective = get_objective(objective, return_instance=True)
if objective.problem_type != self.problem_type:
raise ValueError("You can only use a binary classification objective to make predictions for a binary classification pipeline.")

2 changes: 1 addition & 1 deletion evalml/pipelines/classification_pipeline.py
@@ -127,7 +127,7 @@ def score(self, X, y, objectives):
if not isinstance(y, pd.Series):
y = pd.Series(y)

objectives = [get_objective(o) for o in objectives]
objectives = [get_objective(o, return_instance=True) for o in objectives]
y = self._encode_targets(y)
y_predicted, y_predicted_proba = self._compute_predictions(X, objectives)

2 changes: 1 addition & 1 deletion evalml/pipelines/regression_pipeline.py
@@ -49,6 +49,6 @@ def score(self, X, y, objectives):
if not isinstance(y, pd.Series):
y = pd.Series(y)

objectives = [get_objective(o) for o in objectives]
objectives = [get_objective(o, return_instance=True) for o in objectives]
y_predicted = self.predict(X)
return self._score_all_objectives(X, y, y_predicted, y_pred_proba=None, objectives=objectives)
