diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst index 51b8fafe18..2e5e09e85b 100644 --- a/docs/source/api_reference.rst +++ b/docs/source/api_reference.rst @@ -104,6 +104,8 @@ Estimators LinearRegressor RandomForestRegressor +.. currentmodule:: evalml.pipelines + .. currentmodule:: evalml.pipelines @@ -185,7 +187,6 @@ Domain Specific FraudCost LeadScoring - Classification ~~~~~~~~~~~~~~ @@ -194,10 +195,18 @@ Classification :template: class.rst :nosignatures: + AUC + AUCMacro + AUCMicro + AUCWeighted F1 F1Micro F1Macro F1Weighted + LogLossBinary + LogLossMulticlass + MCCBinary + MCCMulticlass Precision PrecisionMicro PrecisionMacro @@ -206,15 +215,6 @@ Classification RecallMicro RecallMacro RecallWeighted - AUC - AUCMicro - AUCMacro - AUCWeighted - LogLoss - MCC - ROC - ConfusionMatrix - Regression ~~~~~~~~~~ @@ -224,14 +224,13 @@ Regression :template: class.rst :nosignatures: - R2 + ExpVariance MAE + MaxError + MedianAE MSE MSLE - MedianAE - MaxError - ExpVariance - + R2 Plot Metrics ~~~~~~~~~~~~ diff --git a/docs/source/objectives/custom_objectives.ipynb b/docs/source/objectives/custom_objectives.ipynb index c73c8a9f11..b9d0cc2f58 100644 --- a/docs/source/objectives/custom_objectives.ipynb +++ b/docs/source/objectives/custom_objectives.ipynb @@ -34,9 +34,7 @@ "class FraudCost(ObjectiveBase):\n", " \"\"\"Score the percentage of money lost of the total transaction amount process due to fraud\"\"\"\n", " name = \"Fraud Cost\"\n", - " needs_fitting = True\n", " greater_is_better = False\n", - " uses_extra_columns = True\n", " score_needs_proba = False\n", "\n", " def __init__(self, retry_percentage=.5, interchange_fee=.02,\n", @@ -116,4 +114,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/evalml/automl/auto_base.py b/evalml/automl/auto_base.py index 9a5d565c43..a6a8087ff3 100644 --- a/evalml/automl/auto_base.py +++ b/evalml/automl/auto_base.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +from sklearn.model_selection import train_test_split from tqdm import tqdm from .pipeline_search_plots import PipelineSearchPlots @@ -40,12 +41,11 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time, self.verbose = verbose self.possible_pipelines = get_pipelines(problem_type=self.problem_type, model_families=allowed_model_families) self.objective = get_objective(objective) + if self.problem_type != self.objective.problem_type: + raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value)) logger.verbose = verbose - if self.problem_type not in self.objective.problem_types: - raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value)) - if additional_objectives is not None: additional_objectives = [get_objective(o) for o in additional_objectives] else: @@ -228,10 +228,10 @@ def _check_stopping_condition(self, start): def _check_multiclass(self, y): if y.nunique() <= 2: return - if ProblemTypes.MULTICLASS not in self.objective.problem_types: + if self.objective.problem_type != ProblemTypes.MULTICLASS: raise ValueError("Given objective {} is not compatible with a multiclass problem.".format(self.objective.name)) for obj in self.additional_objectives: - if ProblemTypes.MULTICLASS not in obj.problem_types: + if obj.problem_type != ProblemTypes.MULTICLASS: raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(obj.name)) def 
_transform_parameters(self, pipeline_class, parameters, number_features): @@ -290,7 +290,18 @@ def _do_iteration(self, X, y, pbar, raise_errors): objectives_to_score = [self.objective] + self.additional_objectives try: - pipeline.fit(X_train, y_train, self.objective) + X_threshold_tuning = None + y_threshold_tuning = None + + if self.objective.problem_type == ProblemTypes.BINARY and self.objective.can_optimize_threshold: + X_train, X_threshold_tuning, y_train, y_threshold_tuning = train_test_split(X_train, y_train, test_size=0.2, random_state=pipeline.estimator.random_state) + pipeline.fit(X_train, y_train) + if self.objective.problem_type == ProblemTypes.BINARY: + pipeline.threshold = 0.5 + if self.objective.can_optimize_threshold: + y_predict_proba = pipeline.predict_proba(X_threshold_tuning) + y_predict_proba = y_predict_proba[:, 1] + pipeline.threshold = self.objective.optimize_threshold(y_predict_proba, y_threshold_tuning, X=X_threshold_tuning) scores = pipeline.score(X_test, y_test, objectives=objectives_to_score) score = scores[self.objective.name] plot_data.append(pipeline.get_plot_data(X_test, y_test, self.plot_metrics)) diff --git a/evalml/automl/auto_classification_search.py b/evalml/automl/auto_classification_search.py index 91c307626f..47c3a44b87 100644 --- a/evalml/automl/auto_classification_search.py +++ b/evalml/automl/auto_classification_search.py @@ -86,7 +86,8 @@ def __init__(self, objective = "precision_micro" problem_type = ProblemTypes.MULTICLASS else: - problem_type = self._set_problem_type(objective, multiclass) + objective = get_objective(objective) + problem_type = objective.problem_type super().__init__( tuner=tuner, @@ -110,27 +111,3 @@ def __init__(self, self.plot_metrics = [ROC(), ConfusionMatrix()] else: self.plot_metrics = [ConfusionMatrix()] - - def _set_problem_type(self, objective, multiclass): - """Sets the problem type of the AutoClassificationSearch to either binary or multiclass. - - If there is an objective either: - a. Set problem_type to MULTICLASS if objective is only multiclass and multiclass is false - b. Set problem_type to MUTLICLASS if multiclass is true - c. 
Default to BINARY - - Arguments: - objective (Object): the objective to optimize - multiclass (bool): boolean representing whether search is for multiclass problems or not - - Returns: - ProblemTypes enum representing type of problem to set AutoClassificationSearch to - - """ - problem_type = ProblemTypes.BINARY - # if exclusively multiclass: infer - if [ProblemTypes.MULTICLASS] == get_objective(objective).problem_types: - problem_type = ProblemTypes.MULTICLASS - elif multiclass: - problem_type = ProblemTypes.MULTICLASS - return problem_type diff --git a/evalml/objectives/__init__.py b/evalml/objectives/__init__.py index 4dd0b8102f..efab7f5dd3 100644 --- a/evalml/objectives/__init__.py +++ b/evalml/objectives/__init__.py @@ -4,17 +4,18 @@ from .objective_base import ObjectiveBase from .standard_metrics import ( AUC, - F1, - MCC, - R2, AUCMacro, AUCMicro, AUCWeighted, ExpVariance, + F1, F1Macro, F1Micro, F1Weighted, - LogLoss, + LogLossBinary, + LogLossMulticlass, + MCCBinary, + MCCMulticlass, MaxError, MAE, MedianAE, @@ -24,6 +25,7 @@ PrecisionMacro, PrecisionMicro, PrecisionWeighted, + R2, Recall, RecallMacro, RecallMicro, @@ -32,3 +34,6 @@ ConfusionMatrix ) from .utils import get_objective, get_objectives +from .binary_classification_objective import BinaryClassificationObjective +from .multiclass_classification_objective import MultiClassificationObjective +from .regression_objective import RegressionObjective diff --git a/evalml/objectives/binary_classification_objective.py b/evalml/objectives/binary_classification_objective.py new file mode 100644 index 0000000000..e01e2141e7 --- /dev/null +++ b/evalml/objectives/binary_classification_objective.py @@ -0,0 +1,62 @@ +import pandas as pd +from scipy.optimize import minimize_scalar + +from .objective_base import ObjectiveBase + +from evalml.problem_types import ProblemTypes + + +class BinaryClassificationObjective(ObjectiveBase): + """ + Base class for all binary classification objectives. + + problem_type (ProblemTypes): Specifies the type of problem this objective is defined for (binary classification) + can_optimize_threshold (bool): Determines if threshold used by objective can be optimized or not. + """ + problem_type = ProblemTypes.BINARY + + @property + def can_optimize_threshold(cls): + """Returns a boolean determining if we can optimize the binary classification objective threshold. This will be false for any objective that works directly with predicted probabilities, like log loss and AUC. Otherwise, it will be true.""" + return not cls.score_needs_proba + + def optimize_threshold(self, ypred_proba, y_true, X=None): + """Learn a binary classification threshold which optimizes the current objective. + + Arguments: + ypred_proba (list): The classifier's predicted probabilities + + y_true (list): The ground truth for the predictions. + + X (pd.DataFrame, optional): Any extra columns that are needed from training data. 
+ + Returns: + Optimal threshold for this objective + """ + if not self.can_optimize_threshold: + raise RuntimeError("Trying to optimize objective that can't be optimized!") + + def cost(threshold): + predictions = self.decision_function(ypred_proba=ypred_proba, threshold=threshold, X=X) + cost = self.objective_function(predictions, y_true, X=X) + return -cost if self.greater_is_better else cost + + optimal = minimize_scalar(cost, method='Golden', options={"maxiter": 100}) + return optimal.x + + def decision_function(self, ypred_proba, threshold=0.5, X=None): + """Apply a learned threshold to predicted probabilities to get predicted classes. + + Arguments: + ypred_proba (list): The classifier's predicted probabilities + + threshold (float, optional): Threshold used to make a prediction. Defaults to 0.5. + + X (pd.DataFrame, optional): Any extra columns that are needed from training data. + + Returns: + predictions + """ + if not isinstance(ypred_proba, pd.Series): + ypred_proba = pd.Series(ypred_proba) + return ypred_proba > threshold diff --git a/evalml/objectives/fraud_cost.py b/evalml/objectives/fraud_cost.py index 931c2c251b..8f648db8e3 100644 --- a/evalml/objectives/fraud_cost.py +++ b/evalml/objectives/fraud_cost.py @@ -1,74 +1,68 @@ import pandas as pd -from .objective_base import ObjectiveBase +from .binary_classification_objective import BinaryClassificationObjective -from evalml.problem_types import ProblemTypes - -class FraudCost(ObjectiveBase): +class FraudCost(BinaryClassificationObjective): """Score the percentage of money lost of the total transaction amount process due to fraud""" name = "Fraud Cost" - problem_types = [ProblemTypes.BINARY] - needs_fitting = True greater_is_better = False - uses_extra_columns = True score_needs_proba = False def __init__(self, retry_percentage=.5, interchange_fee=.02, - fraud_payout_percentage=1.0, amount_col='amount', verbose=False): + fraud_payout_percentage=1.0, amount_col='amount'): """Create instance of FraudCost Arguments: - retry_percentage (float): what percentage of customers will retry a transaction if it - is declined? Between 0 and 1. Defaults to .5 + retry_percentage (float): What percentage of customers will retry a transaction if it + is declined. Between 0 and 1. Defaults to .5 - interchange_fee (float): how much of each successful transaction do you collect? + interchange_fee (float): How much of each successful transaction you can collect. Between 0 and 1. Defaults to .02 - fraud_payout_percentage (float): how percentage of fraud will you be unable to collect. + fraud_payout_percentage (float): Percentage of fraud you will not be able to collect. Between 0 and 1. Defaults to 1.0 - amount_col (str): name of column in data that contains the amount. defaults to "amount" + amount_col (str): Name of column in data that contains the amount. 
Defaults to "amount" """ self.retry_percentage = retry_percentage self.interchange_fee = interchange_fee self.fraud_payout_percentage = fraud_payout_percentage self.amount_col = amount_col - super().__init__(verbose=verbose) - def decision_function(self, y_predicted, extra_cols, threshold): - """Determine if transaction is fraud given predicted probabilities, dataframe with transaction amount, and threshold + def decision_function(self, ypred_proba, threshold=0.0, X=None): + """Determine if a transaction is fraud given predicted probabilities, threshold, and dataframe with transaction amount Arguments: - y_predicted (pd.Series): predicted labels - extra_cols (pd.DataFrame): extra data needed - threshold (float): dollar threshold to determine if transaction is fraud + ypred_proba (pd.Series): Predicted probabilities + X (pd.DataFrame): Dataframe containing transaction amount + threshold (float): Dollar threshold to determine if transaction is fraud Returns: - pd.Series: series of predicted fraud label using extra cols and threshold + pd.Series: Series of predicted fraud labels using X and threshold """ - if not isinstance(extra_cols, pd.DataFrame): - extra_cols = pd.DataFrame(extra_cols) + if not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) - if not isinstance(y_predicted, pd.Series): - y_predicted = pd.Series(y_predicted) + if not isinstance(ypred_proba, pd.Series): + ypred_proba = pd.Series(ypred_proba) - transformed_probs = (y_predicted.values * extra_cols[self.amount_col]) + transformed_probs = (ypred_proba.values * X[self.amount_col]) return transformed_probs > threshold - def objective_function(self, y_predicted, y_true, extra_cols): + def objective_function(self, y_predicted, y_true, X): """Calculate amount lost to fraud per transaction given predictions, true values, and dataframe with transaction amount Arguments: y_predicted (pd.Series): predicted fraud labels y_true (pd.Series): true fraud labels - extra_cols (pd.DataFrame): extra data needed + X (pd.DataFrame): dataframe with transaction amounts Returns: float: amount lost to fraud per transaction """ - if not isinstance(extra_cols, pd.DataFrame): - extra_cols = pd.DataFrame(extra_cols) + if not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) if not isinstance(y_predicted, pd.Series): y_predicted = pd.Series(y_predicted) @@ -77,7 +71,10 @@ def objective_function(self, y_predicted, y_true, extra_cols): y_true = pd.Series(y_true) # extract transaction using the amount columns in users data - transaction_amount = extra_cols[self.amount_col] + try: + transaction_amount = X[self.amount_col] + except KeyError: + raise ValueError("`{}` is not a valid column in X.".format(self.amount_col)) # amount paid if transaction is fraud fraud_cost = transaction_amount * self.fraud_payout_percentage diff --git a/evalml/objectives/lead_scoring.py b/evalml/objectives/lead_scoring.py index 7d759ee1e0..4a7e0f8f74 100644 --- a/evalml/objectives/lead_scoring.py +++ b/evalml/objectives/lead_scoring.py @@ -1,40 +1,35 @@ import pandas as pd -from .objective_base import ObjectiveBase +from .binary_classification_objective import BinaryClassificationObjective -from evalml.problem_types import ProblemTypes - -class LeadScoring(ObjectiveBase): +class LeadScoring(BinaryClassificationObjective): """Lead scoring""" name = "Lead Scoring" - problem_types = [ProblemTypes.BINARY] - - needs_fitting = True greater_is_better = True score_needs_proba = False - name = "Lead Scoring" - def __init__(self, true_positives=1, false_positives=-1, verbose=False): + def 
__init__(self, true_positives=1, false_positives=-1): """Create instance. Arguments: - label (int) : label to optimize threshold for true_positives (int) : reward for a true positive false_positives (int) : cost for a false positive. Should be negative. """ self.true_positives = true_positives self.false_positives = false_positives - super().__init__(verbose=verbose) + def objective_function(self, y_predicted, y_true, X=None): + """Calculate the profit per lead. - def decision_function(self, y_predicted, threshold): - if not isinstance(y_predicted, pd.Series): - y_predicted = pd.Series(y_predicted) - - return y_predicted > threshold + Arguments: + y_predicted (pd.Series): predicted labels + y_true (pd.Series): true labels + X (pd.DataFrame): None, not used. - def objective_function(self, y_predicted, y_true): + Returns: + float: profit per lead + """ if not isinstance(y_predicted, pd.Series): y_predicted = pd.Series(y_predicted) diff --git a/evalml/objectives/multiclass_classification_objective.py b/evalml/objectives/multiclass_classification_objective.py new file mode 100644 index 0000000000..b9ea81834a --- /dev/null +++ b/evalml/objectives/multiclass_classification_objective.py @@ -0,0 +1,12 @@ +from .objective_base import ObjectiveBase + +from evalml.problem_types import ProblemTypes + + +class MultiClassificationObjective(ObjectiveBase): + """ + Base class for all multi-class classification objectives. + + problem_type (ProblemTypes): Specifies the type of problem this objective is defined for (multiclass classification). + """ + problem_type = ProblemTypes.MULTICLASS diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py index 7da9fe2fdc..1d0504b3f2 100644 --- a/evalml/objectives/objective_base.py +++ b/evalml/objectives/objective_base.py @@ -1,109 +1,54 @@ -from scipy.optimize import minimize_scalar +from abc import ABC, abstractmethod -from evalml.problem_types import handle_problem_types +class ObjectiveBase(ABC): + """Base class for all objectives.""" -class ObjectiveBase: - needs_fitting = False - greater_is_better = True - score_needs_proba = False - uses_extra_columns = False - problem_types = [] - - def __init__(self, verbose=False): - self.verbose = verbose - + @property @classmethod - def supports_problem_type(cls, problem_type): - """ Checks if objective supports given ProblemType - - Arguments: - problem_type(str or ProblemType): problem type to check - Returns: - bool: whether objective supports ProblemType - """ - problem_type = handle_problem_types(problem_type) - if problem_type in cls.problem_types: - return True - return False - - def fit(self, y_predicted, y_true, extra_cols=None): - """Learn the objective function based on the predictions from a model. - - If needs_fitting is false, this method won't be called + @abstractmethod + def name(cls): + """Returns a name describing the objective.""" + raise NotImplementedError("This objective must have a `name` attribute as a class variable") - Arguments: - y_predicted (list): the predictions from the model. If needs_proba is True, - it is the probability estimates - - y_true (list): the ground truth for the predictions. - - extra_cols (pd.DataFrame): any extra columns that are needed from training - data to fit. Only provided if uses_extra_columns is True. 
+ @property + @classmethod + @abstractmethod + def greater_is_better(cls): + """Returns a boolean determining if a greater score indicates better model performance.""" + raise NotImplementedError("This objective must have a `greater_is_better` boolean attribute as a class variable") - Returns: - self + @property + @classmethod + @abstractmethod + def score_needs_proba(cls): + """Returns a boolean determining if the score() method needs probability estimates. This should be true for objectives which work with predicted probabilities, like log loss or AUC, and false for objectives which compare predicted class labels to the actual labels, like F1 or correlation. """ + raise NotImplementedError("This objective must have a `score_needs_proba` boolean attribute as a class variable") - def cost(threshold): - if extra_cols is not None: - predictions = self.decision_function(y_predicted, extra_cols, threshold) - cost = self.objective_function(predictions, y_true, extra_cols) - else: - predictions = self.decision_function(y_predicted, threshold) - cost = self.objective_function(predictions, y_true) - - if self.greater_is_better: - return -cost - - return cost - - self.optimal = minimize_scalar(cost, method='Golden', options={"maxiter": 100}) - self.threshold = self.optimal.x - - if self.verbose: - print("Best threshold found at: ", self.threshold) - - return self - - def predict(self, y_predicted, extra_cols=None): - """Apply the learned objective function to the output of a model. - - If needs_fitting is false, this method won't be called - - Arguments: - y_predicted: the prediction to transform to final prediction + @classmethod + @abstractmethod + def objective_function(cls, y_predicted, y_true, X=None): + """Computes the relative value of the provided predictions compared to the actual labels, according to a specified metric + Arguments: + y_predicted (pd.Series) : predicted values of length [n_samples] + y_true (pd.Series) : actual class labels of length [n_samples] + X (pd.DataFrame or np.array) : extra data of shape [n_samples, n_features] necessary to calculate score Returns: - predictions + numerical value used to calculate score """ + raise NotImplementedError("`objective_function` must be implemented.") - if extra_cols is not None: - predictions = self.decision_function(y_predicted, extra_cols, self.threshold) - else: - predictions = self.decision_function(y_predicted, self.threshold) - - return predictions - - def score(self, y_predicted, y_true, extra_cols=None): - """Calculate score from applying fitted objective to predicted values - - If a higher score is better than a lower score, set greater_is_better attribute to True + def score(self, y_predicted, y_true, X=None): + """Returns a numerical score indicating performance based on the differences between the predicted and actual values. Arguments: - y_predicted (list): the predictions from the model. If needs_proba is True, - it is the probability estimates - - y_true (list): the ground truth for the predictions. - - extra_cols (pd.DataFrame): any extra columns that are needed from training - data to fit. Only provided if uses_extra_columns is True. 
+ y_predicted (pd.Series) : predicted values of length [n_samples] + y_true (pd.Series) : actual class labels of length [n_samples] + X (pd.DataFrame or np.array) : extra data of shape [n_samples, n_features] necessary to calculate score Returns: score - """ - if extra_cols is not None: - return self.objective_function(y_predicted, y_true, extra_cols) - else: - return self.objective_function(y_predicted, y_true) + return self.objective_function(y_predicted, y_true, X=X) diff --git a/evalml/objectives/regression_objective.py b/evalml/objectives/regression_objective.py new file mode 100644 index 0000000000..abcf9d470e --- /dev/null +++ b/evalml/objectives/regression_objective.py @@ -0,0 +1,13 @@ +from .objective_base import ObjectiveBase + +from evalml.problem_types import ProblemTypes + + +class RegressionObjective(ObjectiveBase): + """ + Base class for all regression objectives. + + problem_type (ProblemTypes): type of problem this objective is. Set to ProblemTypes.REGRESSION. + """ + problem_type = ProblemTypes.REGRESSION + score_needs_proba = False diff --git a/evalml/objectives/standard_metrics.py b/evalml/objectives/standard_metrics.py index 77cdb3acf5..d49f64dffe 100644 --- a/evalml/objectives/standard_metrics.py +++ b/evalml/objectives/standard_metrics.py @@ -6,318 +6,288 @@ from sklearn.preprocessing import label_binarize from sklearn.utils.multiclass import unique_labels -from .objective_base import ObjectiveBase - -from evalml.problem_types import ProblemTypes +from .binary_classification_objective import BinaryClassificationObjective +from .multiclass_classification_objective import MultiClassificationObjective +from .regression_objective import RegressionObjective # todo does this need tuning? -class F1(ObjectiveBase): +class F1(BinaryClassificationObjective): """F1 score for binary classification""" - needs_fitting = False + name = "F1" greater_is_better = True score_needs_proba = False - name = "F1" - problem_types = [ProblemTypes.BINARY] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.f1_score(y_true, y_predicted) -class F1Micro(ObjectiveBase): +class F1Micro(MultiClassificationObjective): """F1 score for multiclass classification using micro averaging""" - needs_fitting = False + name = "F1 Micro" greater_is_better = True score_needs_proba = False - name = "F1 Micro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.f1_score(y_true, y_predicted, average='micro') -class F1Macro(ObjectiveBase): +class F1Macro(MultiClassificationObjective): """F1 score for multiclass classification using macro averaging""" - needs_fitting = False + name = "F1 Macro" greater_is_better = True score_needs_proba = False - name = "F1 Macro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.f1_score(y_true, y_predicted, average='macro') -class F1Weighted(ObjectiveBase): +class F1Weighted(MultiClassificationObjective): """F1 score for multiclass classification using weighted averaging""" - needs_fitting = False + name = "F1 Weighted" greater_is_better = True score_needs_proba = False - name = "F1 Weighted" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.f1_score(y_true, y_predicted, average='weighted') -class 
Precision(ObjectiveBase): +class Precision(BinaryClassificationObjective): """Precision score for binary classification""" - needs_fitting = False + name = "Precision" greater_is_better = True score_needs_proba = False - name = "Precision" - problem_types = [ProblemTypes.BINARY] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.precision_score(y_true, y_predicted) -class PrecisionMicro(ObjectiveBase): +class PrecisionMicro(MultiClassificationObjective): """Precision score for multiclass classification using micro averaging""" - needs_fitting = False + name = "Precision Micro" greater_is_better = True score_needs_proba = False - name = "Precision Micro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.precision_score(y_true, y_predicted, average='micro') -class PrecisionMacro(ObjectiveBase): +class PrecisionMacro(MultiClassificationObjective): """Precision score for multiclass classification using macro averaging""" - needs_fitting = False + name = "Precision Macro" greater_is_better = True score_needs_proba = False - name = "Precision Macro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.precision_score(y_true, y_predicted, average='macro') -class PrecisionWeighted(ObjectiveBase): +class PrecisionWeighted(MultiClassificationObjective): """Precision score for multiclass classification using weighted averaging""" - needs_fitting = False + name = "Precision Weighted" greater_is_better = True score_needs_proba = False - name = "Precision Weighted" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.precision_score(y_true, y_predicted, average='weighted') -class Recall(ObjectiveBase): +class Recall(BinaryClassificationObjective): """Recall score for binary classification""" - needs_fitting = False + name = "Recall" greater_is_better = True score_needs_proba = False - name = "Recall" - problem_types = [ProblemTypes.BINARY] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.recall_score(y_true, y_predicted) -class RecallMicro(ObjectiveBase): +class RecallMicro(MultiClassificationObjective): """Recall score for multiclass classification using micro averaging""" - needs_fitting = False + name = "Recall Micro" greater_is_better = True score_needs_proba = False - name = "Recall Micro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.recall_score(y_true, y_predicted, average='micro') -class RecallMacro(ObjectiveBase): +class RecallMacro(MultiClassificationObjective): """Recall score for multiclass classification using macro averaging""" - needs_fitting = False + name = "Recall Macro" greater_is_better = True score_needs_proba = False - name = "Recall Macro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.recall_score(y_true, y_predicted, average='macro') -class RecallWeighted(ObjectiveBase): +class RecallWeighted(MultiClassificationObjective): """Recall score for multiclass classification using weighted averaging""" - 
needs_fitting = False + name = "Recall Weighted" greater_is_better = True score_needs_proba = False - name = "Recall Weighted" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.recall_score(y_true, y_predicted, average='weighted') -class AUC(ObjectiveBase): +class AUC(BinaryClassificationObjective): """AUC score for binary classification""" - needs_fitting = False + name = "AUC" greater_is_better = True score_needs_proba = True - name = "AUC" - problem_types = [ProblemTypes.BINARY] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.roc_auc_score(y_true, y_predicted) -class AUCMicro(ObjectiveBase): +class AUCMicro(MultiClassificationObjective): """AUC score for multiclass classification using micro averaging""" - needs_fitting = False + name = "AUC Micro" greater_is_better = True score_needs_proba = True - name = "AUC Micro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): y_true, y_predicted = _handle_predictions(y_true, y_predicted) return metrics.roc_auc_score(y_true, y_predicted, average='micro') -class AUCMacro(ObjectiveBase): +class AUCMacro(MultiClassificationObjective): """AUC score for multiclass classification using macro averaging""" - needs_fitting = False + name = "AUC Macro" greater_is_better = True score_needs_proba = True - name = "AUC Macro" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): y_true, y_predicted = _handle_predictions(y_true, y_predicted) return metrics.roc_auc_score(y_true, y_predicted, average='macro') -class AUCWeighted(ObjectiveBase): +class AUCWeighted(MultiClassificationObjective): """AUC Score for multiclass classification using weighted averaging""" - needs_fitting = False + name = "AUC Weighted" greater_is_better = True score_needs_proba = True - name = "AUC Weighted" - problem_types = [ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): y_true, y_predicted = _handle_predictions(y_true, y_predicted) return metrics.roc_auc_score(y_true, y_predicted, average='weighted') -class LogLoss(ObjectiveBase): - """Log Loss for both binary and multiclass classification""" - needs_fitting = False +class LogLossBinary(BinaryClassificationObjective): + """Log Loss for binary classification""" + name = "Log Loss Binary" greater_is_better = False score_needs_proba = True - name = "Log Loss" - problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): + return metrics.log_loss(y_true, y_predicted) + + +class LogLossMulticlass(MultiClassificationObjective): + """Log Loss for multiclass classification""" + name = "Log Loss Multiclass" + greater_is_better = False + score_needs_proba = True + + def objective_function(self, y_predicted, y_true, X=None): return metrics.log_loss(y_true, y_predicted) -class MCC(ObjectiveBase): - """Matthews correlation coefficient for both binary and multiclass classification""" - needs_fitting = False +class MCCBinary(BinaryClassificationObjective): + """Matthews correlation coefficient for binary classification""" + name = "MCC Binary" greater_is_better = True score_needs_proba = False - name = "MCC" - 
problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.matthews_corrcoef(y_true, y_predicted) -class R2(ObjectiveBase): - """Coefficient of determination for regression""" - needs_fitting = False +class MCCMulticlass(MultiClassificationObjective): + """Matthews correlation coefficient for multiclass classification""" + name = "MCC Multiclass" greater_is_better = True score_needs_proba = False + + def objective_function(self, y_predicted, y_true, X=None): + return metrics.matthews_corrcoef(y_true, y_predicted) + + +class R2(RegressionObjective): + """Coefficient of determination for regression""" name = "R2" - problem_types = [ProblemTypes.REGRESSION] + greater_is_better = True + score_needs_proba = False - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.r2_score(y_true, y_predicted) -class MAE(ObjectiveBase): +class MAE(RegressionObjective): """Mean absolute error for regression""" - needs_fitting = False + name = "MAE" greater_is_better = False score_needs_proba = False - name = "MAE" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.mean_absolute_error(y_true, y_predicted) -class MSE(ObjectiveBase): +class MSE(RegressionObjective): """Mean squared error for regression""" - needs_fitting = False + name = "MSE" greater_is_better = False score_needs_proba = False - name = "MSE" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.mean_squared_error(y_true, y_predicted) -class MSLE(ObjectiveBase): +class MSLE(RegressionObjective): """Mean squared log error for regression""" - needs_fitting = False + name = "MSLE" greater_is_better = False score_needs_proba = False - name = "MSLE" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.mean_squared_log_error(y_true, y_predicted) -class MedianAE(ObjectiveBase): +class MedianAE(RegressionObjective): """Median absolute error for regression""" - needs_fitting = False + name = "MedianAE" greater_is_better = False score_needs_proba = False - name = "MedianAE" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.median_absolute_error(y_true, y_predicted) -class MaxError(ObjectiveBase): +class MaxError(RegressionObjective): """Maximum residual error for regression""" - needs_fitting = False + name = "MaxError" greater_is_better = False score_needs_proba = False - name = "MaxError" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.max_error(y_true, y_predicted) -class ExpVariance(ObjectiveBase): +class ExpVariance(RegressionObjective): """Explained variance score for regression""" - needs_fitting = False + name = "ExpVariance" greater_is_better = True score_needs_proba = False - name = "ExpVariance" - problem_types = [ProblemTypes.REGRESSION] - def score(self, y_predicted, y_true): + def objective_function(self, y_predicted, y_true, X=None): return metrics.explained_variance_score(y_true, y_predicted) class PlotMetric(ABC): - 
score_needs_proba = True name = None + score_needs_proba = False @abstractmethod def score(self, y_predicted, y_true): @@ -326,19 +296,16 @@ def score(self, y_predicted, y_true): class ROC(PlotMetric): """Receiver Operating Characteristic score for binary classification.""" - score_needs_proba = True name = "ROC" - problem_types = [ProblemTypes.BINARY] + score_needs_proba = True def score(self, y_predicted, y_true): return metrics.roc_curve(y_true, y_predicted) class ConfusionMatrix(PlotMetric): - """Confusion matrix for classification problems""" - score_needs_proba = False + """Confusion matrix for binary and multiclass classification problems""" name = "Confusion Matrix" - problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] def score(self, y_predicted, y_true): labels = unique_labels(y_predicted, y_true) diff --git a/evalml/objectives/utils.py b/evalml/objectives/utils.py index 65983dd146..6246a259ee 100644 --- a/evalml/objectives/utils.py +++ b/evalml/objectives/utils.py @@ -21,8 +21,10 @@ "auc_micro": standard_metrics.AUCMicro(), "auc_macro": standard_metrics.AUCMacro(), "auc_weighted": standard_metrics.AUCWeighted(), - "log_loss": standard_metrics.LogLoss(), - "mcc": standard_metrics.MCC(), + "log_loss_binary": standard_metrics.LogLossBinary(), + "log_loss_multi": standard_metrics.LogLossMulticlass(), + "mcc_binary": standard_metrics.MCCBinary(), + "mcc_multi": standard_metrics.MCCMulticlass(), "r2": standard_metrics.R2(), "mae": standard_metrics.MAE(), "mse": standard_metrics.MSE(), @@ -55,7 +57,7 @@ def get_objective(objective): def get_objectives(problem_type): - """Returns all objectives associated with the given problem types + """Returns all objectives associated with the given problem type Args: problem_type (str/ProblemTypes) : type of problem @@ -64,4 +66,4 @@ def get_objectives(problem_type): List of Objectives """ problem_type = handle_problem_types(problem_type) - return [OPTIONS[obj] for obj in OPTIONS if OPTIONS[obj].supports_problem_type(problem_type)] + return [obj for obj in OPTIONS.values() if obj.problem_type == problem_type] diff --git a/evalml/pipelines/binary_classification_pipeline.py b/evalml/pipelines/binary_classification_pipeline.py index f3803d2770..1a5e7469dd 100644 --- a/evalml/pipelines/binary_classification_pipeline.py +++ b/evalml/pipelines/binary_classification_pipeline.py @@ -1,85 +1,47 @@ from collections import OrderedDict import pandas as pd -from sklearn.model_selection import train_test_split from evalml.objectives import get_objective from evalml.pipelines.classification_pipeline import ClassificationPipeline +from evalml.problem_types import ProblemTypes class BinaryClassificationPipeline(ClassificationPipeline): - def fit(self, X, y, objective=None, objective_fit_size=0.2): - """Build a model - - Arguments: - X (pd.DataFrame or np.array): the input training data of shape [n_samples, n_features] - - y (pd.Series): the target training labels of length [n_samples] - - objective (Object or string): the objective to optimize - - objective_fit_size (float): the proportion of the dataset to include in the test split. 
- Returns: - - self - - """ - if not isinstance(X, pd.DataFrame): - X = pd.DataFrame(X) - - if not isinstance(y, pd.Series): - y = pd.Series(y) - - if objective is not None: - objective = get_objective(objective) - if objective.needs_fitting: - X, X_objective, y, y_objective = train_test_split(X, y, test_size=objective_fit_size, random_state=self.estimator.random_state) - - self._fit(X, y) - - if objective is not None: - if objective.needs_fitting: - y_predicted_proba = self.predict_proba(X_objective) - y_predicted_proba = y_predicted_proba[:, 1] - - if objective.uses_extra_columns: - objective.fit(y_predicted_proba, y_objective, X_objective) - else: - objective.fit(y_predicted_proba, y_objective) - return self + threshold = None + supported_problem_types = ['binary'] def predict(self, X, objective=None): """Make predictions using selected features. - Args: + Arguments: X (pd.DataFrame or np.array) : data of shape [n_samples, n_features] - objective (Object or string): the objective to use to predict - + objective (Object or string): the objective to use to make predictions Returns: pd.Series : estimated labels """ if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X) - X_t = self._transform(X) if objective is not None: objective = get_objective(objective) - if objective.needs_fitting: - y_predicted_proba = self.predict_proba(X) - y_predicted_proba = y_predicted_proba[:, 1] - if objective.uses_extra_columns: - return objective.predict(y_predicted_proba, X) - else: - return objective.predict(y_predicted_proba) + if objective.problem_type != ProblemTypes.BINARY: + raise ValueError("You can only use a binary classification objective to make predictions for a binary classification pipeline.") - return self.estimator.predict(X_t) + if self.threshold is None: + return self.estimator.predict(X_t) + ypred_proba = self.predict_proba(X) + ypred_proba = ypred_proba[:, 1] + if objective is None: + return ypred_proba > self.threshold + return objective.decision_function(ypred_proba, threshold=self.threshold, X=X) def score(self, X, y, objectives): - """Evaluate model performance on current and additional objectives + """Evaluate model performance on objectives - Args: + Arguments: X (pd.DataFrame or np.array) : data of shape [n_samples, n_features] y (pd.Series) : true labels of length [n_samples] objectives (list): list of objectives to score @@ -108,11 +70,8 @@ def score(self, X, y, objectives): if y_predicted is None: y_predicted = self.predict(X, objective) y_predictions = y_predicted + scores.update({objective.name: objective.score(y_predictions, y, X=X)}) - if objective.uses_extra_columns: - scores.update({objective.name: objective.score(y_predictions, y, X)}) - else: - scores.update({objective.name: objective.score(y_predictions, y)}) return scores def get_plot_data(self, X, y, plot_metrics): diff --git a/evalml/pipelines/classification/catboost_binary.py b/evalml/pipelines/classification/catboost_binary.py index 7bc70af8c2..f79570d87d 100644 --- a/evalml/pipelines/classification/catboost_binary.py +++ b/evalml/pipelines/classification/catboost_binary.py @@ -10,7 +10,6 @@ class CatBoostBinaryClassificationPipeline(BinaryClassificationPipeline): Note: impute_strategy must support both string and numeric data """ component_graph = ['Simple Imputer', 'CatBoost Classifier'] - supported_problem_types = ['binary'] custom_hyperparameters = { "impute_strategy": ["most_frequent"], } diff --git a/evalml/pipelines/classification/catboost_multiclass.py 
b/evalml/pipelines/classification/catboost_multiclass.py index eae6ec6b51..af76f1ce15 100644 --- a/evalml/pipelines/classification/catboost_multiclass.py +++ b/evalml/pipelines/classification/catboost_multiclass.py @@ -10,7 +10,6 @@ class CatBoostMulticlassClassificationPipeline(MulticlassClassificationPipeline) Note: impute_strategy must support both string and numeric data """ component_graph = ['Simple Imputer', 'CatBoost Classifier'] - supported_problem_types = ['multiclass'] custom_hyperparameters = { "impute_strategy": ["most_frequent"], } diff --git a/evalml/pipelines/classification/logistic_regression_binary.py b/evalml/pipelines/classification/logistic_regression_binary.py index 3157e1414a..c05f33d2a1 100644 --- a/evalml/pipelines/classification/logistic_regression_binary.py +++ b/evalml/pipelines/classification/logistic_regression_binary.py @@ -4,4 +4,3 @@ class LogisticRegressionBinaryPipeline(BinaryClassificationPipeline): """Logistic Regression Pipeline for binary classification""" component_graph = ['One Hot Encoder', 'Simple Imputer', 'Standard Scaler', 'Logistic Regression Classifier'] - supported_problem_types = ['binary'] diff --git a/evalml/pipelines/classification/logistic_regression_multiclass.py b/evalml/pipelines/classification/logistic_regression_multiclass.py index 0178123444..65e840bc69 100644 --- a/evalml/pipelines/classification/logistic_regression_multiclass.py +++ b/evalml/pipelines/classification/logistic_regression_multiclass.py @@ -4,4 +4,3 @@ class LogisticRegressionMulticlassPipeline(MulticlassClassificationPipeline): """Logistic Regression Pipeline for multiclass classification""" component_graph = ['One Hot Encoder', 'Simple Imputer', 'Standard Scaler', 'Logistic Regression Classifier'] - supported_problem_types = ['multiclass'] diff --git a/evalml/pipelines/classification/random_forest_binary.py b/evalml/pipelines/classification/random_forest_binary.py index 025f98f340..1ea5d86fa5 100644 --- a/evalml/pipelines/classification/random_forest_binary.py +++ b/evalml/pipelines/classification/random_forest_binary.py @@ -5,4 +5,3 @@ class RFBinaryClassificationPipeline(BinaryClassificationPipeline): """Random Forest Pipeline for binary classification""" _name = "Random Forest Binary Classification Pipeline" component_graph = ['One Hot Encoder', 'Simple Imputer', 'RF Classifier Select From Model', 'Random Forest Classifier'] - supported_problem_types = ['binary'] diff --git a/evalml/pipelines/classification/random_forest_multiclass.py b/evalml/pipelines/classification/random_forest_multiclass.py index 8910170b46..6874e38c18 100644 --- a/evalml/pipelines/classification/random_forest_multiclass.py +++ b/evalml/pipelines/classification/random_forest_multiclass.py @@ -5,4 +5,3 @@ class RFMulticlassClassificationPipeline(MulticlassClassificationPipeline): """Random Forest Pipeline for multiclass classification""" _name = "Random Forest Multi-class Classification Pipeline" component_graph = ['One Hot Encoder', 'Simple Imputer', 'RF Classifier Select From Model', 'Random Forest Classifier'] - supported_problem_types = ['multiclass'] diff --git a/evalml/pipelines/classification/xgboost_binary.py b/evalml/pipelines/classification/xgboost_binary.py index 00e9e17fa2..cd6c18d007 100644 --- a/evalml/pipelines/classification/xgboost_binary.py +++ b/evalml/pipelines/classification/xgboost_binary.py @@ -5,7 +5,6 @@ class XGBoostBinaryPipeline(BinaryClassificationPipeline): """XGBoost Pipeline for both binary and multiclass classification""" _name = "XGBoost Binary 
Classification Pipeline" component_graph = ['One Hot Encoder', 'Simple Imputer', 'RF Classifier Select From Model', 'XGBoost Classifier'] - supported_problem_types = ['binary'] def __init__(self, parameters, random_state=0): super().__init__(parameters=parameters, diff --git a/evalml/pipelines/classification/xgboost_multiclass.py b/evalml/pipelines/classification/xgboost_multiclass.py index 293247036b..541a566a4c 100644 --- a/evalml/pipelines/classification/xgboost_multiclass.py +++ b/evalml/pipelines/classification/xgboost_multiclass.py @@ -5,7 +5,6 @@ class XGBoostMulticlassPipeline(MulticlassClassificationPipeline): """XGBoost Pipeline for multiclass classification""" _name = "XGBoost Classifier w/ One Hot Encoder + Simple Imputer + RF Classifier Select From Model" component_graph = ['One Hot Encoder', 'Simple Imputer', 'RF Classifier Select From Model', 'XGBoost Classifier'] - supported_problem_types = ['multiclass'] def __init__(self, parameters, random_state=0): super().__init__(parameters=parameters, diff --git a/evalml/pipelines/classification_pipeline.py b/evalml/pipelines/classification_pipeline.py index efa7c5b48c..573bcfa263 100644 --- a/evalml/pipelines/classification_pipeline.py +++ b/evalml/pipelines/classification_pipeline.py @@ -1,16 +1,16 @@ + import pandas as pd from evalml.pipelines import PipelineBase class ClassificationPipeline(PipelineBase): - threshold_selection_split = True def predict_proba(self, X): """Make probability estimates for labels. - Args: + Arguments: X (pd.DataFrame or np.array) : data of shape [n_samples, n_features] Returns: diff --git a/evalml/pipelines/multiclass_classification_pipeline.py b/evalml/pipelines/multiclass_classification_pipeline.py index 285fb8039c..e3abad5895 100644 --- a/evalml/pipelines/multiclass_classification_pipeline.py +++ b/evalml/pipelines/multiclass_classification_pipeline.py @@ -1,6 +1,43 @@ +from collections import OrderedDict + +import pandas as pd + +from evalml.objectives import get_objective from evalml.pipelines.classification_pipeline import ClassificationPipeline class MulticlassClassificationPipeline(ClassificationPipeline): + supported_problem_types = ['multiclass'] + + def score(self, X, y, objectives): + """Evaluate model performance on current and additional objectives + + Arguments: + X (pd.DataFrame or np.array) : data of shape [n_samples, n_features] + y (pd.Series) : true labels of length [n_samples] + objectives (list): list of objectives to score + + Returns: + dict: ordered dictionary of objective scores + """ + if not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) + + if not isinstance(y, pd.Series): + y = pd.Series(y) + + objectives = [get_objective(o) for o in objectives] + y_predicted = None + y_predicted_proba = None - threshold_selection_split = False # primary difference between binary and multiclass + scores = OrderedDict() + for objective in objectives: + if objective.score_needs_proba: + if y_predicted_proba is None: + y_predicted_proba = self.predict_proba(X) + scores.update({objective.name: objective.score(y_predicted_proba, y, X=X)}) + else: + if y_predicted is None: + y_predicted = self.predict(X) + scores.update({objective.name: objective.score(y_predicted, y, X=X)}) + return scores diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py index ba8d25231d..e47a186610 100644 --- a/evalml/pipelines/pipeline_base.py +++ b/evalml/pipelines/pipeline_base.py @@ -59,10 +59,9 @@ def __init__(self, parameters, random_state=0): """ self.random_state = 
get_random_state(random_state) self.component_graph = [self._instantiate_component(c, parameters) for c in self.component_graph] - self.supported_problem_types = [handle_problem_types(problem_type) for problem_type in self.supported_problem_types] self.input_feature_names = {} self.results = {} - + self.supported_problem_types = [handle_problem_types(problem_type) for problem_type in self.supported_problem_types] self.estimator = self.component_graph[-1] if isinstance(self.component_graph[-1], Estimator) else None if self.estimator is None: raise ValueError("A pipeline must have an Estimator as the last component in component_graph.") @@ -191,7 +190,7 @@ def _fit(self, X, y): self.input_feature_names.update({self.estimator.name: list(pd.DataFrame(X_t))}) self.estimator.fit(X_t, y_t) - def fit(self, X, y, objective=None, objective_fit_size=0.2): + def fit(self, X, y): """Build a model Arguments: @@ -199,11 +198,7 @@ def fit(self, X, y, objective=None, objective_fit_size=0.2): y (pd.Series): the target training labels of length [n_samples] - objective (Object or string): the objective to optimize - - objective_fit_size (float): the proportion of the dataset to include in the test split. Returns: - self """ @@ -220,7 +215,7 @@ def predict(self, X, objective=None): Args: X (pd.DataFrame or np.array) : data of shape [n_samples, n_features] - objective (Object or string): the objective to use to predict + objective (Object or string): the objective to use to make predictions Returns: pd.Series : estimated labels @@ -250,23 +245,16 @@ def score(self, X, y, objectives): objectives = [get_objective(o) for o in objectives] y_predicted = None - y_predicted_proba = None - scores = OrderedDict() for objective in objectives: if objective.score_needs_proba: - if y_predicted_proba is None: - y_predicted_proba = self.predict_proba(X) - y_predictions = y_predicted_proba + raise ValueError("Objective `{}` does not support score_needs_proba".format(objective.name)) else: if y_predicted is None: y_predicted = self.predict(X) y_predictions = y_predicted - if objective.uses_extra_columns: - scores.update({objective.name: objective.score(y_predictions, y, X)}) - else: - scores.update({objective.name: objective.score(y_predictions, y)}) + scores.update({objective.name: objective.score(y_predictions, y, X)}) return scores @@ -287,13 +275,10 @@ def get_plot_data(self, X, y, plot_metrics): if not isinstance(y, pd.Series): y = pd.Series(y) y_predicted = None - y_predicted_proba = None scores = OrderedDict() for plot_metric in plot_metrics: if plot_metric.score_needs_proba: - if y_predicted_proba is None: - y_predicted_proba = self.predict_proba(X) - y_predictions = y_predicted_proba + raise Exception("Plot metric `{}` does not support score_needs_proba".format(plot_metric.name)) else: if y_predicted is None: y_predicted = self.predict(X) diff --git a/evalml/pipelines/regression_pipeline.py b/evalml/pipelines/regression_pipeline.py index 051b4b97e6..07fae30329 100644 --- a/evalml/pipelines/regression_pipeline.py +++ b/evalml/pipelines/regression_pipeline.py @@ -2,4 +2,4 @@ class RegressionPipeline(PipelineBase): - pass + supported_problem_types = ['regression'] diff --git a/evalml/tests/automl_tests/test_auto_classification_search.py b/evalml/tests/automl_tests/test_auto_classification_search.py index 538615892b..f5bdd158ad 100644 --- a/evalml/tests/automl_tests/test_auto_classification_search.py +++ b/evalml/tests/automl_tests/test_auto_classification_search.py @@ -99,13 +99,15 @@ def test_specify_objective(X_y): X, y 
= X_y automl = AutoClassificationSearch(objective=Precision(), max_pipelines=1) automl.search(X, y, raise_errors=True) + assert isinstance(automl.objective, Precision) + assert automl.best_pipeline.threshold is not None def test_binary_auto(X_y): X, y = X_y - automl = AutoClassificationSearch(objective="recall", multiclass=False, max_pipelines=5) + automl = AutoClassificationSearch(objective="log_loss_binary", multiclass=False, max_pipelines=5) automl.search(X, y, raise_errors=True) - y_pred = automl.best_pipeline.predict(X, "recall") + y_pred = automl.best_pipeline.predict(X) assert len(np.unique(y_pred)) == 2 @@ -141,13 +143,12 @@ def test_multi_auto(X_y_multi): def test_multi_objective(X_y_multi): - error_msg = 'Given objective Recall is not compatible with a multiclass problem' - with pytest.raises(ValueError, match=error_msg): - automl = AutoClassificationSearch(objective="recall", multiclass=True) - - automl = AutoClassificationSearch(objective="log_loss") + automl = AutoClassificationSearch(objective="log_loss_binary") assert automl.problem_type == ProblemTypes.BINARY + automl = AutoClassificationSearch(objective="log_loss_multi") + assert automl.problem_type == ProblemTypes.MULTICLASS + automl = AutoClassificationSearch(objective='recall_micro') assert automl.problem_type == ProblemTypes.MULTICLASS @@ -232,6 +233,31 @@ def test_additional_objectives(X_y): assert 'Fraud Cost' in list(results["cv_data"][0]["all_objective_scores"].keys()) +@patch('evalml.objectives.BinaryClassificationObjective.optimize_threshold') +@patch('evalml.pipelines.BinaryClassificationPipeline.predict_proba') +@patch('evalml.pipelines.PipelineBase.fit') +def test_optimizable_threshold(mock_fit, mock_predict_proba, mock_optimize_threshold, X_y): + mock_optimize_threshold.return_value = 0.8 + X, y = X_y + automl = AutoClassificationSearch(objective='recall', max_pipelines=1) + automl.search(X, y) + mock_fit.assert_called() + mock_predict_proba.assert_called() + mock_optimize_threshold.assert_called() + assert automl.best_pipeline.threshold == 0.8 + + +@patch('evalml.pipelines.BinaryClassificationPipeline.score') +@patch('evalml.pipelines.PipelineBase.fit') +def test_non_optimizable_threshold(mock_fit, mock_score, X_y): + X, y = X_y + automl = AutoClassificationSearch(objective='AUC', max_pipelines=1) + automl.search(X, y) + mock_fit.assert_called() + mock_score.assert_called() + assert automl.best_pipeline.threshold == 0.5 + + def test_describe_pipeline_objective_ordered(X_y, capsys): X, y = X_y automl = AutoClassificationSearch(objective='AUC', max_pipelines=2) diff --git a/evalml/tests/automl_tests/test_pipeline_search_plots.py b/evalml/tests/automl_tests/test_pipeline_search_plots.py index b0077fd76f..8f04055537 100644 --- a/evalml/tests/automl_tests/test_pipeline_search_plots.py +++ b/evalml/tests/automl_tests/test_pipeline_search_plots.py @@ -47,7 +47,7 @@ def search(self): else: y_train, y_test = y[train], y[test] - pipeline.fit(X_train, y_train, "precision") + pipeline.fit(X_train, y_train) plot_data.append(pipeline.get_plot_data(X_test, y_test, [ROC()])) self.results['pipeline_results'].update({0: {"plot_data": plot_data, diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py index ddc74e7fd1..4e3eebe5b2 100644 --- a/evalml/tests/conftest.py +++ b/evalml/tests/conftest.py @@ -6,6 +6,11 @@ from sklearn import datasets from skopt.space import Integer, Real +from evalml.model_family import ModelFamily +from evalml.pipelines import BinaryClassificationPipeline, RegressionPipeline +from 
evalml.pipelines.components import Estimator +from evalml.problem_types import ProblemTypes + def pytest_addoption(parser): parser.addoption("--has-minimal-dependencies", action="store_true", default=False, @@ -79,3 +84,39 @@ def test_space_small(): list_of_space.append(['most_frequent', 'median', 'mean']) list_of_space.append(['a', 'b', 'c']) return list_of_space + + +@pytest.fixture +def dummy_estimator(): + class MockEstimator(Estimator): + name = "Mock Classifier" + model_family = ModelFamily.NONE + supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] + hyperparameter_ranges = {} + + def __init__(self, random_state=0): + super().__init__(parameters={}, component_obj=None, random_state=random_state) + return MockEstimator + + +@pytest.fixture +def dummy_binary_pipeline(dummy_estimator): + class MockBinaryClassificationPipeline(BinaryClassificationPipeline): + estimator = dummy_estimator() + component_graph = [estimator] + return MockBinaryClassificationPipeline(parameters={}) + + +@pytest.fixture +def dummy_regression_pipeline(): + class MockRegressor(Estimator): + name = "Mock Regressor" + model_family = ModelFamily.NONE + supported_problem_types = [ProblemTypes.REGRESSION] + + def __init__(self, random_state=0): + super().__init__(parameters={}, component_obj=None, random_state=random_state) + + class MockRegressionPipeline(RegressionPipeline): + component_graph = [MockRegressor()] + return MockRegressionPipeline(parameters={}) diff --git a/evalml/tests/objective_tests/test_fraud_detection.py b/evalml/tests/objective_tests/test_fraud_detection.py index 130208de06..c2d6558503 100644 --- a/evalml/tests/objective_tests/test_fraud_detection.py +++ b/evalml/tests/objective_tests/test_fraud_detection.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from evalml import AutoClassificationSearch from evalml.objectives import FraudCost @@ -21,21 +22,46 @@ def test_fraud_objective(X_y): pipeline.predict_proba(X) pipeline.score(X, y, [objective]) + +def test_fraud_objective_function_amount_col(X_y): + X, y = X_y + + objective = FraudCost(retry_percentage=.5, + interchange_fee=.02, + fraud_payout_percentage=.75, + amount_col="this column does not exist") + y_predicted = pd.Series([.1, .5, .5]) + y_true = [True, False, True] + with pytest.raises(ValueError, match="`this column does not exist` is not a valid column in X."): + objective.objective_function(y_predicted, y_true, X) + + +def test_fraud_objective_score(X_y): + X, y = X_y fraud_cost = FraudCost(amount_col="value") y_predicted = pd.Series([.1, .5, .5]) y_true = [True, False, True] - extra_columns = pd.DataFrame({"value": [100, 5, 25]}) + extra_columns = pd.DataFrame({"value": [100, 5, 250]}) - out = fraud_cost.decision_function(y_predicted, extra_columns, 5) + out = fraud_cost.decision_function(y_predicted, 5, extra_columns) assert out.tolist() == y_true score = fraud_cost.score(out, y_true, extra_columns) assert (score == 0.0) # testing with other types of inputs y_predicted = np.array([.1, .5, .5]) - extra_columns = {"value": [100, 5, 25]} - out = fraud_cost.decision_function(y_predicted, extra_columns, 5) + extra_columns = {"value": [100, 5, 250]} + out = fraud_cost.decision_function(y_predicted, 5, extra_columns) assert out.tolist() == y_true score = fraud_cost.score(out, y_true, extra_columns) assert (score == 0.0) + + y_predicted = pd.Series([.2, .01, .01]) + extra_columns = pd.DataFrame({"value": [100, 50, 50]}) + y_true = [False, False, True] + expected_y_pred = [True, False, False] + out = 
fraud_cost.decision_function(y_predicted, 10, extra_columns) + assert out.tolist() == expected_y_pred + score = fraud_cost.score(out, y_true, extra_columns) + assert (score == 0.255) diff --git a/evalml/tests/objective_tests/test_lead_scoring.py b/evalml/tests/objective_tests/test_lead_scoring.py index 94f51ad3c0..3a90174311 100644 --- a/evalml/tests/objective_tests/test_lead_scoring.py +++ b/evalml/tests/objective_tests/test_lead_scoring.py @@ -14,7 +14,7 @@ def test_lead_scoring_objective(X_y): automl = AutoClassificationSearch(objective=objective, max_pipelines=1, random_state=0) automl.search(X, y, raise_errors=True) pipeline = automl.best_pipeline - pipeline.predict(X, objective=objective) + pipeline.predict(X) pipeline.predict_proba(X) pipeline.score(X, y, [objective]) diff --git a/evalml/tests/objective_tests/test_objectives.py b/evalml/tests/objective_tests/test_objectives.py index 637096c31e..6d6f72adeb 100644 --- a/evalml/tests/objective_tests/test_objectives.py +++ b/evalml/tests/objective_tests/test_objectives.py @@ -1,17 +1,27 @@ import pytest from evalml.exceptions import ObjectiveNotFoundError -from evalml.objectives import ( - Precision, - PrecisionMacro, - PrecisionMicro, - get_objective, - get_objectives -) -from evalml.pipelines import LogisticRegressionBinaryPipeline +from evalml.objectives import Precision, get_objective, get_objectives +from evalml.objectives.objective_base import ObjectiveBase from evalml.problem_types import ProblemTypes +def test_create_custom_objective(): + class MockEmptyObjective(ObjectiveBase): + def objective_function(self, y_predicted, y_true, X=None): + pass + + with pytest.raises(TypeError): + MockEmptyObjective() + + class MockNoObjectiveFunctionObjective(ObjectiveBase): + name = "Mock objective without objective function" + problem_type = ProblemTypes.BINARY + + with pytest.raises(TypeError): + MockNoObjectiveFunctionObjective() + + def test_get_objective(): assert isinstance(get_objective('precision'), Precision) assert isinstance(get_objective(Precision()), Precision) @@ -26,25 +36,3 @@ def test_get_objectives_types(): assert len(get_objectives(ProblemTypes.MULTICLASS)) == 14 assert len(get_objectives(ProblemTypes.BINARY)) == 6 assert len(get_objectives(ProblemTypes.REGRESSION)) == 6 - - -def test_binary_average(X_y): - X, y = X_y - - objective = Precision() - parameters = { - 'Simple Imputer': { - 'impute_strategy': 'mean' - }, - 'Logistic Regression Classifier': { - 'penalty': 'l2', - 'C': 1.0, - } - } - - pipeline = LogisticRegressionBinaryPipeline(parameters=parameters, random_state=0) - pipeline.fit(X, y, objective) - y_pred = pipeline.predict(X) - - assert Precision().score(y, y_pred) == PrecisionMicro().score(y, y_pred) - assert Precision().score(y, y_pred) == PrecisionMacro().score(y, y_pred) diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py new file mode 100644 index 0000000000..06f0337fe3 --- /dev/null +++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py @@ -0,0 +1,61 @@ +from unittest.mock import patch + +import numpy as np +import pytest + + +@patch('evalml.objectives.BinaryClassificationObjective.decision_function') +@patch('evalml.pipelines.components.Estimator.predict_proba') +@patch('evalml.pipelines.components.Estimator.predict') +@patch('evalml.pipelines.PipelineBase._transform') +@patch('evalml.pipelines.PipelineBase.fit') +def 
test_binary_classification_pipeline_predict(mock_fit, mock_transform,
+                                                mock_predict, mock_predict_proba,
+                                                mock_obj_decision, X_y, dummy_binary_pipeline):
+    X, y = X_y
+    binary_pipeline = dummy_binary_pipeline
+    # test no objective passed and no custom threshold uses underlying estimator's predict method
+    binary_pipeline.predict(X)
+    mock_predict.assert_called()
+    mock_predict.reset_mock()
+
+    # test objective passed but no custom threshold uses underlying estimator's predict method
+    binary_pipeline.predict(X, 'recall')
+    mock_predict.assert_called()
+    mock_predict.reset_mock()
+
+    # test custom threshold set but no objective passed
+    mock_predict_proba.return_value = np.array([[0.1, 0.2], [0.1, 0.2]])
+    binary_pipeline.threshold = 0.6
+    binary_pipeline.predict(X)
+    mock_predict.assert_not_called()
+    mock_predict_proba.assert_called()
+    mock_obj_decision.assert_not_called()
+
+    # test custom threshold set but no objective passed, re-checked after resetting the predict mock
+    mock_predict.reset_mock()
+    mock_predict_proba.return_value = np.array([[0.1, 0.2], [0.1, 0.2]])
+    binary_pipeline.threshold = 0.6
+    binary_pipeline.predict(X)
+    mock_predict.assert_not_called()
+    mock_predict_proba.assert_called()
+    mock_obj_decision.assert_not_called()
+
+    # test custom threshold set and objective passed
+    mock_predict.reset_mock()
+    mock_predict_proba.reset_mock()
+    mock_predict_proba.return_value = np.array([[0.1, 0.2], [0.1, 0.2]])
+    binary_pipeline.threshold = 0.6
+    binary_pipeline.predict(X, 'recall')
+    mock_predict.assert_not_called()
+    mock_predict_proba.assert_called()
+    mock_obj_decision.assert_called()
+
+
+@patch('evalml.pipelines.PipelineBase._transform')
+def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y, dummy_binary_pipeline):
+    X, y = X_y
+    binary_pipeline = dummy_binary_pipeline
+    with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
+        binary_pipeline.predict(X, "recall_micro")
+    mock_transform.assert_called()
diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py
index 727f121bed..e4ad2515ba 100644
--- a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py
+++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py
@@ -1,10 +1,11 @@
 import numpy as np
 import pandas as pd
+import pytest
 from pytest import importorskip
 from sklearn.impute import SimpleImputer
 from sklearn.pipeline import Pipeline
 
-from evalml.objectives import PrecisionMicro
+from evalml.objectives import Precision, PrecisionMicro
 from evalml.pipelines import (
     CatBoostBinaryClassificationPipeline,
     CatBoostMulticlassClassificationPipeline
@@ -32,6 +33,40 @@ def test_catboost_init():
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
 
+
+def test_catboost_objective_tuning(X_y):
+    X, y = X_y
+
+    parameters = {
+        'Simple Imputer': {
+            'impute_strategy': 'most_frequent'
+        },
+        'CatBoost Classifier': {
+            "n_estimators": 500,
+            "bootstrap_type": 'Bernoulli',
+            "eta": 0.1,
+            "max_depth": 3,
+        }
+    }
+    clf = CatBoostBinaryClassificationPipeline(parameters=parameters)
+    clf.fit(X, y)
+    y_pred = clf.predict(X)
+
+    objective = PrecisionMicro()
+    with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
+        y_pred_with_objective 
= clf.predict(X, objective) + + # testing objective parameter passed in does not change results + objective = Precision() + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) + + # testing objective parameter passed and set threshold does change results + with pytest.raises(AssertionError): + clf.threshold = 0.01 + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) + + def test_catboost_multi(X_y_multi): from catboost import CatBoostClassifier as CBClassifier X, y = X_y_multi @@ -57,9 +92,8 @@ def test_catboost_multi(X_y_multi): "max_depth": 3, } } - clf = CatBoostMulticlassClassificationPipeline(parameters=parameters, random_state=get_random_state(random_seed)) - clf.fit(X, y, objective) + clf.fit(X, y) clf_score = clf.score(X, y, [objective]) y_pred = clf.predict(X) @@ -75,7 +109,6 @@ def test_catboost_input_feature_names(X_y): # create a list of column names col_names = ["col_{}".format(i) for i in range(len(X[0]))] X = pd.DataFrame(X, columns=col_names) - objective = PrecisionMicro() parameters = { 'Simple Imputer': { 'impute_strategy': 'mean' @@ -88,7 +121,7 @@ def test_catboost_input_feature_names(X_y): } } clf = CatBoostBinaryClassificationPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() for col_name in clf.feature_importances["feature"]: @@ -97,7 +130,6 @@ def test_catboost_input_feature_names(X_y): def test_catboost_categorical(X_y_categorical_classification): X, y = X_y_categorical_classification - objective = PrecisionMicro() parameters = { 'Simple Imputer': { 'impute_strategy': 'most_frequent' @@ -110,6 +142,6 @@ def test_catboost_categorical(X_y_categorical_classification): } } clf = CatBoostBinaryClassificationPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py index dd2c9d549d..85b94dc7e2 100644 --- a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py +++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py @@ -1,12 +1,13 @@ import category_encoders as ce import numpy as np import pandas as pd +import pytest from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline as SKPipeline from sklearn.preprocessing import StandardScaler as SkScaler -from evalml.objectives import PrecisionMicro +from evalml.objectives import Precision, PrecisionMicro from evalml.pipelines import ( LogisticRegressionBinaryPipeline, LogisticRegressionMulticlassPipeline @@ -32,6 +33,38 @@ def test_lor_init(X_y): assert (clf.random_state.get_state()[0] == np.random.RandomState(1).get_state()[0]) +def test_lor_objective_tuning(X_y): + X, y = X_y + + parameters = { + 'Simple Imputer': { + 'impute_strategy': 'mean' + }, + 'Logistic Regression Classifier': { + 'penalty': 'l2', + 'C': 0.5, + } + } + clf = LogisticRegressionBinaryPipeline(parameters=parameters) + clf.fit(X, y) + y_pred = clf.predict(X) + + objective = PrecisionMicro() + with pytest.raises(ValueError, match="You 
can only use a binary classification objective to make predictions for a binary classification pipeline."): + y_pred_with_objective = clf.predict(X, objective) + + # testing objective parameter passed in does not change results + objective = Precision() + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) + + # testing objective parameter passed and set threshold does change results + with pytest.raises(AssertionError): + clf.threshold = 0.01 + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) + + def test_lor_multi(X_y_multi): X, y = X_y_multi imputer = SimpleImputer(strategy='mean') @@ -71,7 +104,7 @@ def test_lor_multi(X_y_multi): assert not clf.feature_importances.isnull().all().all() # testing objective parameter passed in does not change results - clf.fit(X, y, objective) + clf.fit(X, y) y_pred_with_objective = clf.predict(X) assert((y_pred == y_pred_with_objective).all()) @@ -81,8 +114,6 @@ def test_lor_input_feature_names(X_y): # create a list of column names col_names = ["col_{}".format(i) for i in range(len(X[0]))] X = pd.DataFrame(X, columns=col_names) - - objective = PrecisionMicro() parameters = { 'Simple Imputer': { 'impute_strategy': 'mean' @@ -92,9 +123,8 @@ def test_lor_input_feature_names(X_y): 'C': 1.0, } } - clf = LogisticRegressionBinaryPipeline(parameters=parameters, random_state=1) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py index 53a13ef96f..28c523314a 100644 --- a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py +++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py @@ -1,12 +1,13 @@ import category_encoders as ce import numpy as np import pandas as pd +import pytest from sklearn.ensemble import RandomForestClassifier from sklearn.feature_selection import SelectFromModel from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline -from evalml.objectives import PrecisionMicro +from evalml.objectives import Precision, PrecisionMicro from evalml.pipelines import ( RFBinaryClassificationPipeline, RFMulticlassClassificationPipeline @@ -33,9 +34,7 @@ def test_rf_init(X_y): "max_depth": 5, } } - clf = RFBinaryClassificationPipeline(parameters=parameters, random_state=2) - expected_parameters = { 'Simple Imputer': { 'impute_strategy': 'mean', @@ -56,6 +55,44 @@ def test_rf_init(X_y): assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0]) +def test_rf_objective_tuning(X_y): + X, y = X_y + + parameters = { + 'Simple Imputer': { + 'impute_strategy': 'mean' + }, + 'RF Classifier Select From Model': { + "percent_features": 1.0, + "number_features": len(X[0]), + "n_estimators": 20, + "max_depth": 5 + }, + 'Random Forest Classifier': { + "n_estimators": 20, + "max_depth": 5, + } + } + clf = RFBinaryClassificationPipeline(parameters=parameters) + clf.fit(X, y) + y_pred = clf.predict(X) + + objective = PrecisionMicro() + with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."): + y_pred_with_objective = clf.predict(X, objective) + + # testing objective parameter passed in does not change results + 
objective = Precision()
+    y_pred_with_objective = clf.predict(X, objective)
+    np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5)
+
+    # testing objective parameter passed and set threshold does change results
+    with pytest.raises(AssertionError):
+        clf.threshold = 0.01
+        y_pred_with_objective = clf.predict(X, objective)
+        np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5)
+
+
 def test_rf_multi(X_y_multi):
     X, y = X_y_multi
 
@@ -104,9 +141,9 @@ def test_rf_multi(X_y_multi):
 
     # testing objective parameter passed in does not change results
     clf = RFMulticlassClassificationPipeline(parameters=parameters)
-    clf.fit(X, y, objective)
-    y_pred_with_objective = clf.predict(X)
-    assert((y_pred == y_pred_with_objective).all())
+    clf.fit(X, y)
+    y_pred_with_objective = clf.predict(X, objective)
+    np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5)
 
 
 def test_rf_input_feature_names(X_y):
@@ -114,7 +151,6 @@ def test_rf_input_feature_names(X_y):
     # create a list of column names
     col_names = ["col_{}".format(i) for i in range(len(X[0]))]
     X = pd.DataFrame(X, columns=col_names)
-    objective = PrecisionMicro()
    parameters = {
         'Simple Imputer': {
            'impute_strategy': 'mean'
@@ -129,8 +165,9 @@
             "max_depth": 5,
         }
     }
+
     clf = RFBinaryClassificationPipeline(parameters=parameters)
-    clf.fit(X, y, objective)
+    clf.fit(X, y)
     assert len(clf.feature_importances) == len(X.columns)
     assert not clf.feature_importances.isnull().all().all()
     for col_name in clf.feature_importances["feature"]:
diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost.py
index e7a5ebfcae..8ebffe1e80 100644
--- a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost.py
+++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost.py
@@ -1,13 +1,14 @@
 import category_encoders as ce
 import numpy as np
 import pandas as pd
+import pytest
 from pytest import importorskip
 from sklearn.ensemble import RandomForestClassifier as SKRandomForestClassifier
 from sklearn.feature_selection import SelectFromModel
 from sklearn.impute import SimpleImputer
 from sklearn.pipeline import Pipeline
 
-from evalml.objectives import PrecisionMicro
+from evalml.objectives import Precision, PrecisionMicro
 from evalml.pipelines import XGBoostBinaryPipeline, XGBoostMulticlassPipeline
 from evalml.utils import (
     SEED_BOUNDS,
@@ -70,6 +71,47 @@ def test_xg_init(X_y):
     assert (clf.random_state.get_state()[0] == np.random.RandomState(1).get_state()[0])
 
+
+def test_xg_objective_tuning(X_y):
+    X, y = X_y
+
+    parameters = {
+        'Simple Imputer': {
+            'impute_strategy': 'median'
+        },
+        'RF Classifier Select From Model': {
+            "percent_features": 1.0,
+            "number_features": len(X[0]),
+            "n_estimators": 20,
+            "max_depth": 5
+        },
+        'XGBoost Classifier': {
+            "n_estimators": 20,
+            "eta": 0.2,
+            "min_child_weight": 3,
+            "max_depth": 5,
+        }
+    }
+
+    clf = XGBoostBinaryPipeline(parameters=parameters)
+    clf.fit(X, y)
+    y_pred = clf.predict(X)
+
+    objective = PrecisionMicro()
+    with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
+        y_pred_with_objective = clf.predict(X, objective)
+
+    # testing objective parameter passed in does not change results
+    objective = Precision()
+    y_pred_with_objective = clf.predict(X, objective)
+    np.testing.assert_almost_equal(y_pred, y_pred_with_objective, 
decimal=5) + + # testing objective parameter passed and set threshold does change results + with pytest.raises(AssertionError): + clf.threshold = 0.01 + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) + + def test_xg_multi(X_y_multi): X, y = X_y_multi @@ -125,7 +167,7 @@ def test_xg_multi(X_y_multi): assert not clf.feature_importances.isnull().all().all() # testing objective parameter passed in does not change results - clf.fit(X, y, objective) + clf.fit(X, y) y_pred_with_objective = clf.predict(X) assert((y_pred == y_pred_with_objective).all()) @@ -135,7 +177,6 @@ def test_xg_input_feature_names(X_y): # create a list of column names col_names = ["col_{}".format(i) for i in range(len(X[0]))] X = pd.DataFrame(X, columns=col_names) - objective = PrecisionMicro() parameters = { 'Simple Imputer': { 'impute_strategy': 'median' @@ -155,7 +196,7 @@ def test_xg_input_feature_names(X_y): } clf = XGBoostBinaryPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() for col_name in clf.feature_importances["feature"]: diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py index 7bb429f975..629ab7b98a 100644 --- a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py +++ b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py @@ -54,17 +54,21 @@ def test_catboost_regression(X_y_reg): } } clf = CatBoostRegressionPipeline(parameters=parameters, random_state=get_random_state(random_seed)) - clf.fit(X, y, objective) - clf_score = clf.score(X, y, [objective]) + clf.fit(X, y) + clf_scores = clf.score(X, y, [objective]) y_pred = clf.predict(X) np.testing.assert_almost_equal(y_pred, sk_pipeline.predict(X), decimal=5) - np.testing.assert_almost_equal(sk_score, clf_score[objective.name], decimal=5) + np.testing.assert_almost_equal(sk_score, clf_scores[objective.name], decimal=5) + + # testing objective parameter passed in does not change results + clf.fit(X, y) + y_pred_with_objective = clf.predict(X) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) def test_cbr_input_feature_names(X_y_categorical_regression): X, y = X_y_categorical_regression - objective = R2() parameters = { 'Simple Imputer': { 'impute_strategy': 'most_frequent' @@ -77,6 +81,6 @@ def test_cbr_input_feature_names(X_y_categorical_regression): } } clf = CatBoostRegressionPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py index cdc23acccf..e3c79b590d 100644 --- a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py +++ b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py @@ -62,8 +62,8 @@ def test_linear_regression(X_y_categorical_regression): assert not clf.feature_importances.isnull().all().all() # testing objective parameter passed in does not change results - clf.fit(X, y, objective) - y_pred_with_objective = clf.predict(X, objective) + clf.fit(X, y) + y_pred_with_objective = 
clf.predict(X) assert((y_pred == y_pred_with_objective).all()) @@ -72,7 +72,6 @@ def test_lr_input_feature_names(X_y): # create a list of column names col_names = ["col_{}".format(i) for i in range(len(X[0]))] X = pd.DataFrame(X, columns=col_names) - objective = R2() parameters = { 'Simple Imputer': { 'impute_strategy': 'mean' @@ -83,7 +82,7 @@ def test_lr_input_feature_names(X_y): } } clf = LinearRegressionPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() for col_name in clf.feature_importances["feature"]: diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py index c989e66cc3..5cdc9190ec 100644 --- a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py +++ b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py @@ -31,7 +31,6 @@ def test_rf_init(X_y_reg): } } clf = RFRegressionPipeline(parameters=parameters, random_state=2) - expected_parameters = { 'Simple Imputer': { 'impute_strategy': 'mean', @@ -89,11 +88,14 @@ def test_rf_regression(X_y_categorical_regression): } clf = RFRegressionPipeline(parameters=parameters) clf.fit(X, y) - clf_score = clf.score(X, y, [objective]) + clf_scores = clf.score(X, y, [objective]) y_pred = clf.predict(X) - np.testing.assert_almost_equal(y_pred, sk_pipeline.predict(X), decimal=5) - np.testing.assert_almost_equal(sk_score, clf_score[objective.name], decimal=5) + np.testing.assert_almost_equal(sk_score, clf_scores[objective.name], decimal=5) + + # testing objective parameter passed in does not change results + y_pred_with_objective = clf.predict(X, objective) + np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5) def test_rfr_input_feature_names(X_y_reg): @@ -101,7 +103,6 @@ def test_rfr_input_feature_names(X_y_reg): # create a list of column names col_names = ["col_{}".format(i) for i in range(len(X[0]))] X = pd.DataFrame(X, columns=col_names) - objective = R2() parameters = { 'Simple Imputer': { 'impute_strategy': 'mean' @@ -117,7 +118,7 @@ def test_rfr_input_feature_names(X_y_reg): } } clf = RFRegressionPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) assert len(clf.feature_importances) == len(X.columns) assert not clf.feature_importances.isnull().all().all() for col_name in clf.feature_importances["feature"]: diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py index 5b2105a73d..9d10057857 100644 --- a/evalml/tests/pipeline_tests/test_pipelines.py +++ b/evalml/tests/pipeline_tests/test_pipelines.py @@ -8,10 +8,9 @@ from evalml.exceptions import IllFormattedClassNameError from evalml.model_family import ModelFamily -from evalml.objectives import FraudCost, Precision +from evalml.objectives import FraudCost, Precision, Recall from evalml.pipelines import LogisticRegressionBinaryPipeline, PipelineBase from evalml.pipelines.components import ( - Estimator, LogisticRegressionClassifier, OneHotEncoder, RFClassifierSelectFromModel, @@ -129,9 +128,8 @@ def test_serialization(X_y, tmpdir, lr_pipeline): def pickled_pipeline_path(X_y, tmpdir, lr_pipeline): X, y = X_y path = os.path.join(str(tmpdir), 'pickled_pipe.pkl') - MockPrecision = type('MockPrecision', (Precision,), {}) pipeline = LogisticRegressionBinaryPipeline(parameters=lr_pipeline.parameters) - pipeline.fit(X, y, 
MockPrecision()) + pipeline.fit(X, y) pipeline.save(path) return path @@ -143,7 +141,7 @@ def test_load_pickled_pipeline_with_custom_objective(X_y, pickled_pipeline_path, MockPrecision() # noqa: F821: ignore flake8's "undefined name" error objective = Precision() pipeline = LogisticRegressionBinaryPipeline(parameters=lr_pipeline.parameters) - pipeline.fit(X, y, objective) + pipeline.fit(X, y) assert PipelineBase.load(pickled_pipeline_path).score(X, y, [objective]) == pipeline.score(X, y, [objective]) @@ -167,10 +165,10 @@ def test_reproducibility(X_y): } clf = LogisticRegressionBinaryPipeline(parameters=parameters) - clf.fit(X, y, objective) + clf.fit(X, y) clf_1 = LogisticRegressionBinaryPipeline(parameters=parameters) - clf_1.fit(X, y, objective) + clf_1.fit(X, y) assert clf_1.score(X, y, [objective]) == clf.score(X, y, [objective]) @@ -197,7 +195,6 @@ def test_describe(X_y, capsys, lr_pipeline): lrp = lr_pipeline lrp.describe() out, err = capsys.readouterr() - lrp.describe() assert "Logistic Regression Binary Pipeline" in out assert "Problem Types: Binary Classification" in out assert "Model Family: Linear Model" in out @@ -308,7 +305,7 @@ class TestPipeline(PipelineBase): assert clf.model_family == ModelFamily.LINEAR_MODEL assert clf.supported_problem_types == [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] - clf.fit(X, y, 'precision') + clf.fit(X, y) clf.score(X, y, ['precision']) assert not clf.feature_importances.isnull().all().all() @@ -333,7 +330,7 @@ class TestPipeline(PipelineBase): assert clf.model_family == ModelFamily.LINEAR_MODEL assert clf.supported_problem_types == [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] - clf.fit(X, y, 'precision') + clf.fit(X, y) clf.score(X, y, ['precision']) assert not clf.feature_importances.isnull().all().all() @@ -347,6 +344,29 @@ class TestPipeline(PipelineBase): TestPipeline(parameters={}) +def test_score_with_list_of_multiple_objectives(X_y): + X, y = X_y + parameters = { + 'Simple Imputer': { + 'impute_strategy': 'mean' + }, + 'Logistic Regression Classifier': { + 'penalty': 'l2', + 'C': 1.0, + } + } + + clf = LogisticRegressionBinaryPipeline(parameters=parameters) + clf.fit(X, y) + recall_name = Recall.name + precision_name = Precision.name + objective_names = [recall_name, precision_name] + scores = clf.score(X, y, objective_names) + assert len(scores.values()) == 2 + assert all(name in scores.keys() for name in objective_names) + assert not any(np.isnan(val) for val in scores.values()) + + def test_no_default_parameters(): class MockComponent(Transformer): name = "Mock Component" @@ -448,19 +468,19 @@ class MockPipelineOverRide(PipelineBase): assert MockPipelineOverRide(parameters={}).hyperparameters == hyperparameters -def test_hyperparameters_none(): - class MockEstimator(Estimator): - hyperparameter_ranges = {} - model_family = ModelFamily.NONE - name = "Mock Estimator" - supported_problem_types = [ProblemTypes.BINARY] - - def __init__(self, random_state=0): - super().__init__(parameters={}, component_obj={}, random_state=random_state) - +def test_hyperparameters_none(dummy_estimator): class MockPipelineNone(PipelineBase): - component_graph = [MockEstimator()] + component_graph = [dummy_estimator()] supported_problem_types = ['binary'] assert MockPipelineNone.hyperparameters == {} assert MockPipelineNone(parameters={}).hyperparameters == {} + + +@patch('evalml.pipelines.components.Estimator.predict') +def test_score_with_objective_that_requires_predict_proba(mock_predict, dummy_regression_pipeline, X_y): + X, y = X_y + 
mock_predict.return_value = np.array([1] * 100) + with pytest.raises(ValueError, match="Objective `AUC` does not support score_needs_proba"): + dummy_regression_pipeline.score(X, y, ['recall', 'auc']) + mock_predict.assert_called()
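
Editor's note: for readers skimming these test changes, the snippet below is a minimal, self-contained sketch (plain numpy, no evalml imports) of the branching that test_binary_classification_pipeline_predict above asserts on: no threshold falls back to the estimator's predict, a custom threshold applies a probability cutoff, and a threshold plus an objective routes through an objective-style decision function. The helper names toy_predict_proba, toy_decision_function, and toy_predict are hypothetical illustrations, not part of the evalml API.

# Illustrative sketch only -- not evalml code.
import numpy as np


def toy_predict_proba(X, seed=0):
    # stand-in for pipeline.predict_proba: one positive-class probability per row
    rng = np.random.RandomState(seed)
    return rng.uniform(size=len(X))


def toy_decision_function(ypred_proba, threshold):
    # stand-in for a binary objective's decision_function
    return ypred_proba > threshold


def toy_predict(X, threshold=None, objective=None):
    proba = toy_predict_proba(X)
    if threshold is None:
        # no custom threshold: defer to the estimator's own predict (0.5 cutoff here)
        return proba > 0.5
    if objective is None:
        # custom threshold but no objective: plain probability cutoff
        return proba > threshold
    # custom threshold and objective: route through the objective's decision function
    return objective(proba, threshold)


X = np.zeros((5, 2))
print(toy_predict(X))                                                   # estimator-style predict
print(toy_predict(X, threshold=0.6))                                    # custom threshold only
print(toy_predict(X, threshold=0.6, objective=toy_decision_function))   # threshold + objective

The three printed boolean arrays correspond to the three code paths the mocked test distinguishes via mock_predict, mock_predict_proba, and mock_obj_decision.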