Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically infer multiclass based off of objective #99

Merged
merged 24 commits into from Oct 11, 2019
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
618d2de
Added infer base off objective
jeremyliweishih Sep 23, 2019
7c96a4f
lint
jeremyliweishih Sep 23, 2019
857b3bd
Update logic
jeremyliweishih Sep 23, 2019
8a080b7
Added more test cases and added comments
jeremyliweishih Sep 24, 2019
5ba7f03
comments
jeremyliweishih Sep 24, 2019
ab6730a
First pass on exception for multiclass
jeremyliweishih Sep 24, 2019
7a1c5ab
lint
jeremyliweishih Sep 24, 2019
5dd3a1d
clean up check_multiclass
jeremyliweishih Sep 25, 2019
28f19b3
Update comments
jeremyliweishih Sep 25, 2019
27e9935
Move check
jeremyliweishih Sep 25, 2019
10daf05
Merge branch 'master' into auto-infer
jeremyliweishih Sep 26, 2019
dee5121
lint
jeremyliweishih Sep 26, 2019
c259e48
Merge remote-tracking branch 'origin' into auto-infer
jeremyliweishih Sep 26, 2019
0cfb94b
Merge branch 'master' of https://github.com/FeatureLabs/evalml into a…
jeremyliweishih Sep 30, 2019
99172ce
Update logic
jeremyliweishih Sep 30, 2019
d7dff8c
Changed to lower-case
jeremyliweishih Sep 30, 2019
dc73b6c
Added problem type to custom objectives
jeremyliweishih Sep 30, 2019
a75362a
lint
jeremyliweishih Sep 30, 2019
0b961d8
Cleanup
jeremyliweishih Oct 10, 2019
2466e6f
Merge branch 'master' of https://github.com/FeatureLabs/evalml into a…
jeremyliweishih Oct 10, 2019
4561941
Lint after merge
jeremyliweishih Oct 10, 2019
a550432
Added test for additional_objectives error
jeremyliweishih Oct 11, 2019
38f0917
Cleaned up default behavior
jeremyliweishih Oct 11, 2019
966d5fa
Merge branch 'master' into auto-infer
jeremyliweishih Oct 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 20 additions & 3 deletions evalml/models/auto_base.py
Expand Up @@ -11,6 +11,7 @@
from evalml import preprocessing
from evalml.objectives import get_objective, get_objectives
from evalml.pipelines import get_pipelines
from evalml.problem_types import ProblemTypes
from evalml.tuners import SKOptTuner


Expand All @@ -22,6 +23,7 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
tuner = SKOptTuner

self.objective = get_objective(objective)
self.problem_type = problem_type
self.max_pipelines = max_pipelines
self.max_time = max_time
self.model_types = model_types
Expand All @@ -31,13 +33,16 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
self.cv = cv
self.verbose = verbose

self.possible_pipelines = get_pipelines(problem_type=problem_type, model_types=model_types)
objective = get_objective(objective)
self.possible_pipelines = get_pipelines(problem_type=self.problem_type, model_types=model_types)
self.objective = get_objective(objective)

if self.problem_type not in self.objective.problem_types:
raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value))

if additional_objectives is not None:
additional_objectives = [get_objective(o) for o in additional_objectives]
else:
additional_objectives = get_objectives(problem_type)
additional_objectives = get_objectives(self.problem_type)

# if our main objective is part of default set of objectives for problem_type, remove it
existing_main_objective = next((obj for obj in additional_objectives if obj.name == self.objective.name), None)
Expand Down Expand Up @@ -106,6 +111,9 @@ def fit(self, X, y, feature_types=None, raise_errors=False):
if not isinstance(y, pd.Series):
y = pd.Series(y)

if self.problem_type != ProblemTypes.REGRESSION:
self.check_multiclass(y)

self._log_title("Beginning pipeline search")
self._log("Optimizing for %s. " % self.objective.name, new_line=False)

Expand Down Expand Up @@ -141,6 +149,15 @@ def fit(self, X, y, feature_types=None, raise_errors=False):

self._log("\n✔ Optimization finished")

def check_multiclass(self, y):
    """Validate that all objectives can handle y when it has more than two classes.

    Arguments:
        y (pd.Series): target labels to inspect.

    Raises:
        ValueError: if y is multiclass (more than two unique values) but the
            main objective or any additional objective does not list
            ProblemTypes.MULTICLASS among its supported problem types.
    """
    # Two or fewer classes: nothing to validate.
    if y.nunique() > 2:
        if ProblemTypes.MULTICLASS not in self.objective.problem_types:
            raise ValueError("Given objective {} is not compatible with a multiclass problem.".format(self.objective.name))
        incompatible = [obj for obj in self.additional_objectives
                        if ProblemTypes.MULTICLASS not in obj.problem_types]
        if incompatible:
            raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(incompatible[0].name))
jeremyliweishih marked this conversation as resolved.
Show resolved Hide resolved

def _do_iteration(self, X, y, pbar, raise_errors):
# determine which pipeline to build
pipeline_class = self._select_pipeline()
Expand Down
30 changes: 25 additions & 5 deletions evalml/models/auto_classifier.py
Expand Up @@ -3,6 +3,7 @@

from .auto_base import AutoBase

from evalml.objectives import get_objective
from evalml.problem_types import ProblemTypes


Expand Down Expand Up @@ -58,15 +59,18 @@ def __init__(self,

verbose (boolean): If True, turn verbosity on. Defaults to True
"""
if objective is None:
objective = "precision"

if cv is None:
cv = StratifiedKFold(n_splits=3, random_state=random_state)

problem_type = ProblemTypes.BINARY
if multiclass:
problem_type = ProblemTypes.MULTICLASS
# set default objective if none provided
if objective is None and not multiclass:
jeremyliweishih marked this conversation as resolved.
Show resolved Hide resolved
objective = "precision"
elif objective is None and multiclass:
objective = "precision_micro"

problem_type = self.set_problem_type(objective, multiclass)

super().__init__(
tuner=tuner,
objective=objective,
Expand All @@ -82,3 +86,19 @@ def __init__(self,
verbose=verbose,
additional_objectives=additional_objectives
)

def set_problem_type(self, objective, multiclass):
    """Infer the problem type from the objective and the multiclass flag.

    Either:
        a. set problem_type to MULTICLASS if the objective supports only
           multiclass problems, even when multiclass is False
        b. set problem_type to MULTICLASS if multiclass is True
        c. default to BINARY

    Arguments:
        objective: objective name or ObjectiveBase instance; the caller
            substitutes a default objective first, so this is assumed non-None.
        multiclass (bool): True if the user explicitly requested multiclass.

    Returns:
        ProblemTypes: the inferred problem type.
    """
    problem_type = ProblemTypes.BINARY
    # if the objective is exclusively multiclass: infer MULTICLASS
    if objective and get_objective(objective).problem_types == [ProblemTypes.MULTICLASS]:
        problem_type = ProblemTypes.MULTICLASS
    elif multiclass:
        problem_type = ProblemTypes.MULTICLASS
    return problem_type
3 changes: 3 additions & 0 deletions evalml/objectives/fraud_cost.py
@@ -1,9 +1,12 @@
from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class FraudCost(ObjectiveBase):
"""Score the percentage of money lost of the total transaction amount process due to fraud"""
name = "Fraud Cost"
problem_types = [ProblemTypes.BINARY]
needs_fitting = True
greater_is_better = False
uses_extra_columns = True
Expand Down
4 changes: 4 additions & 0 deletions evalml/objectives/lead_scoring.py
@@ -1,9 +1,13 @@
from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class LeadScoring(ObjectiveBase):
"""Lead scoring"""
name = "Lead Scoring"
problem_types = [ProblemTypes.BINARY]

needs_fitting = True
greater_is_better = True
fit_needs_proba = True
Expand Down
6 changes: 3 additions & 3 deletions evalml/problem_types/problem_types.py
Expand Up @@ -3,6 +3,6 @@

class ProblemTypes(Enum):
    """Enum for type of machine learning problem: BINARY, MULTICLASS, or REGRESSION"""
    # Values are lower-case so they read naturally when interpolated into
    # user-facing error messages (e.g. "... with a binary problem.").
    BINARY = 'binary'
    MULTICLASS = 'multiclass'
    REGRESSION = 'regression'
26 changes: 26 additions & 0 deletions evalml/tests/automl_tests/test_autoclassifier.py
Expand Up @@ -113,6 +113,11 @@ def test_multi_auto(X_y_multi):
y_pred = clf.best_pipeline.predict(X)
assert len(np.unique(y_pred)) == 3

error_msg = 'not compatible with a multiclass problem.'
with pytest.raises(ValueError, match=error_msg):
clf = AutoClassifier(objective='recall')
clf.fit(X, y)

objective = PrecisionMicro()
clf = AutoClassifier(objective=objective, multiclass=True)
clf.fit(X, y)
Expand All @@ -125,6 +130,27 @@ def test_multi_auto(X_y_multi):
assert clf.additional_objectives == expected_additional_objectives


def test_multi_objective(X_y_multi):
    """Problem type is inferred from the objective and the multiclass flag."""
    # A binary-only objective combined with multiclass=True must fail fast.
    error_msg = 'Given objective Recall is not compatible with a multiclass problem'
    with pytest.raises(ValueError, match=error_msg):
        AutoClassifier(objective="recall", multiclass=True)

    # (constructor kwargs, expected inferred problem type) pairs.
    cases = [
        ({"objective": "log_loss"}, ProblemTypes.BINARY),
        ({"objective": "recall_micro"}, ProblemTypes.MULTICLASS),
        ({"objective": "recall"}, ProblemTypes.BINARY),
        ({"multiclass": True}, ProblemTypes.MULTICLASS),
        ({}, ProblemTypes.BINARY),
    ]
    for kwargs, expected in cases:
        clf = AutoClassifier(**kwargs)
        assert clf.problem_type == expected


def test_categorical_classification(X_y_categorical_classification):
X, y = X_y_categorical_classification
clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False)
Expand Down