Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically infer multiclass based off of objective #99

Merged
merged 24 commits into from Oct 11, 2019
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
618d2de
Added infer base off objective
jeremyliweishih Sep 23, 2019
7c96a4f
lint
jeremyliweishih Sep 23, 2019
857b3bd
Update logic
jeremyliweishih Sep 23, 2019
8a080b7
Added more test cases and added comments
jeremyliweishih Sep 24, 2019
5ba7f03
comments
jeremyliweishih Sep 24, 2019
ab6730a
First pass on exception for multiclass
jeremyliweishih Sep 24, 2019
7a1c5ab
lint
jeremyliweishih Sep 24, 2019
5dd3a1d
clean up check_multiclass
jeremyliweishih Sep 25, 2019
28f19b3
Update comments
jeremyliweishih Sep 25, 2019
27e9935
Move check
jeremyliweishih Sep 25, 2019
10daf05
Merge branch 'master' into auto-infer
jeremyliweishih Sep 26, 2019
dee5121
lint
jeremyliweishih Sep 26, 2019
c259e48
Merge remote-tracking branch 'origin' into auto-infer
jeremyliweishih Sep 26, 2019
0cfb94b
Merge branch 'master' of https://github.com/FeatureLabs/evalml into a…
jeremyliweishih Sep 30, 2019
99172ce
Update logic
jeremyliweishih Sep 30, 2019
d7dff8c
Changed to lower-case
jeremyliweishih Sep 30, 2019
dc73b6c
Added problem type to custom objectives
jeremyliweishih Sep 30, 2019
a75362a
lint
jeremyliweishih Sep 30, 2019
0b961d8
Cleanup
jeremyliweishih Oct 10, 2019
2466e6f
Merge branch 'master' of https://github.com/FeatureLabs/evalml into a…
jeremyliweishih Oct 10, 2019
4561941
Lint after merge
jeremyliweishih Oct 10, 2019
a550432
Added test for additional_objectives error
jeremyliweishih Oct 11, 2019
38f0917
Cleaned up default behavior
jeremyliweishih Oct 11, 2019
966d5fa
Merge branch 'master' into auto-infer
jeremyliweishih Oct 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 20 additions & 3 deletions evalml/models/auto_base.py
Expand Up @@ -11,6 +11,7 @@
from evalml import preprocessing
from evalml.objectives import get_objective, get_objectives
from evalml.pipelines import get_pipelines
from evalml.problem_types import ProblemTypes
from evalml.tuners import SKOptTuner


Expand All @@ -22,6 +23,7 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
tuner = SKOptTuner

self.objective = get_objective(objective)
self.problem_type = problem_type
self.max_pipelines = max_pipelines
self.max_time = max_time
self.model_types = model_types
Expand All @@ -31,13 +33,16 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
self.cv = cv
self.verbose = verbose

self.possible_pipelines = get_pipelines(problem_type=problem_type, model_types=model_types)
objective = get_objective(objective)
self.possible_pipelines = get_pipelines(problem_type=self.problem_type, model_types=model_types)
self.objective = get_objective(objective)

if self.problem_type not in self.objective.problem_types:
raise ValueError("Given objective {} is not compatible with a {} problem.".format(self.objective.name, self.problem_type.value))

if additional_objectives is not None:
additional_objectives = [get_objective(o) for o in additional_objectives]
else:
additional_objectives = get_objectives(problem_type)
additional_objectives = get_objectives(self.problem_type)

# if our main objective is part of default set of objectives for problem_type, remove it
existing_main_objective = next((obj for obj in additional_objectives if obj.name == self.objective.name), None)
Expand Down Expand Up @@ -106,6 +111,9 @@ def fit(self, X, y, feature_types=None, raise_errors=False):
if not isinstance(y, pd.Series):
y = pd.Series(y)

if self.problem_type != ProblemTypes.REGRESSION:
self.check_multiclass(y)

self._log_title("Beginning pipeline search")
self._log("Optimizing for %s. " % self.objective.name, new_line=False)

Expand Down Expand Up @@ -141,6 +149,15 @@ def fit(self, X, y, feature_types=None, raise_errors=False):

self._log("\n✔ Optimization finished")

def check_multiclass(self, y):
    """Validate that all objectives can handle y when it has more than two classes.

    Arguments:
        y (pd.Series): target labels to inspect.

    Raises:
        ValueError: if y is multiclass (more than two unique values) but the
            main objective or any additional objective does not list
            ProblemTypes.MULTICLASS among its supported problem types.
    """
    # Two or fewer classes: nothing to validate.
    if y.nunique() > 2:
        if ProblemTypes.MULTICLASS not in self.objective.problem_types:
            raise ValueError("Given objective {} is not compatible with a multiclass problem.".format(self.objective.name))
        incompatible = [obj for obj in self.additional_objectives
                        if ProblemTypes.MULTICLASS not in obj.problem_types]
        if incompatible:
            raise ValueError("Additional objective {} is not compatible with a multiclass problem.".format(incompatible[0].name))
jeremyliweishih marked this conversation as resolved.
Show resolved Hide resolved

def _do_iteration(self, X, y, pbar, raise_errors):
# determine which pipeline to build
pipeline_class = self._select_pipeline()
Expand Down
30 changes: 25 additions & 5 deletions evalml/models/auto_classifier.py
Expand Up @@ -3,6 +3,7 @@

from .auto_base import AutoBase

from evalml.objectives import get_objective
from evalml.problem_types import ProblemTypes


Expand Down Expand Up @@ -58,15 +59,18 @@ def __init__(self,

verbose (boolean): If True, turn verbosity on. Defaults to True
"""
if objective is None:
objective = "precision"

if cv is None:
cv = StratifiedKFold(n_splits=3, random_state=random_state)

problem_type = ProblemTypes.BINARY
if multiclass:
problem_type = ProblemTypes.MULTICLASS
# set default objective if none provided
if objective is None and not multiclass:
jeremyliweishih marked this conversation as resolved.
Show resolved Hide resolved
objective = "precision"
elif objective is None and multiclass:
objective = "precision_micro"

problem_type = self.set_problem_type(objective, multiclass)

super().__init__(
tuner=tuner,
objective=objective,
Expand All @@ -82,3 +86,19 @@ def __init__(self,
verbose=verbose,
additional_objectives=additional_objectives
)

def set_problem_type(self, objective, multiclass):
    """Infer the problem type from the objective and the multiclass flag.

    Either:
        a. set problem_type to MULTICLASS if the objective supports only
           multiclass problems, even when multiclass is False
        b. set problem_type to MULTICLASS if multiclass is True
        c. default to BINARY

    Arguments:
        objective: objective name or ObjectiveBase instance; the caller
            substitutes a default objective first, so this is assumed non-None.
        multiclass (bool): True if the user explicitly requested multiclass.

    Returns:
        ProblemTypes: the inferred problem type.
    """
    problem_type = ProblemTypes.BINARY
    # if the objective is exclusively multiclass: infer MULTICLASS
    if objective and get_objective(objective).problem_types == [ProblemTypes.MULTICLASS]:
        problem_type = ProblemTypes.MULTICLASS
    elif multiclass:
        problem_type = ProblemTypes.MULTICLASS
    return problem_type
3 changes: 3 additions & 0 deletions evalml/objectives/fraud_cost.py
@@ -1,9 +1,12 @@
from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class FraudCost(ObjectiveBase):
"""Score the percentage of money lost of the total transaction amount process due to fraud"""
name = "Fraud Cost"
problem_types = [ProblemTypes.BINARY]
needs_fitting = True
greater_is_better = False
uses_extra_columns = True
Expand Down
4 changes: 4 additions & 0 deletions evalml/objectives/lead_scoring.py
@@ -1,9 +1,13 @@
from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class LeadScoring(ObjectiveBase):
"""Lead scoring"""
name = "Lead Scoring"
problem_types = [ProblemTypes.BINARY]

needs_fitting = True
greater_is_better = True
fit_needs_proba = True
Expand Down
6 changes: 3 additions & 3 deletions evalml/problem_types/problem_types.py
Expand Up @@ -3,6 +3,6 @@

class ProblemTypes(Enum):
    """Enum for type of machine learning problem: BINARY, MULTICLASS, or REGRESSION"""
    # Values are lower-case so they read naturally when interpolated into
    # user-facing error messages (e.g. "... with a binary problem.").
    BINARY = 'binary'
    MULTICLASS = 'multiclass'
    REGRESSION = 'regression'
26 changes: 26 additions & 0 deletions evalml/tests/automl_tests/test_autoclassifier.py
Expand Up @@ -113,6 +113,11 @@ def test_multi_auto(X_y_multi):
y_pred = clf.best_pipeline.predict(X)
assert len(np.unique(y_pred)) == 3

error_msg = 'not compatible with a multiclass problem.'
with pytest.raises(ValueError, match=error_msg):
clf = AutoClassifier(objective='recall')
clf.fit(X, y)

objective = PrecisionMicro()
clf = AutoClassifier(objective=objective, multiclass=True)
clf.fit(X, y)
Expand All @@ -125,6 +130,27 @@ def test_multi_auto(X_y_multi):
assert clf.additional_objectives == expected_additional_objectives


def test_multi_objective(X_y_multi):
    """Problem type is inferred from the objective and the multiclass flag."""
    # A binary-only objective combined with multiclass=True must fail fast.
    error_msg = 'Given objective Recall is not compatible with a multiclass problem'
    with pytest.raises(ValueError, match=error_msg):
        AutoClassifier(objective="recall", multiclass=True)

    # (constructor kwargs, expected inferred problem type) pairs.
    cases = [
        ({"objective": "log_loss"}, ProblemTypes.BINARY),
        ({"objective": "recall_micro"}, ProblemTypes.MULTICLASS),
        ({"objective": "recall"}, ProblemTypes.BINARY),
        ({"multiclass": True}, ProblemTypes.MULTICLASS),
        ({}, ProblemTypes.BINARY),
    ]
    for kwargs, expected in cases:
        clf = AutoClassifier(**kwargs)
        assert clf.problem_type == expected


def test_categorical_classification(X_y_categorical_classification):
X, y = X_y_categorical_classification
clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False)
Expand Down