diff --git a/evalml/__init__.py b/evalml/__init__.py
index fb0759cff5..b7ee428d54 100644
--- a/evalml/__init__.py
+++ b/evalml/__init__.py
@@ -11,10 +11,10 @@
 import evalml.objectives
 import evalml.tuners
 import evalml.demos
+import evalml.problem_types
 import evalml.pipelines
 from evalml.pipelines import list_model_types, save_pipeline, load_pipeline
 from evalml.models import AutoClassifier, AutoRegressor
-

 __version__ = '0.2.0'
diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py
index 10e1c1eee4..6cacb3dcd6 100644
--- a/evalml/models/auto_base.py
+++ b/evalml/models/auto_base.py
@@ -9,21 +9,19 @@
 from tqdm import tqdm

 from evalml import preprocessing
-from evalml.objectives import get_objective
+from evalml.objectives import get_objective, get_objectives
 from evalml.pipelines import get_pipelines
 from evalml.tuners import SKOptTuner


 class AutoBase:
     def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
-                 model_types, default_objectives, detect_label_leakage, start_iteration_callback,
+                 model_types, detect_label_leakage, start_iteration_callback,
                  add_result_callback, random_state, verbose):
-
         if tuner is None:
             tuner = SKOptTuner

         self.objective = get_objective(objective)
-
         self.max_pipelines = max_pipelines
         self.max_time = max_time
         self.model_types = model_types
@@ -34,6 +32,8 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
         self.verbose = verbose

         self.possible_pipelines = get_pipelines(problem_type=problem_type, model_types=model_types)
+        objective = get_objective(objective)
+        default_objectives = get_objectives(problem_type)

         self.results = {}
         self.trained_pipelines = {}
diff --git a/evalml/models/auto_classifier.py b/evalml/models/auto_classifier.py
index b3ac32db14..f8b0e98d3a 100644
--- a/evalml/models/auto_classifier.py
+++ b/evalml/models/auto_classifier.py
@@ -3,7 +3,7 @@
 from .auto_base import AutoBase

-from evalml.objectives import standard_metrics
+from evalml.problem_types import ProblemTypes


 class AutoClassifier(AutoBase):
@@ -11,6 +11,7 @@ class AutoClassifier(AutoBase):
     def __init__(self,
                  objective=None,
+                 multiclass=False,
                  max_pipelines=5,
                  max_time=None,
                  model_types=None,
@@ -26,6 +27,8 @@ def __init__(self,
         Arguments:
             objective (Object): the objective to optimize

+            multiclass (bool): If True, expecting multiclass data. By default: False.
+
             max_pipelines (int): maximum number of pipelines to search

             max_time (int): maximum time in seconds to search for pipelines.
@@ -57,16 +60,9 @@ def __init__(self,
         if cv is None:
             cv = StratifiedKFold(n_splits=3, random_state=random_state)

-        default_objectives = [
-            standard_metrics.F1(),
-            standard_metrics.Precision(),
-            standard_metrics.Recall(),
-            standard_metrics.AUC(),
-            standard_metrics.LogLoss()
-        ]
-
-        problem_type = "classification"
-
+        problem_type = ProblemTypes.BINARY
+        if multiclass:
+            problem_type = ProblemTypes.MULTICLASS
         super().__init__(
             tuner=tuner,
             objective=objective,
@@ -75,7 +71,6 @@ def __init__(self,
             max_time=max_time,
             model_types=model_types,
             problem_type=problem_type,
-            default_objectives=default_objectives,
             detect_label_leakage=detect_label_leakage,
             start_iteration_callback=start_iteration_callback,
             add_result_callback=add_result_callback,
diff --git a/evalml/models/auto_regressor.py b/evalml/models/auto_regressor.py
index 078bc276fc..a379d771e4 100644
--- a/evalml/models/auto_regressor.py
+++ b/evalml/models/auto_regressor.py
@@ -2,7 +2,7 @@
 from .auto_base import AutoBase

-from evalml.objectives import standard_metrics
+from evalml.problem_types import ProblemTypes


 class AutoRegressor(AutoBase):
@@ -54,15 +54,11 @@ def __init__(self,
         if objective is None:
             objective = "R2"

-        default_objectives = [
-            standard_metrics.R2(),
-        ]
+        problem_type = ProblemTypes.REGRESSION

         if cv is None:
             cv = KFold(n_splits=3, random_state=random_state)

-        problem_type = "regression"
-
         super().__init__(
             tuner=tuner,
             objective=objective,
@@ -71,7 +67,6 @@ def __init__(self,
             max_time=max_time,
             model_types=model_types,
             problem_type=problem_type,
-            default_objectives=default_objectives,
             detect_label_leakage=detect_label_leakage,
             start_iteration_callback=start_iteration_callback,
             add_result_callback=add_result_callback,
diff --git a/evalml/objectives/__init__.py b/evalml/objectives/__init__.py
index 593d10e324..1bd87cd10c 100644
--- a/evalml/objectives/__init__.py
+++ b/evalml/objectives/__init__.py
@@ -1,8 +1,11 @@
 # flake8:noqa
 from .fraud_cost import FraudCost
 from .lead_scoring import LeadScoring
-from .standard_metrics import F1, Precision, Recall, AUC, LogLoss, MCC, R2
+from .standard_metrics import (
+    F1, F1Micro, F1Macro, F1Weighted, Precision, PrecisionMicro, PrecisionMacro, PrecisionWeighted, Recall, RecallMicro, RecallMacro, RecallWeighted,
+    AUC, AUCMicro, AUCMacro, AUCWeighted, LogLoss, MCC, R2
+    )
 from .objective_base import ObjectiveBase
-from .utils import get_objective
+from .utils import get_objective, get_objectives
diff --git a/evalml/objectives/fraud_cost.py b/evalml/objectives/fraud_cost.py
index f3923aed8d..0e0b99d61c 100644
--- a/evalml/objectives/fraud_cost.py
+++ b/evalml/objectives/fraud_cost.py
@@ -35,7 +35,6 @@ def __init__(self, retry_percentage=.5, interchange_fee=.02,
     def decision_function(self, y_predicted, extra_cols, threshold):
         """Determine if transaction is fraud given predicted probabilities, dataframe with transaction amount, and threshold"""
-
         transformed_probs = (y_predicted * extra_cols[self.amount_col])
         return transformed_probs > threshold
diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py
index 15274c4867..e64560111d 100644
--- a/evalml/objectives/objective_base.py
+++ b/evalml/objectives/objective_base.py
@@ -1,5 +1,7 @@
 from scipy.optimize import minimize_scalar

+from evalml.problem_types import handle_problem_types
+

 class ObjectiveBase:
     needs_fitting = False
@@ -7,10 +9,17 @@ class ObjectiveBase:
     fit_needs_proba = True
     score_needs_proba = False
     uses_extra_columns = False
+    problem_types = []

     def __init__(self, verbose=False):
         self.verbose = verbose

+    def supports_problem_type(self, problem_type):
+        problem_type = handle_problem_types(problem_type)
+        if problem_type in self.__class__.problem_types:
+            return True
+        return False
+
     def fit(self, y_predicted, y_true, extra_cols=None):
         """Learn the objective function based on the predictions from a model.
diff --git a/evalml/objectives/standard_metrics.py b/evalml/objectives/standard_metrics.py
index 139b7da1e1..f00cf6ec7e 100644
--- a/evalml/objectives/standard_metrics.py
+++ b/evalml/objectives/standard_metrics.py
@@ -1,7 +1,11 @@
+import numpy as np
 from sklearn import metrics
+from sklearn.preprocessing import label_binarize

 from .objective_base import ObjectiveBase

+from evalml.problem_types import ProblemTypes
+

 # todo does this need tuning?
 class F1(ObjectiveBase):
@@ -9,11 +13,43 @@
     greater_is_better = True
     need_proba = False
     name = "F1"
+    problem_types = [ProblemTypes.BINARY]

     def score(self, y_predicted, y_true):
         return metrics.f1_score(y_true, y_predicted)

-# todo does this need tuning?
+
+class F1Micro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "F1 Micro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.f1_score(y_true, y_predicted, average='micro')
+
+
+class F1Macro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "F1 Macro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.f1_score(y_true, y_predicted, average='macro')
+
+
+class F1Weighted(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "F1 Weighted"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.f1_score(y_true, y_predicted, average='weighted')


 class Precision(ObjectiveBase):
@@ -21,36 +57,142 @@
     greater_is_better = True
     need_proba = False
     name = "Precision"
+    problem_types = [ProblemTypes.BINARY]

     def score(self, y_predicted, y_true):
         return metrics.precision_score(y_true, y_predicted)


+class PrecisionMicro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Precision Micro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.precision_score(y_true, y_predicted, average='micro')
+
+
+class PrecisionMacro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Precision Macro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.precision_score(y_true, y_predicted, average='macro')
+
+
+class PrecisionWeighted(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Precision Weighted"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.precision_score(y_true, y_predicted, average='weighted')
+
+
 class Recall(ObjectiveBase):
     needs_fitting = False
     greater_is_better = True
     need_proba = False
     name = "Recall"
+    problem_types = [ProblemTypes.BINARY]

     def score(self, y_predicted, y_true):
         return metrics.f1_score(y_true, y_predicted)


+class RecallMicro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Recall Micro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.recall_score(y_true, y_predicted, average='micro')
+
+
+class RecallMacro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Recall Macro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.recall_score(y_true, y_predicted, average='macro')
+
+
+class RecallWeighted(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    need_proba = False
+    name = "Recall Weighted"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        return metrics.recall_score(y_true, y_predicted, average='weighted')
+
+
 class AUC(ObjectiveBase):
     needs_fitting = False
     greater_is_better = True
     score_needs_proba = True
     name = "AUC"
+    problem_types = [ProblemTypes.BINARY]

     def score(self, y_predicted, y_true):
         return metrics.roc_auc_score(y_true, y_predicted)


+class AUCMicro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    score_needs_proba = True
+    name = "AUC Micro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        y_true, y_predicted = _handle_predictions(y_true, y_predicted)
+        return metrics.roc_auc_score(y_true, y_predicted, average='micro')
+
+
+class AUCMacro(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    score_needs_proba = True
+    name = "AUC Macro"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        y_true, y_predicted = _handle_predictions(y_true, y_predicted)
+        return metrics.roc_auc_score(y_true, y_predicted, average='macro')
+
+
+class AUCWeighted(ObjectiveBase):
+    needs_fitting = False
+    greater_is_better = True
+    score_needs_proba = True
+    name = "AUC Weighted"
+    problem_types = [ProblemTypes.MULTICLASS]
+
+    def score(self, y_predicted, y_true):
+        y_true, y_predicted = _handle_predictions(y_true, y_predicted)
+        return metrics.roc_auc_score(y_true, y_predicted, average='weighted')
+
+
 class LogLoss(ObjectiveBase):
     needs_fitting = False
     greater_is_better = False
     score_needs_proba = True
     name = "Log Loss"
+    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

     def score(self, y_predicted, y_true):
         return metrics.log_loss(y_true, y_predicted)
@@ -61,6 +203,7 @@ class MCC(ObjectiveBase):
     greater_is_better = True
     need_proba = False
     name = "MCC"
+    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

     def score(self, y_predicted, y_true):
         return metrics.matthews_corrcoef(y_true, y_predicted)
@@ -71,6 +214,15 @@ class R2(ObjectiveBase):
     greater_is_better = True
     need_proba = False
     name = "R2"
+    problem_types = [ProblemTypes.REGRESSION]

     def score(self, y_predicted, y_true):
         return metrics.r2_score(y_true, y_predicted)
+
+
+def _handle_predictions(y_true, y_pred):
+    if len(np.unique(y_true)) > 2:
+        classes = np.unique(y_true)
+        y_true = label_binarize(y_true, classes=classes)
+
+    return y_true, y_pred
diff --git a/evalml/objectives/utils.py b/evalml/objectives/utils.py
index 50985f46db..933afbc29f 100644
--- a/evalml/objectives/utils.py
+++ b/evalml/objectives/utils.py
@@ -1,21 +1,54 @@
 from . import standard_metrics

 from .objective_base import ObjectiveBase

+from evalml.problem_types import handle_problem_types
+
+OPTIONS = {
+    "f1": standard_metrics.F1(),
+    "f1_micro": standard_metrics.F1Micro(),
+    "f1_macro": standard_metrics.F1Macro(),
+    "f1_weighted": standard_metrics.F1Weighted(),
+    "precision": standard_metrics.Precision(),
+    "precision_micro": standard_metrics.PrecisionMicro(),
+    "precision_macro": standard_metrics.PrecisionMacro(),
+    "precision_weighted": standard_metrics.PrecisionWeighted(),
+    "recall": standard_metrics.Recall(),
+    "recall_micro": standard_metrics.RecallMicro(),
+    "recall_macro": standard_metrics.RecallMacro(),
+    "recall_weighted": standard_metrics.RecallWeighted(),
+    "auc": standard_metrics.AUC(),
+    "auc_micro": standard_metrics.AUCMicro(),
+    "auc_macro": standard_metrics.AUCMacro(),
+    "auc_weighted": standard_metrics.AUCWeighted(),
+    "log_loss": standard_metrics.LogLoss(),
+    "mcc": standard_metrics.MCC(),
+    "r2": standard_metrics.R2(),
+}
+

 def get_objective(objective):
+    """Returns the Objective object of the given objective name
+
+    Args:
+        objective (str) : name of the objective
+
+    Returns:
+        Objective
+    """
     if isinstance(objective, ObjectiveBase):
         return objective
-    objective = objective.lower()
+    return OPTIONS[objective.lower()]
+
+
+def get_objectives(problem_type):
+    """Returns all objectives associated with the given problem type
+
+    Args:
+        problem_type (str/ProblemTypes) : type of problem
+
-    options = {
-        "f1": standard_metrics.F1(),
-        "precision": standard_metrics.Precision(),
-        "recall": standard_metrics.Recall(),
-        "auc": standard_metrics.AUC(),
-        "log_loss": standard_metrics.LogLoss(),
-        "mcc": standard_metrics.MCC(),
-        "r2": standard_metrics.R2(),
-    }
-
-    return options[objective]
+    Returns:
+        List of Objectives
+    """
+    problem_type = handle_problem_types(problem_type)
+    return [OPTIONS[name] for name in OPTIONS if OPTIONS[name].supports_problem_type(problem_type)]
diff --git a/evalml/pipelines/classification/logistic_regression.py b/evalml/pipelines/classification/logistic_regression.py
index 49853ed9be..35513d75cc 100644
--- a/evalml/pipelines/classification/logistic_regression.py
+++ b/evalml/pipelines/classification/logistic_regression.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
@@ -6,12 +7,13 @@
 from skopt.space import Real

 from evalml.pipelines import PipelineBase
+from evalml.problem_types import ProblemTypes


 class LogisticRegressionPipeline(PipelineBase):
     name = "LogisticRegression w/ imputation + scaling"
     model_type = "linear_model"
-    problem_type = "classification"
+    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

     hyperparameters = {
         "penalty": ["l2"],
@@ -26,6 +28,7 @@ def __init__(self, objective, penalty, C, impute_strategy,
         estimator = LogisticRegression(random_state=random_state,
                                        penalty=penalty,
                                        C=C,
+                                       multi_class='auto',
                                        solver="lbfgs",
                                        n_jobs=-1)
@@ -40,8 +43,16 @@ def __init__(self, objective, penalty, C, impute_strategy,
     @property
     def feature_importances(self):
         """Return feature importances. Feature dropped by feaure selection are excluded"""
-        importances = list(zip(self.input_feature_names, self.pipeline["estimator"].coef_[0])) # note: this only works for binary
-        importances.sort(key=lambda x: -abs(x[1]))
+        coef_ = self.pipeline["estimator"].coef_
+
+        # binary classification case
+        if len(coef_) <= 2:
+            importances = list(zip(self.input_feature_names, coef_[0]))
+            importances.sort(key=lambda x: -abs(x[1]))
+        else:
+            # multiclass classification case
+            importances = list(zip(self.input_feature_names, np.linalg.norm(coef_, axis=0, ord=2)))
+            importances.sort(key=lambda x: -(x[1]))

         df = pd.DataFrame(importances, columns=["feature", "importance"])
         return df
diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py
index 2ca1d80d9e..ce520436ec 100644
--- a/evalml/pipelines/classification/random_forest.py
+++ b/evalml/pipelines/classification/random_forest.py
@@ -7,12 +7,13 @@
 from skopt.space import Integer, Real

 from evalml.pipelines import PipelineBase
+from evalml.problem_types import ProblemTypes


 class RFClassificationPipeline(PipelineBase):
     name = "Random Forest w/ imputation"
     model_type = "random_forest"
-    problem_type = "classification"
+    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

     hyperparameters = {
         "n_estimators": Integer(10, 1000),
@@ -49,7 +50,7 @@ def feature_importances(self):
         """Return feature importances. Feature dropped by feaure selection are excluded"""
         indices = self.pipeline["feature_selection"].get_support(indices=True)
         feature_names = list(map(lambda i: self.input_feature_names[i], indices))
-        importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_)) # note: this only works for binary
+        importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_))
         importances.sort(key=lambda x: -abs(x[1]))

         df = pd.DataFrame(importances, columns=["feature", "importance"])
diff --git a/evalml/pipelines/classification/xgboost.py b/evalml/pipelines/classification/xgboost.py
index 37468ef374..2c3726157b 100644
--- a/evalml/pipelines/classification/xgboost.py
+++ b/evalml/pipelines/classification/xgboost.py
@@ -7,12 +7,13 @@
 from xgboost import XGBClassifier

 from evalml.pipelines import PipelineBase
+from evalml.problem_types import ProblemTypes


 class XGBoostPipeline(PipelineBase):
     name = "XGBoost w/ imputation"
     model_type = "xgboost"
-    problem_type = "classification"
+    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

     hyperparameters = {
         "eta": Real(0, 1),
@@ -47,12 +48,41 @@ def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy,

         super().__init__(objective=objective, random_state=random_state)

+    # Need to override fit for multiclass
+    def fit(self, X, y, objective_fit_size=.2):
+        """Build a model
+
+        Arguments:
+            X (pd.DataFrame or np.array): the input training data of shape [n_samples, n_features]
+
+            y (pd.Series): the target training labels of length [n_samples]
+
+        Returns:
+
+            self
+
+        """
+        # check if problem is multiclass
+        num_classes = len(np.unique(y))
+        if num_classes > 2:
+            params = self.pipeline['estimator'].get_params()
+            params.update(
+                {
+                    "objective": 'multi:softprob',
+                    "num_class": num_classes
+                })
+
+            estimator = XGBClassifier(**params)
+            self.pipeline.steps[-1] = ('estimator', estimator)
+
+        return super().fit(X, y, objective_fit_size)
+
     @property
     def feature_importances(self):
         """Return feature importances. Feature dropped by feaure selection are excluded"""
         indices = self.pipeline["feature_selection"].get_support(indices=True)
         feature_names = list(map(lambda i: self.input_feature_names[i], indices))
-        importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_)) # note: this only works for binary
+        importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_))
         importances.sort(key=lambda x: -abs(x[1]))

         df = pd.DataFrame(importances, columns=["feature", "importance"])
diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py
index 38cf2cdab2..d2dac014be 100644
--- a/evalml/pipelines/pipeline_base.py
+++ b/evalml/pipelines/pipeline_base.py
@@ -1,3 +1,4 @@
+import pandas as pd
 from sklearn.model_selection import train_test_split

 from evalml.objectives import get_objective
@@ -24,8 +25,13 @@ def fit(self, X, y, objective_fit_size=.2):
             self
         """
-        self.input_feature_names = X.columns.tolist()
+        if not isinstance(X, pd.DataFrame):
+            X = pd.DataFrame(X)
+
+        if not isinstance(y, pd.Series):
+            y = pd.Series(y)

+        self.input_feature_names = X.columns.tolist()
         if self.objective.needs_fitting:
             X, X_objective, y, y_objective = train_test_split(X, y, test_size=objective_fit_size, random_state=self.random_state)
@@ -76,7 +82,11 @@ def predict_proba(self, X):
             DataFrame : probability estimates
         """
-        return self.pipeline.predict_proba(X)[:, 1]
+        proba = self.pipeline.predict_proba(X)
+        if proba.shape[1] <= 2:
+            return proba[:, 1]
+        else:
+            return proba

     def score(self, X, y, other_objectives=None):
         """Evaluate model performance
diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py
index 771a988cf6..e7120e49e3 100644
--- a/evalml/pipelines/regression/random_forest.py
+++ b/evalml/pipelines/regression/random_forest.py
@@ -7,12 +7,13 @@
 from skopt.space import Integer, Real

 from evalml.pipelines import PipelineBase
+from evalml.problem_types import ProblemTypes


 class RFRegressionPipeline(PipelineBase):
     name = "Random Forest w/ imputation"
     model_type = "random_forest"
-    problem_type = "regression"
+    problem_types = [ProblemTypes.REGRESSION]

     hyperparameters = {
         "n_estimators": Integer(10, 1000),
diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index 8a89ce2ee4..4f1d9846d6 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -7,6 +7,8 @@
 )
 from .regression import RFRegressionPipeline

+from evalml.problem_types import handle_problem_types
+
 ALL_PIPELINES = [RFClassificationPipeline, XGBoostPipeline, LogisticRegressionPipeline, RFRegressionPipeline]


@@ -15,7 +17,7 @@ def get_pipelines(problem_type, model_types=None):

     Arguments:
-        problem_type (str): the problem type the pipelines work for. Either regression or classification
+        problem_type (ProblemTypes or str): the problem type the pipelines work for.
         model_types(list[str]): model types to match. if none, return all pipelines

     Returns
@@ -26,8 +28,9 @@

     problem_pipelines = []

+    problem_type = handle_problem_types(problem_type)
     for p in ALL_PIPELINES:
-        if p.problem_type == problem_type:
+        if problem_type in p.problem_types:
             problem_pipelines.append(p)

     if model_types is None:
@@ -51,16 +54,16 @@ def list_model_types(problem_type):
     """List model type for a particular problem type

     Arguments:
-        problem_type (str): classification or regression
+        problem_type (ProblemTypes or str): binary, multiclass, or regression

     Returns:
         model_types, list of model types
     """

     problem_pipelines = []
-
+    problem_type = handle_problem_types(problem_type)
     for p in ALL_PIPELINES:
-        if p.problem_type == problem_type:
+        if problem_type in p.problem_types:
             problem_pipelines.append(p)

     return list(set([p.model_type for p in problem_pipelines]))
diff --git a/evalml/problem_types/__init__.py b/evalml/problem_types/__init__.py
new file mode 100644
index 0000000000..4a1f7eecc2
--- /dev/null
+++ b/evalml/problem_types/__init__.py
@@ -0,0 +1,3 @@
+# flake8:noqa
+from .problem_types import ProblemTypes
+from .utils import handle_problem_types
diff --git a/evalml/problem_types/problem_types.py b/evalml/problem_types/problem_types.py
new file mode 100644
index 0000000000..2cceb7b065
--- /dev/null
+++ b/evalml/problem_types/problem_types.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+
+class ProblemTypes(Enum):
+    BINARY = 'BINARY'
+    MULTICLASS = 'MULTICLASS'
+    REGRESSION = 'REGRESSION'
diff --git a/evalml/problem_types/utils.py b/evalml/problem_types/utils.py
new file mode 100644
index 0000000000..7cfa0ef516
--- /dev/null
+++ b/evalml/problem_types/utils.py
@@ -0,0 +1,22 @@
+from .problem_types import ProblemTypes
+
+
+def handle_problem_types(problem_type):
+    """Handles problem_type by either returning the ProblemTypes or converting from a str
+
+    Args:
+        problem_type (str or ProblemTypes) : problem type that needs to be handled
+
+    Returns:
+        ProblemTypes
+    """
+
+    if isinstance(problem_type, str):
+        try:
+            tpe = ProblemTypes[problem_type.upper()]
+        except KeyError:
+            raise KeyError('Problem type \'{}\' does not exist'.format(problem_type))
+        return tpe
+    if isinstance(problem_type, ProblemTypes):
+        return problem_type
+    raise ValueError('`handle_problem_types` was not passed a str or ProblemTypes object')
diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py
index 035f3881bf..84eacec411 100644
--- a/evalml/tests/conftest.py
+++ b/evalml/tests/conftest.py
@@ -13,6 +13,13 @@ def X_y():
     return X, y


+@pytest.fixture
+def X_y_multi():
+    X, y = datasets.make_classification(n_samples=100, n_features=20, n_classes=3,
+                                        n_informative=3, n_redundant=2, random_state=0)
+    return X, y
+
+
 @pytest.fixture
 def X_y_categorical_regression():
     flights = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')
diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py
index 07377b0fcb..5f5b06ae0a 100644
--- a/evalml/tests/test_autoclassifier.py
+++ b/evalml/tests/test_autoclassifier.py
@@ -1,18 +1,25 @@
+import numpy as np
 import pandas as pd
 from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit

 from evalml import AutoClassifier
-from evalml.objectives import FraudCost, Precision
+from evalml.objectives import (
+    FraudCost,
+    Precision,
+    PrecisionMicro,
+    get_objectives
+)
 from evalml.pipelines import PipelineBase, get_pipelines
+from evalml.problem_types import ProblemTypes


 def test_init(X_y):
     X, y = X_y

-    clf = AutoClassifier()
+    clf = AutoClassifier(multiclass=False)

     # check loads all pipelines
-    assert get_pipelines(problem_type="classification") == clf.possible_pipelines
+    assert get_pipelines(problem_type=ProblemTypes.BINARY) == clf.possible_pipelines

     clf.fit(X, y)
@@ -58,13 +65,13 @@ def test_init_select_model_types():
     model_types = ["random_forest"]

     clf = AutoClassifier(model_types=model_types)

-    assert get_pipelines(problem_type="classification", model_types=model_types) == clf.possible_pipelines
+    assert get_pipelines(problem_type=ProblemTypes.BINARY, model_types=model_types) == clf.possible_pipelines
     assert model_types == clf.possible_model_types


 def test_max_pipelines(X_y):
     X, y = X_y
-    max_pipelines = 3
+    max_pipelines = 6
     clf = AutoClassifier(max_pipelines=max_pipelines)

     clf.fit(X, y)
@@ -88,6 +95,30 @@ def test_specify_objective(X_y):
     clf.fit(X, y)


+def test_binary_auto(X_y):
+    X, y = X_y
+    clf = AutoClassifier(objective="recall", multiclass=False)
+    clf.fit(X, y)
+    y_pred = clf.best_pipeline.predict(X)
+    assert len(np.unique(y_pred)) == 2
+
+
+def test_multi_auto(X_y_multi):
+    X, y = X_y_multi
+    clf = AutoClassifier(objective="recall_micro", multiclass=True)
+    clf.fit(X, y)
+    y_pred = clf.best_pipeline.predict(X)
+    assert len(np.unique(y_pred)) == 3
+
+    objective = PrecisionMicro()
+    clf = AutoClassifier(objective=objective, multiclass=True)
+    clf.fit(X, y)
+    y_pred = clf.best_pipeline.predict(X)
+    assert len(np.unique(y_pred)) == 3
+
+    assert clf.default_objectives == get_objectives('multiclass')
+
+
 def test_random_state(X_y):
     X, y = X_y
diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py
index 05e98695ca..c1d86b5d75 100644
--- a/evalml/tests/test_autoregressor.py
+++ b/evalml/tests/test_autoregressor.py
@@ -4,6 +4,7 @@
 from evalml import AutoRegressor
 from evalml.demos import load_diabetes
 from evalml.pipelines import PipelineBase, get_pipelines
+from evalml.problem_types import ProblemTypes


 @pytest.fixture
@@ -17,7 +18,7 @@ def test_init(X_y):
     clf = AutoRegressor(objective="R2", max_pipelines=3)

     # check loads all pipelines
-    assert get_pipelines(problem_type="regression") == clf.possible_pipelines
+    assert get_pipelines(problem_type=ProblemTypes.REGRESSION) == clf.possible_pipelines

     clf.fit(X, y)
diff --git a/evalml/tests/test_logistic_regression.py b/evalml/tests/test_logistic_regression.py
new file mode 100644
index 0000000000..d98e41016e
--- /dev/null
+++ b/evalml/tests/test_logistic_regression.py
@@ -0,0 +1,15 @@
+import numpy as np
+
+from evalml.objectives import PrecisionMicro
+from evalml.pipelines import LogisticRegressionPipeline
+
+
+def test_lr_multi(X_y_multi):
+    X, y = X_y_multi
+    objective = PrecisionMicro()
+    clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]))
+    clf.fit(X, y)
+    clf.score(X, y)
+    y_pred = clf.predict(X)
+    assert len(np.unique(y_pred)) == 3
+    assert len(clf.feature_importances) == len(X[0])
diff --git a/evalml/tests/test_objectives.py b/evalml/tests/test_objectives.py
new file mode 100644
index 0000000000..35df777b6a
--- /dev/null
+++ b/evalml/tests/test_objectives.py
@@ -0,0 +1,34 @@
+import pandas as pd
+
+from evalml.objectives import (
+    Precision,
+    PrecisionMacro,
+    PrecisionMicro,
+    get_objective,
+    get_objectives
+)
+from evalml.pipelines import LogisticRegressionPipeline
+
+
+def test_get_objective():
+    assert isinstance(get_objective('precision'), Precision)
+    assert isinstance(get_objective(Precision()), Precision)
+
+
+def test_get_objectives_types():
+    assert len(get_objectives('multiclass')) == 14
+    assert len(get_objectives('binary')) == 6
+    assert len(get_objectives('regression')) == 1
+
+
+def test_binary_average(X_y):
+    X, y = X_y
+    X = pd.DataFrame(X)
+    y = pd.Series(y)
+
+    pipeline = LogisticRegressionPipeline(objective=Precision(), penalty='l2', C=1.0, impute_strategy='mean', number_features=0)
+    pipeline.fit(X, y)
+    y_pred = pipeline.predict(X)
+
+    assert Precision().score(y, y_pred) == PrecisionMicro().score(y, y_pred)
+    assert Precision().score(y, y_pred) == PrecisionMacro().score(y, y_pred)
diff --git a/evalml/tests/test_pipelines.py b/evalml/tests/test_pipelines.py
index 284a661f10..b549a1fdb9 100644
--- a/evalml/tests/test_pipelines.py
+++ b/evalml/tests/test_pipelines.py
@@ -4,34 +4,26 @@
 import pandas as pd
 import pytest
-from sklearn import datasets

 import evalml.tests as tests
 from evalml import load_pipeline, save_pipeline
-from evalml.objectives import FraudCost
+from evalml.objectives import FraudCost, Precision
 from evalml.pipelines import LogisticRegressionPipeline
 from evalml.pipelines.utils import get_pipelines, list_model_types
+from evalml.problem_types import ProblemTypes

 CACHE = os.path.join(os.path.dirname(tests.__file__), '.cache')


-@pytest.fixture
-def data():
-    X, y = datasets.make_classification(n_samples=100, n_features=20,
-                                        n_informative=2, n_redundant=2, random_state=0)
-
-    return X, y
-
-
 def test_list_model_types():
-    assert set(list_model_types("classification")) == set(["random_forest", "xgboost", "linear_model"])
-    assert set(list_model_types("regression")) == set(["random_forest"])
+    assert set(list_model_types(ProblemTypes.BINARY)) == set(["random_forest", "xgboost", "linear_model"])
+    assert set(list_model_types(ProblemTypes.REGRESSION)) == set(["random_forest"])


 def test_get_pipelines():
-    assert len(get_pipelines(problem_type="classification")) == 3
-    assert len(get_pipelines(problem_type="classification", model_types=["linear_model"])) == 1
-    assert len(get_pipelines(problem_type="regression")) == 1
+    assert len(get_pipelines(problem_type=ProblemTypes.BINARY)) == 3
+    assert len(get_pipelines(problem_type=ProblemTypes.BINARY, model_types=["linear_model"])) == 1
+    assert len(get_pipelines(problem_type=ProblemTypes.REGRESSION)) == 1


 @pytest.fixture
 def path_management():
@@ -49,7 +41,10 @@ def path_management():
 def test_serialization(X_y, trained_model, path_management):
     X, y = X_y
     path = os.path.join(path_management, 'pipe.pkl')
-    pipeline = trained_model.best_pipeline
+    objective = Precision()
+
+    pipeline = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]))
+    pipeline.fit(X, y)
     save_pipeline(pipeline, path)
     assert pipeline.score(X, y) == load_pipeline(path).score(X, y)
@@ -57,7 +52,6 @@ def test_reproducibility(X_y):
     X, y = X_y
     X = pd.DataFrame(X)
-    y = pd.Series(y)

     objective = FraudCost(
         retry_percentage=.5,
diff --git a/evalml/tests/test_problem_types.py b/evalml/tests/test_problem_types.py
new file mode 100644
index 0000000000..ebfa9037d1
--- /dev/null
+++ b/evalml/tests/test_problem_types.py
@@ -0,0 +1,31 @@
+import pytest
+
+from evalml.problem_types import ProblemTypes, handle_problem_types
+
+
+@pytest.fixture
+def correct_problem_types():
+    correct_problem_types = [ProblemTypes.REGRESSION, ProblemTypes.MULTICLASS, ProblemTypes.BINARY]
+    yield correct_problem_types
+
+
+def test_handle_string(correct_problem_types):
+    problem_types = ['regression', 'multiclass', 'binary']
+    for problem_type in zip(problem_types, correct_problem_types):
+        assert handle_problem_types(problem_type[0]) == problem_type[1]
+
+    problem_type = 'fake'
+    error_msg = 'Problem type \'fake\' does not exist'
+    with pytest.raises(KeyError, match=error_msg):
+        handle_problem_types(problem_type) == ProblemTypes.REGRESSION
+
+
+def test_handle_problem_types(correct_problem_types):
+    for problem_type in correct_problem_types:
+        assert handle_problem_types(problem_type) == problem_type
+
+
+def test_handle_incorrect_type():
+    error_msg = '`handle_problem_types` was not passed a str or ProblemTypes object'
+    with pytest.raises(ValueError, match=error_msg):
+        handle_problem_types(5)
diff --git a/evalml/tests/test_rf.py b/evalml/tests/test_rf.py
new file mode 100644
index 0000000000..2e3ec42c59
--- /dev/null
+++ b/evalml/tests/test_rf.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+from evalml.objectives import PrecisionMicro
+from evalml.pipelines import RFClassificationPipeline
+
+
+def test_rf_multi(X_y_multi):
+    X, y = X_y_multi
+    objective = PrecisionMicro()
+    clf = RFClassificationPipeline(objective=objective, n_estimators=10, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0]))
+    clf.fit(X, y)
+    clf.score(X, y)
+    y_pred = clf.predict(X)
+    assert len(np.unique(y_pred)) == 3
diff --git a/evalml/tests/test_xgboost.py b/evalml/tests/test_xgboost.py
new file mode 100644
index 0000000000..cc005f9302
--- /dev/null
+++ b/evalml/tests/test_xgboost.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+from evalml.objectives import PrecisionMicro
+from evalml.pipelines import XGBoostPipeline
+
+
+def test_xg_multi(X_y_multi):
+    X, y = X_y_multi
+    objective = PrecisionMicro()
+    clf = XGBoostPipeline(objective=objective, eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0]))
+    clf.fit(X, y)
+    clf.score(X, y)
+    y_pred = clf.predict(X)
+    assert len(np.unique(y_pred)) == 3
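
Illustrative usage sketch (not part of the patch above): the snippet below shows how the problem-type and multiclass interfaces introduced in this diff fit together, using only names taken from the changed files (AutoClassifier's multiclass flag and best_pipeline, get_objectives, handle_problem_types, ProblemTypes) and a data setup that mirrors the X_y_multi fixture; treat it as a sketch under those assumptions rather than documented API.

import numpy as np
from sklearn import datasets

from evalml import AutoClassifier
from evalml.objectives import get_objectives
from evalml.problem_types import ProblemTypes, handle_problem_types

# Strings are normalized to the new ProblemTypes enum.
assert handle_problem_types("multiclass") == ProblemTypes.MULTICLASS

# Objectives are filtered by the problem type they support.
multiclass_objectives = get_objectives(ProblemTypes.MULTICLASS)

# multiclass=True routes AutoClassifier to ProblemTypes.MULTICLASS, so only
# compatible pipelines and objectives are searched (mirrors test_multi_auto).
X, y = datasets.make_classification(n_samples=100, n_features=20, n_classes=3,
                                    n_informative=3, n_redundant=2, random_state=0)
clf = AutoClassifier(objective="precision_micro", multiclass=True, max_pipelines=3)
clf.fit(X, y)
assert len(np.unique(clf.best_pipeline.predict(X))) == 3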