Allow Objectives to take in list inputs #1663

Merged: 11 commits, Jan 11, 2021
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -9,6 +9,7 @@ Release Notes
* Added multiclass check to ``InvalidTargetDataCheck`` for two examples per class :pr:`1596`
* Support graphviz 0.16 :pr:`1657`
* Enhanced time series pipelines to accept empty features :pr:`1651`
* Added support for list inputs for objectives :pr:`1663`
* Fixes
* Fixed thresholding for pipelines in ``AutoMLSearch`` to only threshold binary classification pipelines :pr:`1622` :pr:`1626`
* Updated ``load_data`` to return Woodwork structures and update default parameter value for ``index`` to ``None`` :pr:`1610`
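What the new objective support looks like in practice: a minimal sketch, assuming the `F1` objective and its `score(y_true, y_predicted)` API; plain Python lists now work without first wrapping them in pandas or woodwork.

```python
from evalml.objectives import F1

# y_true and y_predicted as plain lists; both are standardized to pandas internally.
objective = F1()
score = objective.score([0, 1, 1, 0], [0, 1, 0, 0])
print(score)  # 0.666...: precision 1.0, recall 0.5
```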
11 changes: 8 additions & 3 deletions evalml/objectives/objective_base.py
@@ -76,7 +76,7 @@ def _standardize_input_type(input_data):
"""Standardize input to pandas for scoring.

Arguments:
input_data (ww.DataTable, ww.DataColumn, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities
input_data (list, ww.DataTable, ww.DataColumn, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities

Returns:
pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
@@ -87,9 +87,14 @@ def _standardize_input_type(input_data):
return _convert_woodwork_types_wrapper(input_data.to_dataframe())
if isinstance(input_data, ww.DataColumn):
return _convert_woodwork_types_wrapper(input_data.to_series())
if isinstance(input_data, list):
    if isinstance(input_data[0], list):
        return pd.DataFrame(input_data)
    return pd.Series(input_data)
if isinstance(input_data, np.ndarray):
    if len(input_data.shape) == 1:
        return pd.Series(input_data)
    return pd.DataFrame(input_data)

def validate_inputs(self, y_true, y_predicted):
"""Validates the input based on a few simple checks.
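A self-contained sketch of the dispatch above (the function name here is illustrative, not evalml's API):

```python
import numpy as np
import pandas as pd

def standardize(input_data):
    # Mirrors the list/ndarray handling in ObjectiveBase._standardize_input_type:
    # a flat list becomes a Series, a list of lists becomes a DataFrame
    # (a matrix of predicted probabilities).
    if isinstance(input_data, list):
        if isinstance(input_data[0], list):
            return pd.DataFrame(input_data)
        return pd.Series(input_data)
    if isinstance(input_data, np.ndarray):
        if len(input_data.shape) == 1:
            return pd.Series(input_data)
        return pd.DataFrame(input_data)
    return input_data

assert isinstance(standardize([0.1, 0.9, 0.4]), pd.Series)
assert isinstance(standardize([[0.1, 0.9], [0.8, 0.2]]), pd.DataFrame)
```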
4 changes: 2 additions & 2 deletions evalml/pipelines/components/component_base.py
@@ -86,8 +86,8 @@ def fit(self, X, y=None):
"""Fits component to data

Arguments:
X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]
y (ww.DataColumn, pd.Series, np.ndarray, optional): The target training data of length [n_samples]
X (list, ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]
y (list, ww.DataColumn, pd.Series, np.ndarray, optional): The target training data of length [n_samples]

Returns:
self
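With the widened signature, a component can be fit directly on lists; a usage sketch, assuming evalml's `StandardScaler` component lives at this import path:

```python
from evalml.pipelines.components import StandardScaler

# X as a list of rows, y as a flat list; both are converted internally.
X = [[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]
y = [0, 1, 0]
StandardScaler().fit(X, y)
```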
@@ -91,7 +91,8 @@ def _encode_categories(self, X, fit=False):
return X_encoded

def _encode_labels(self, y):
y_encoded = pd.Series(y)
y_encoded = _convert_to_woodwork_structure(y)
y_encoded = _convert_woodwork_types_wrapper(y_encoded.to_series())
# change only if dtype isn't int
if not is_integer_dtype(y_encoded):
self._label_encoder = LabelEncoder()
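A standalone sketch of the encoding path, with plain pandas standing in for the woodwork helpers (illustrative only):

```python
import pandas as pd
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder

def encode_labels(y):
    # List or array input is first standardized to a pandas Series.
    y_encoded = pd.Series(y)
    # Re-encode only when the dtype isn't already integer.
    if not is_integer_dtype(y_encoded):
        y_encoded = pd.Series(LabelEncoder().fit_transform(y_encoded))
    return y_encoded

print(encode_labels(["a", "b", "a"]).tolist())  # [0, 1, 0]
```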
10 changes: 9 additions & 1 deletion evalml/pipelines/components/transformers/transformer.py
@@ -3,6 +3,10 @@
from evalml.exceptions import MethodPropertyNotFoundError
from evalml.model_family import ModelFamily
from evalml.pipelines.components import ComponentBase
from evalml.utils.gen_utils import (
_convert_to_woodwork_structure,
_convert_woodwork_types_wrapper
)


class Transformer(ComponentBase):
@@ -47,7 +51,11 @@ def fit_transform(self, X, y=None):
pd.DataFrame: Transformed X
"""
try:
X_t = self._component_obj.fit_transform(X, y)
X2 = _convert_to_woodwork_structure(X)
y2 = _convert_to_woodwork_structure(y)
X2 = _convert_woodwork_types_wrapper(X2.to_dataframe())
y2 = _convert_woodwork_types_wrapper(y2.to_series())
X_t = self._component_obj.fit_transform(X2, y2)
except AttributeError:
try:
self.fit(X, y)
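The idea: normalize whatever the caller passes into pandas before delegating to the wrapped object. A minimal standalone sketch, with scikit-learn's `StandardScaler` standing in for `self._component_obj` (an assumption for illustration):

```python
import pandas as pd
from sklearn.preprocessing import StandardScaler

def fit_transform(X, y=None):
    # Standardize list/ndarray input to pandas, then delegate,
    # mirroring the conversion added to Transformer.fit_transform.
    X2 = pd.DataFrame(X)
    y2 = pd.Series(y) if y is not None else None
    return StandardScaler().fit_transform(X2, y2)

print(fit_transform([[1.0, 2.0], [3.0, 4.0]], y=[0, 1]))
```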
15 changes: 5 additions & 10 deletions evalml/tests/automl_tests/test_automl.py
@@ -1050,10 +1050,10 @@ def test_results_getter(mock_fit, mock_score, X_y_binary):
assert automl.results['pipeline_results'][0]['score'] == 1.0


@pytest.mark.parametrize("data_type", ['np', 'pd', 'ww'])
@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
@pytest.mark.parametrize("automl_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
@pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean'])
def test_targets_pandas_data_types_classification(data_type, automl_type, target_type):
def test_targets_pandas_data_types_classification(data_type, automl_type, target_type, make_data_type):
if data_type == 'np' and target_type in ['Int64', 'boolean']:
pytest.skip("Skipping test where data type is numpy and target type is nullable dtype")

@@ -1076,14 +1076,9 @@ def test_targets_pandas_data_types_classification(data_type, automl_type, target_type, make_data_type):
y = y.map({unique_vals[i]: float(i) for i in range(len(unique_vals))})

y = y.astype(target_type)

if data_type == 'np':
X = X.to_numpy()
y = y.to_numpy()

elif data_type == 'ww':
X = ww.DataTable(X)
y = ww.DataColumn(y)
if data_type != 'pd':
X = make_data_type(data_type, X)
y = make_data_type(data_type, y)

automl = AutoMLSearch(X_train=X, y_train=y, problem_type=automl_type, max_iterations=3, n_jobs=1)
automl.search()
13 changes: 13 additions & 0 deletions evalml/tests/component_tests/test_components.py
@@ -825,6 +825,19 @@ def test_all_estimators_check_fit(X_y_binary, test_estimator_needs_fitting_false
component.feature_importance


@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
def test_all_transformers_check_fit_input_type(data_type, X_y_binary, make_data_type):
X, y = X_y_binary
X = make_data_type(data_type, X)
y = make_data_type(data_type, y)
for component_class in _all_transformers():
if not component_class.needs_fitting:
continue

component = component_class()
component.fit(X, y)


def test_no_fitting_required_components(X_y_binary, test_estimator_needs_fitting_false, helper_functions):
X, y = X_y_binary
for component_class in all_components() + [test_estimator_needs_fitting_false]:
31 changes: 30 additions & 1 deletion evalml/tests/component_tests/test_estimators.py
@@ -2,10 +2,14 @@

import numpy as np
import pandas as pd
import pytest

from evalml.model_family import ModelFamily
from evalml.pipelines.components import Estimator
from evalml.pipelines.components.utils import _all_estimators_used_in_search
from evalml.pipelines.components.utils import (
_all_estimators_used_in_search,
get_estimators
)
from evalml.problem_types import ProblemTypes, handle_problem_types


@@ -56,3 +60,28 @@ class MockEstimator(Estimator):
mock_estimator.supported_problem_types = ['binary', 'multiclass']
assert mock_estimator != MockEstimator()
assert 'Mock Estimator' != mock_estimator


@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_type, helper_functions):
X, y = X_y_binary
X = make_data_type(data_type, X)
y = make_data_type(data_type, y)
estimators_to_check = [estimator for estimator in get_estimators('binary')]
for component_class in estimators_to_check:
component = helper_functions.safe_init_component_with_njobs_1(component_class)
component.fit(X, y)
component.predict(X)
component.predict_proba(X)


@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
def test_all_estimators_check_fit_input_type_regression(data_type, X_y_regression, make_data_type, helper_functions):
X, y = X_y_regression
X = make_data_type(data_type, X)
y = make_data_type(data_type, y)
estimators_to_check = [estimator for estimator in get_estimators('regression')]
for component_class in estimators_to_check:
component = helper_functions.safe_init_component_with_njobs_1(component_class)
component.fit(X, y)
component.predict(X)
5 changes: 5 additions & 0 deletions evalml/tests/conftest.py
@@ -516,6 +516,11 @@ def safe_init_pipeline_with_njobs_1(pipeline_class):
def make_data_type():
"""Helper function to convert numpy or pandas input to the appropriate type for tests."""
def _make_data_type(data_type, data):
if data_type == "li":
if isinstance(data, pd.DataFrame):
data = data.to_numpy()
data = data.tolist()
return data
if data_type != "np":
if len(data.shape) == 1:
data = pd.Series(data)
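A self-contained version of the helper with indentation restored; the fixture body is truncated above, so the `np` and `ww` branches here are assumptions based on how the tests use it (woodwork 0.x `DataTable`/`DataColumn` API):

```python
import pandas as pd
import woodwork as ww  # assumes the woodwork 0.x DataTable/DataColumn API

def make_data_type(data_type, data):
    # 'li': lists are the new case this PR adds.
    if data_type == "li":
        if isinstance(data, pd.DataFrame):
            data = data.to_numpy()
        return data.tolist()
    if data_type == "np":
        # Assumed branch: pass numpy through, converting pandas if needed.
        return data.to_numpy() if isinstance(data, (pd.Series, pd.DataFrame)) else data
    # 'pd' and 'ww': wrap 1D data as a Series, 2D data as a DataFrame.
    data = pd.Series(data) if len(data.shape) == 1 else pd.DataFrame(data)
    if data_type == "ww":
        # Assumed branch, mirroring the ww conversions removed from the tests above.
        return ww.DataColumn(data) if isinstance(data, pd.Series) else ww.DataTable(data)
    return data
```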
20 changes: 19 additions & 1 deletion evalml/tests/objective_tests/test_fraud_detection.py
@@ -33,10 +33,13 @@ def test_fraud_objective_function_amount_col(X_y_binary):
fraud_payout_percentage=.75,
amount_col="this column does not exist")
y_predicted = pd.Series([.1, .5, .5])
y_true = pd.Series([True, False, True])
y_true = [True, False, True]
with pytest.raises(ValueError, match="`this column does not exist` is not a valid column in X."):
objective.objective_function(y_true, y_predicted, X)

with pytest.raises(ValueError, match="`this column does not exist` is not a valid column in X."):
objective.objective_function(y_true, y_predicted, X.tolist())


def test_input_contains_nan(X_y_binary):
fraud_cost = FraudCost(amount_col="value")
@@ -139,3 +142,18 @@ def test_fraud_objective_score(X_y_binary):
pd.testing.assert_series_equal(out, expected_y_pred, check_names=False)
score = fraud_cost.score(y_true, out, extra_columns)
assert (score == 0.255)


def test_fraud_objective_score_list(X_y_binary):
X, y = X_y_binary
fraud_cost = FraudCost(amount_col="value")

y_predicted = [.1, .5, .5]
y_true = [True, False, True]
extra_columns = pd.DataFrame({"value": [100, 5, 250]})

out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
assert isinstance(out, pd.Series)
pd.testing.assert_series_equal(out, pd.Series(y_true), check_names=False)
score = fraud_cost.score(y_true, out, extra_columns)
assert (score == 0.0)
13 changes: 4 additions & 9 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -1597,11 +1597,11 @@ def test_get_default_parameters(logistic_regression_binary_pipeline_class):
assert logistic_regression_binary_pipeline_class.default_parameters == expected_defaults


@pytest.mark.parametrize("data_type", ['np', 'pd', 'ww'])
@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
@pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
@pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean'])
def test_targets_data_types_classification_pipelines(data_type, problem_type, target_type, all_binary_pipeline_classes,
all_multiclass_pipeline_classes, helper_functions):
make_data_type, all_multiclass_pipeline_classes, helper_functions):
if data_type == 'np' and target_type in ['Int64', 'boolean']:
pytest.skip("Skipping test where data type is numpy and target type is nullable dtype")

@@ -1633,13 +1633,8 @@ def test_targets_data_types_classification_pipelines(data_type, problem_type, target_type, all_binary_pipeline_classes, make_data_type, all_multiclass_pipeline_classes, helper_functions):
y = y.astype(target_type)
unique_vals = y.unique()

if data_type == 'np':
X = X.to_numpy()
y = y.to_numpy()

elif data_type == 'ww':
X = ww.DataTable(X)
y = ww.DataColumn(y)
X = make_data_type(data_type, X)
y = make_data_type(data_type, y)

for pipeline_class in pipeline_classes:
pipeline = helper_functions.safe_init_pipeline_with_njobs_1(pipeline_class)
2 changes: 1 addition & 1 deletion evalml/utils/gen_utils.py
@@ -219,7 +219,7 @@ def _rename_column_names_to_numeric(X):
Transformed X where column names are renamed to numerical values
"""
X_t = X
if isinstance(X, np.ndarray):
if isinstance(X, (np.ndarray, list)):
return pd.DataFrame(X)
if isinstance(X, ww.DataTable):
X_t = X.to_dataframe()
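With `list` added to the isinstance check, list input takes the same early return as numpy arrays; a quick check of what that produces:

```python
import pandas as pd

# pd.DataFrame over a list of rows assigns numeric (RangeIndex) column names,
# which is exactly the renaming this helper guarantees.
X = [[1, 2], [3, 4]]
print(pd.DataFrame(X).columns.tolist())  # [0, 1]
```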