alteryx · angela97lin · Dec 29, 2020 · Dec 14, 2020 · Dec 14, 2020 · Dec 14, 2020
diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst
@@ -538,4 +538,4 @@ General Utils
     get_random_seed
     pad_with_nans
     drop_rows_with_nans
-
+    infer_feature_types
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -17,6 +17,7 @@ Release Notes
         * Added more information to users about ensembling behavior in ``AutoMLSearch`` :pr:`1527`
         * Add woodwork support for more utility and graph methods :pr:`1544`
         * Changed ``DateTimeFeaturizer`` to encode features as int :pr:`1479`
+        * Added ``infer_feature_types`` utility method so that users can set feature types without having to learn about Woodwork directly :pr:`1555`
         * Added Linear Discriminant Analysis transformer for dimensionality reduction :pr:`1331`
         * Added multiclass support for ``partial_dependence`` and ``graph_partial_dependence`` :pr:`1554`
         * Added ``TimeSeriesBinaryClassificationPipeline`` and ``TimeSeriesMulticlassClassificationPipeline`` classes :pr:`1528`

diff --git a/docs/source/user_guide/automl.ipynb b/docs/source/user_guide/automl.ipynb
@@ -47,7 +47,7 @@
    "source": [
     "__Note:__ To provide data to EvalML, it is recommended that you create a `DataTable` object using [the Woodwork project](https://woodwork.alteryx.com/en/stable/).\n",
     "\n",
-    "If a pandas `DataFrame` is provided for the input features, EvalML will convert it to a Woodwork `DataTable` under the hood, running additional inference logic to detect the type of each feature, most notably detecting if a categorical feature should be treated as a text feature instead. If you'd like to override Woodwork's inference, providing a `DataTable` as input makes it easy to control how EvalML will treat each feature, as a numeric feature, a categorical feature, a text feature or other type of feature."
+    "EvalML also accepts `pandas` input, and will run type inference on top of the input `pandas` data. If you\u2019d like to change the types inferred by EvalML, you can use the `infer_feature_types` utility method as follows. This method takes pandas or numpy input and converts it to a Woodwork data structure. Its `feature_types` parameter can be used to specify what types specific columns should be. In the example below, we specify that the `provider` column, which would otherwise have been inferred as a natural language column, is a categorical column."
    ]
   },
   {
@@ -57,8 +57,17 @@
    "outputs": [],
    "source": [
     "import evalml\n",
-    "\n",
-    "X, y = evalml.demos.load_breast_cancer()\n",
+    "from evalml.utils import infer_feature_types\n",
+    "X, y = evalml.demos.load_fraud(return_pandas=True)\n",
+    "X = infer_feature_types(X, feature_types={'provider': 'categorical'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "automl = evalml.automl.AutoMLSearch(X_train=X, y_train=y, problem_type='binary')\n",
     "automl.search()"
    ]
@@ -381,4 +390,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/evalml/tests/utils_tests/test_gen_utils.py b/evalml/tests/utils_tests/test_gen_utils.py
@@ -22,6 +22,7 @@
     get_random_seed,
     get_random_state,
     import_or_raise,
+    infer_feature_types,
     jupyter_check,
     pad_with_nans,
     save_plot
@@ -398,6 +399,38 @@ def test_convert_to_woodwork_structure():
     assert np.array_equal(X_np, np.array([[1, 2], [3, 4]]))
 
 
+def test_infer_feature_types_dataframe():
+    X_pd = pd.DataFrame({0: pd.Series([1, 2]),  # no explicit dtype given
+                         1: pd.Series([3, 4])})
+    pd.testing.assert_frame_equal(X_pd, infer_feature_types(X_pd).to_dataframe(), check_dtype=False)  # compare values only, not dtypes
+
+    X_pd = pd.DataFrame({0: pd.Series([1, 2], dtype="Int64"),  # nullable Int64 input is expected back with identical dtypes
+                         1: pd.Series([3, 4], dtype="Int64")})
+    pd.testing.assert_frame_equal(X_pd, infer_feature_types(X_pd).to_dataframe())
+
+    X_expected = X_pd.copy()  # overriding column 0 as categorical is expected to yield a pandas "category" dtype
+    X_expected[0] = X_expected[0].astype("category")
+    pd.testing.assert_frame_equal(X_expected, infer_feature_types(X_pd, {0: "categorical"}).to_dataframe())  # type given as a string
+    pd.testing.assert_frame_equal(X_expected, infer_feature_types(X_pd, {0: ww.logical_types.Categorical}).to_dataframe())  # type given as a Woodwork class
+
+
+def test_infer_feature_types_series():
+    X_pd = pd.Series([1, 2, 3, 4])  # no explicit dtype given
+    X_expected = X_pd.astype("Int64")  # inference is expected to produce nullable Int64
+    pd.testing.assert_series_equal(X_expected, infer_feature_types(X_pd).to_series())
+
+    X_pd = pd.Series([1, 2, 3, 4], dtype="Int64")  # already Int64: expected back unchanged
+    pd.testing.assert_series_equal(X_pd, infer_feature_types(X_pd).to_series())
+
+    X_pd = pd.Series([1, 2, 3, 4], dtype="Int64")
+    X_expected = X_pd.astype("category")
+    pd.testing.assert_series_equal(X_expected, infer_feature_types(X_pd, "categorical").to_series())  # 1D input: type given as a string
+
+    X_pd = pd.Series([1, 2, 3, 4], dtype="Int64")
+    X_expected = X_pd.astype("category")
+    pd.testing.assert_series_equal(X_expected, infer_feature_types(X_pd, ww.logical_types.Categorical).to_series())  # 1D input: type given as a Woodwork class
+
+
 @pytest.mark.parametrize("file_name,format,interactive",
                          [
                              ('test_plot', 'png', False),

diff --git a/evalml/utils/__init__.py b/evalml/utils/__init__.py
@@ -13,6 +13,7 @@
     _convert_to_woodwork_structure,
     drop_rows_with_nans,
     pad_with_nans,
+    infer_feature_types,
     _get_rows_without_nans
 )
 from .cli_utils import print_info, get_evalml_root, get_installed_packages, get_sys_info, print_sys_info, print_deps
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
@@ -286,6 +286,28 @@ def is_all_numeric(df):
     return True
 
 
+def infer_feature_types(data, feature_types=None):
+    """Create a Woodwork structure from the given pandas or numpy input, with specified types for columns.
+    If a column's type is not specified, it will be inferred by Woodwork.
+
+    Arguments:
+        data (pd.DataFrame, pd.Series, or numpy array): Input data to convert to a Woodwork data structure.
+        feature_types (string, ww.logical_type obj, dict, optional): If data is a 2D structure, feature_types must be a dictionary
+            mapping column names to the type of data represented in the column. If data is a 1D structure, then feature_types must be
+            a Woodwork logical type or a string representing a Woodwork logical type ("Double", "Integer", "Boolean", "Categorical", "Datetime", "NaturalLanguage"). If None (the default), all types are inferred.
+
+    Returns:
+        A Woodwork data structure where the data type of each column was either specified or inferred.
+    """
+    ww_data = _convert_to_woodwork_structure(data)
+    if feature_types is not None:
+        if len(ww_data.shape) == 1:  # 1D structure: a single logical type applies to the whole column
+            ww_data = ww_data.set_logical_type(feature_types)
+        else:  # 2D structure: feature_types maps column names to logical types
+            ww_data = ww_data.set_types(logical_types=feature_types)
+    return ww_data
+
+
 def _convert_to_woodwork_structure(data):
     """
     Takes input data structure, and if it is not a Woodwork data structure already, will convert it to a Woodwork DataTable or DataColumn structure.