From c267dff530c79867baed1b5fbb781abbcfe19530 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 17:35:00 -0400
Subject: [PATCH 01/14] init

---
 evalml/data_checks/__init__.py                |  1 +
 .../detect_highly_null_data_check.py          | 45 ++++++++++++++++
 .../data_checks_tests/test_data_check.py      | 51 +++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 evalml/data_checks/detect_highly_null_data_check.py

diff --git a/evalml/data_checks/__init__.py b/evalml/data_checks/__init__.py
index bdd9897e07..17ec2cdbd5 100644
--- a/evalml/data_checks/__init__.py
+++ b/evalml/data_checks/__init__.py
@@ -3,3 +3,4 @@
 from .data_checks import DataChecks
 from .data_check_message import DataCheckMessage, DataCheckWarning, DataCheckError
 from .data_check_message_type import DataCheckMessageType
+from .detect_highly_null_data_check import DetectHighlyNullDataCheck
diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
new file mode 100644
index 0000000000..c94dad007d
--- /dev/null
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+from .data_check import DataCheck
+from .data_check_message import DataCheckWarning
+
+
+class DetectHighlyNullDataCheck(DataCheck):
+
+    def __init__(self, percent_threshold=0.95):
+        """TODO
+
+        Arguments:
+            percent_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to 0.95
+        """
+        if percent_threshold < 0 or percent_threshold > 1:
+            raise ValueError("percent_threshold must be a float between 0 and 1, inclusive.")
+        self.percent_threshold = percent_threshold
+
+    def validate(self, X, y=None):
+        """ Checks if there are any highly-null columns in a pd.Dataframe.
+
+        Arguments:
+            X (pd.DataFrame) : features
+            y : Ignored.
+
+        Returns:
+        Example:
+            >>> df = pd.DataFrame({
+            ...    'lots_of_null': [None, None, None, None, 5],
+            ...    'no_null': [1, 2, 3, 4, 5]
+            ... })
+            >>> null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
+            >>> null_check.validate(df)
+        """
+        messages = []
+        if not isinstance(X, pd.DataFrame):
+            X = pd.DataFrame(X)
+        percent_null = (X.isnull().mean()).to_dict()
+        highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
+        if len(highly_null_cols) > 0:
+            col_names_str = ', '.join([f"'{name}'" for name in list(highly_null_cols.keys())])
+            warning_msg = "Columns {} are more than {}% null".format(col_names_str, self.percent_threshold * 100.)
+            warning = DataCheckWarning(warning_msg, self.name)
+            messages.append(warning)
+        return messages
diff --git a/evalml/tests/data_checks_tests/test_data_check.py b/evalml/tests/data_checks_tests/test_data_check.py
index b0ea6ed11a..85714224e6 100644
--- a/evalml/tests/data_checks_tests/test_data_check.py
+++ b/evalml/tests/data_checks_tests/test_data_check.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -6,6 +7,9 @@
     DataCheckError,
     DataCheckWarning
 )
+from evalml.data_checks.detect_highly_null_data_check import (
+    DetectHighlyNullDataCheck
+)
 
 
 @pytest.fixture
@@ -62,3 +66,50 @@ def validate(self, X, y=None):
     data_check = MockDataCheckWithParam(num=0)
     errors_warnings = data_check.validate(X, y=None)
     assert errors_warnings == [DataCheckError("Expected num == 10", "MockDataCheckWithParam")]
+
+
+def test_highly_null_data_check_init():
+    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(percent_threshold=-0.1)
+    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(percent_threshold=1.1)
+
+
+def test_highly_null_data_check_empty_df():
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.1)
+    messages = highly_null_check.validate(pd.DataFrame())
+    assert messages == []
+
+
+def test_highly_null_data_check_no_warnings():
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
+    messages = highly_null_check.validate(pd.DataFrame({'lots_of_null': [None, None, None, None, 5], 'no_null': [1, 2, 3, 4, 5]}))
+    assert messages == []
+
+
+def test_highly_null_data_check_has_warnings():
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
+    messages = highly_null_check.validate(pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
+                                                        'all_null': [None, None, None, None, None],
+                                                        'no_null': [1, 2, 3, 4, 5]}))
+    assert messages == [DataCheckWarning("Columns 'lots_of_null', 'all_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+
+
+def test_highly_null_data_check_input_formats():
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
+
+    #  test list
+    messages = highly_null_check.validate([None, None, None, None, 5])
+    assert messages == [DataCheckWarning("Columns '0' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+
+    #  test pd.Series
+    messages = highly_null_check.validate(pd.Series([None, None, None, None, 5]))
+    assert messages == [DataCheckWarning("Columns '0' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+
+    #  test 2D list
+    messages = highly_null_check.validate([[None, None, None, None, 0], [None, None, None, "hi", 5]])
+    assert messages == [DataCheckWarning("Columns '0', '1', '2' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+
+    # test np.array
+    messages = highly_null_check.validate(np.array([[None, None, None, None, 0], [None, None, None, "hi", 5]]))
+    assert messages == [DataCheckWarning("Columns '0', '1', '2' are more than 80.0% null", "DetectHighlyNullDataCheck")]

From 92b6c94e3e04295990ae8ef078f1e65d1d64d4db Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 17:37:33 -0400
Subject: [PATCH 02/14] changelog

---
 docs/source/changelog.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 4fb23f1cea..f12805c45f 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -4,6 +4,7 @@ Changelog
 ---------
 **Future Releases**
     * Enhancements
+        * Port over highly-null guardrail as a data check and define `BasicDataChecks` and `DisableDataChecks` classes :pr:`745`
     * Fixes
     * Changes
         * Cleanup pipeline `score` code, and cleanup codecov :pr:`711`

From 1de7c70df7ad2e1ce97026d46add8c9dbcaa4cb6 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 17:47:51 -0400
Subject: [PATCH 03/14] docstr test

---
 evalml/data_checks/detect_highly_null_data_check.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index c94dad007d..d3e536dd03 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -30,7 +30,8 @@ def validate(self, X, y=None):
             ...    'no_null': [1, 2, 3, 4, 5]
             ... })
             >>> null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
-            >>> null_check.validate(df)
+            >>> null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+            True
         """
         messages = []
         if not isinstance(X, pd.DataFrame):

From 0095684ad5d9752276a9ed6f454fb4f143879121 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 17:52:15 -0400
Subject: [PATCH 04/14] cleanup

---
 evalml/data_checks/detect_highly_null_data_check.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index d3e536dd03..8c602c4ff6 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -7,23 +7,26 @@
 class DetectHighlyNullDataCheck(DataCheck):
 
     def __init__(self, percent_threshold=0.95):
-        """TODO
+        """Checks if there are any highly-null columns in the input.
 
         Arguments:
             percent_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to 0.95
+
         """
         if percent_threshold < 0 or percent_threshold > 1:
             raise ValueError("percent_threshold must be a float between 0 and 1, inclusive.")
         self.percent_threshold = percent_threshold
 
     def validate(self, X, y=None):
-        """ Checks if there are any highly-null columns in a pd.Dataframe.
+        """Checks if there are any highly-null columns in the input.
 
         Arguments:
-            X (pd.DataFrame) : features
+            X (pd.DataFrame, pd.Series, np.array, list) : features
             y : Ignored.
 
         Returns:
+            list (DataCheckWarning): list with a DataCheckWarning if there are any highly-null columns.
+
         Example:
             >>> df = pd.DataFrame({
             ...    'lots_of_null': [None, None, None, None, 5],

From fd660e97687d9849919af2b3c4e600384a9e0992 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 18:21:22 -0400
Subject: [PATCH 05/14] add basic and empty data checks

---
 evalml/data_checks/__init__.py                |  1 +
 evalml/data_checks/basic_data_checks.py       | 13 ++++++++
 .../detect_highly_null_data_check.py          |  2 +-
 evalml/data_checks/empty_data_checks.py       | 12 ++++++++
 .../data_checks_tests/test_data_checks.py     | 30 +++++++++++++++----
 5 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 evalml/data_checks/basic_data_checks.py
 create mode 100644 evalml/data_checks/empty_data_checks.py

diff --git a/evalml/data_checks/__init__.py b/evalml/data_checks/__init__.py
index 17ec2cdbd5..1f75ba334b 100644
--- a/evalml/data_checks/__init__.py
+++ b/evalml/data_checks/__init__.py
@@ -4,3 +4,4 @@
 from .data_check_message import DataCheckMessage, DataCheckWarning, DataCheckError
 from .data_check_message_type import DataCheckMessageType
 from .detect_highly_null_data_check import DetectHighlyNullDataCheck
+from .basic_data_checks import BasicDataChecks
diff --git a/evalml/data_checks/basic_data_checks.py b/evalml/data_checks/basic_data_checks.py
new file mode 100644
index 0000000000..bcc88276fa
--- /dev/null
+++ b/evalml/data_checks/basic_data_checks.py
@@ -0,0 +1,13 @@
+from .data_checks import DataChecks
+from .detect_highly_null_data_check import DetectHighlyNullDataCheck
+
+
+class BasicDataChecks(DataChecks):
+    def __init__(self, data_checks=None):
+        """
+        A collection of data checks.
+
+        Arguments:
+            data_checks (list (DataCheck)): Ignored.
+        """
+        self.data_checks = [DetectHighlyNullDataCheck()]
diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 8c602c4ff6..8051e70e4d 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -43,7 +43,7 @@ def validate(self, X, y=None):
         highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
         if len(highly_null_cols) > 0:
             col_names_str = ', '.join([f"'{name}'" for name in list(highly_null_cols.keys())])
-            warning_msg = "Columns {} are more than {}% null".format(col_names_str, self.percent_threshold * 100.)
+            warning_msg = "Columns {} are more than {}% null".format(col_names_str, self.percent_threshold * 100)
             warning = DataCheckWarning(warning_msg, self.name)
             messages.append(warning)
         return messages
diff --git a/evalml/data_checks/empty_data_checks.py b/evalml/data_checks/empty_data_checks.py
new file mode 100644
index 0000000000..259531e0e0
--- /dev/null
+++ b/evalml/data_checks/empty_data_checks.py
@@ -0,0 +1,12 @@
+from .data_checks import DataChecks
+
+
+class EmptyDataChecks(DataChecks):
+    def __init__(self, data_checks=None):
+        """
+        An empty collection of data checks.
+
+        Arguments:
+            data_checks (list (DataCheck)): Ignored.
+        """
+        self.data_checks = []
diff --git a/evalml/tests/data_checks_tests/test_data_checks.py b/evalml/tests/data_checks_tests/test_data_checks.py
index c24f339a31..5737e5514d 100644
--- a/evalml/tests/data_checks_tests/test_data_checks.py
+++ b/evalml/tests/data_checks_tests/test_data_checks.py
@@ -1,9 +1,13 @@
+import pandas as pd
+
+from evalml.data_checks.basic_data_checks import BasicDataChecks
 from evalml.data_checks.data_check import DataCheck
 from evalml.data_checks.data_check_message import (
     DataCheckError,
     DataCheckWarning
 )
 from evalml.data_checks.data_checks import DataChecks
+from evalml.data_checks.empty_data_checks import EmptyDataChecks
 
 
 def test_data_checks(X_y):
@@ -27,8 +31,24 @@ def validate(self, X, y):
 
     data_checks_list = [MockDataCheck(), MockDataCheckWarning(), MockDataCheckError(), MockDataCheckErrorAndWarning()]
     data_checks = DataChecks(data_checks=data_checks_list)
-    errors_warnings = data_checks.validate(X, y)
-    assert errors_warnings == [DataCheckWarning("warning one", "MockDataCheckWarning"),
-                               DataCheckError("error one", "MockDataCheckError"),
-                               DataCheckError("error two", "MockDataCheckErrorAndWarning"),
-                               DataCheckWarning("warning two", "MockDataCheckErrorAndWarning")]
+    messages = data_checks.validate(X, y)
+    assert messages == [DataCheckWarning("warning one", "MockDataCheckWarning"),
+                        DataCheckError("error one", "MockDataCheckError"),
+                        DataCheckError("error two", "MockDataCheckErrorAndWarning"),
+                        DataCheckWarning("warning two", "MockDataCheckErrorAndWarning")]
+
+
+def test_empty_data_checks(X_y):
+    X, y = X_y
+    data_checks = EmptyDataChecks()
+    messages = data_checks.validate(X, y)
+    assert messages == []
+
+
+def test_basic_data_checks(X_y):
+    X = pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
+                      'all_null': [None, None, None, None, None],
+                      'no_null': [1, 2, 3, 4, 5]})
+    data_checks = BasicDataChecks()
+    messages = data_checks.validate(X)
+    assert messages == [DataCheckWarning("Columns 'all_null' are more than 95.0% null", "DetectHighlyNullDataCheck")]

From 583ca0e104a9e3e881c538b492942751509cba50 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Tue, 5 May 2020 18:38:38 -0400
Subject: [PATCH 06/14] codecov


From 312d7f024e7dfe81592ba81f1347d34ca8f28adf Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 13:08:56 -0400
Subject: [PATCH 07/14] address PR comments

---
 docs/source/changelog.rst                     |  2 +-
 evalml/data_checks/__init__.py                |  3 +-
 ..._data_checks.py => default_data_checks.py} |  2 +-
 .../detect_highly_null_data_check.py          | 15 ++---
 .../{empty_data_checks.py => utils.py}        |  0
 .../data_checks_tests/test_data_check.py      | 59 ++++++++++++-------
 .../data_checks_tests/test_data_checks.py     | 12 ++--
 7 files changed, 53 insertions(+), 40 deletions(-)
 rename evalml/data_checks/{basic_data_checks.py => default_data_checks.py} (90%)
 rename evalml/data_checks/{empty_data_checks.py => utils.py} (100%)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index f12805c45f..deb5e66413 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -4,7 +4,7 @@ Changelog
 ---------
 **Future Releases**
     * Enhancements
-        * Port over highly-null guardrail as a data check and define `BasicDataChecks` and `DisableDataChecks` classes :pr:`745`
+        * Port over highly-null guardrail as a data check and define `DefaultDataChecks` and `DisableDataChecks` classes :pr:`745`
     * Fixes
     * Changes
         * Cleanup pipeline `score` code, and cleanup codecov :pr:`711`
diff --git a/evalml/data_checks/__init__.py b/evalml/data_checks/__init__.py
index 1f75ba334b..1c9262462f 100644
--- a/evalml/data_checks/__init__.py
+++ b/evalml/data_checks/__init__.py
@@ -4,4 +4,5 @@
 from .data_check_message import DataCheckMessage, DataCheckWarning, DataCheckError
 from .data_check_message_type import DataCheckMessageType
 from .detect_highly_null_data_check import DetectHighlyNullDataCheck
-from .basic_data_checks import BasicDataChecks
+from .default_data_checks import DefaultDataChecks
+from .utils import EmptyDataChecks
diff --git a/evalml/data_checks/basic_data_checks.py b/evalml/data_checks/default_data_checks.py
similarity index 90%
rename from evalml/data_checks/basic_data_checks.py
rename to evalml/data_checks/default_data_checks.py
index bcc88276fa..8bd8d352be 100644
--- a/evalml/data_checks/basic_data_checks.py
+++ b/evalml/data_checks/default_data_checks.py
@@ -2,7 +2,7 @@
 from .detect_highly_null_data_check import DetectHighlyNullDataCheck
 
 
-class BasicDataChecks(DataChecks):
+class DefaultDataChecks(DataChecks):
     def __init__(self, data_checks=None):
         """
         A collection of data checks.
diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 8051e70e4d..1c622f2ac8 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -10,7 +10,8 @@ def __init__(self, percent_threshold=0.95):
         """Checks if there are any highly-null columns in the input.
 
         Arguments:
-            percent_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to 0.95
+            percent_threshold(float): If the percentage of values in an input feature exceeds this amount,
+                that feature will be considered highly-null. Defaults to 0.95.
 
         """
         if percent_threshold < 0 or percent_threshold > 1:
@@ -33,17 +34,11 @@ def validate(self, X, y=None):
             ...    'no_null': [1, 2, 3, 4, 5]
             ... })
             >>> null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
-            >>> null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
-            True
+            >>> assert null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
         """
-        messages = []
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
         percent_null = (X.isnull().mean()).to_dict()
         highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
-        if len(highly_null_cols) > 0:
-            col_names_str = ', '.join([f"'{name}'" for name in list(highly_null_cols.keys())])
-            warning_msg = "Columns {} are more than {}% null".format(col_names_str, self.percent_threshold * 100)
-            warning = DataCheckWarning(warning_msg, self.name)
-            messages.append(warning)
-        return messages
+        warning_msg = "Column '{}' is {}% or more null"
+        return [DataCheckWarning(warning_msg.format(col_name, self.percent_threshold * 100), self.name) for col_name in highly_null_cols]
diff --git a/evalml/data_checks/empty_data_checks.py b/evalml/data_checks/utils.py
similarity index 100%
rename from evalml/data_checks/empty_data_checks.py
rename to evalml/data_checks/utils.py
diff --git a/evalml/tests/data_checks_tests/test_data_check.py b/evalml/tests/data_checks_tests/test_data_check.py
index 85714224e6..a92d5ccc90 100644
--- a/evalml/tests/data_checks_tests/test_data_check.py
+++ b/evalml/tests/data_checks_tests/test_data_check.py
@@ -69,47 +69,62 @@ def validate(self, X, y=None):
 
 
 def test_highly_null_data_check_init():
-    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
-        DetectHighlyNullDataCheck(percent_threshold=-0.1)
-    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
-        DetectHighlyNullDataCheck(percent_threshold=1.1)
+    highly_null_check = DetectHighlyNullDataCheck()
+    assert highly_null_check.percent_threshold == 0.95
 
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.0)
+    assert highly_null_check.percent_threshold == 0
 
-def test_highly_null_data_check_empty_df():
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.1)
-    messages = highly_null_check.validate(pd.DataFrame())
-    assert messages == []
-
+    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.5)
+    assert highly_null_check.percent_threshold == 0.5
 
-def test_highly_null_data_check_no_warnings():
     highly_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
-    messages = highly_null_check.validate(pd.DataFrame({'lots_of_null': [None, None, None, None, 5], 'no_null': [1, 2, 3, 4, 5]}))
-    assert messages == []
+    assert highly_null_check.percent_threshold == 1.0
 
+    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(percent_threshold=-0.1)
+    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(percent_threshold=1.1)
 
-def test_highly_null_data_check_has_warnings():
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
-    messages = highly_null_check.validate(pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
-                                                        'all_null': [None, None, None, None, None],
-                                                        'no_null': [1, 2, 3, 4, 5]}))
-    assert messages == [DataCheckWarning("Columns 'lots_of_null', 'all_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+
+def test_highly_null_data_check_warnings():
+    data = pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
+                         'all_null': [None, None, None, None, None],
+                         'no_null': [1, 2, 3, 4, 5]})
+    no_null_check = DetectHighlyNullDataCheck(percent_threshold=0.0)
+    assert no_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 0.0% or more null", "DetectHighlyNullDataCheck"),
+                                            DataCheckWarning("Column 'all_null' is 0.0% or more null", "DetectHighlyNullDataCheck"),
+                                            DataCheckWarning("Column 'no_null' is 0.0% or more null", "DetectHighlyNullDataCheck")]
+    some_null_check = DetectHighlyNullDataCheck(percent_threshold=0.5)
+    assert some_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 50.0% or more null", "DetectHighlyNullDataCheck"),
+                                              DataCheckWarning("Column 'all_null' is 50.0% or more null", "DetectHighlyNullDataCheck")]
+    all_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
+    assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100.0% or more null", "DetectHighlyNullDataCheck")]
 
 
 def test_highly_null_data_check_input_formats():
     highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
 
+    # test empty pd.DataFrame
+    messages = highly_null_check.validate(pd.DataFrame())
+    assert messages == []
+
     #  test list
     messages = highly_null_check.validate([None, None, None, None, 5])
-    assert messages == [DataCheckWarning("Columns '0' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+    assert messages == [DataCheckWarning("Column '0' is 80.0% or more null", "DetectHighlyNullDataCheck")]
 
     #  test pd.Series
     messages = highly_null_check.validate(pd.Series([None, None, None, None, 5]))
-    assert messages == [DataCheckWarning("Columns '0' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+    assert messages == [DataCheckWarning("Column '0' is 80.0% or more null", "DetectHighlyNullDataCheck")]
 
     #  test 2D list
     messages = highly_null_check.validate([[None, None, None, None, 0], [None, None, None, "hi", 5]])
-    assert messages == [DataCheckWarning("Columns '0', '1', '2' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+    assert messages == [DataCheckWarning("Column '0' is 80.0% or more null", "DetectHighlyNullDataCheck"),
+                        DataCheckWarning("Column '1' is 80.0% or more null", "DetectHighlyNullDataCheck"),
+                        DataCheckWarning("Column '2' is 80.0% or more null", "DetectHighlyNullDataCheck")]
 
     # test np.array
     messages = highly_null_check.validate(np.array([[None, None, None, None, 0], [None, None, None, "hi", 5]]))
-    assert messages == [DataCheckWarning("Columns '0', '1', '2' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+    assert messages == [DataCheckWarning("Column '0' is 80.0% or more null", "DetectHighlyNullDataCheck"),
+                        DataCheckWarning("Column '1' is 80.0% or more null", "DetectHighlyNullDataCheck"),
+                        DataCheckWarning("Column '2' is 80.0% or more null", "DetectHighlyNullDataCheck")]
diff --git a/evalml/tests/data_checks_tests/test_data_checks.py b/evalml/tests/data_checks_tests/test_data_checks.py
index 5737e5514d..094dfa2c9e 100644
--- a/evalml/tests/data_checks_tests/test_data_checks.py
+++ b/evalml/tests/data_checks_tests/test_data_checks.py
@@ -1,13 +1,13 @@
 import pandas as pd
 
-from evalml.data_checks.basic_data_checks import BasicDataChecks
 from evalml.data_checks.data_check import DataCheck
 from evalml.data_checks.data_check_message import (
     DataCheckError,
     DataCheckWarning
 )
 from evalml.data_checks.data_checks import DataChecks
-from evalml.data_checks.empty_data_checks import EmptyDataChecks
+from evalml.data_checks.default_data_checks import DefaultDataChecks
+from evalml.data_checks.utils import EmptyDataChecks
 
 
 def test_data_checks(X_y):
@@ -45,10 +45,12 @@ def test_empty_data_checks(X_y):
     assert messages == []
 
 
-def test_basic_data_checks(X_y):
+def test_default_data_checks(X_y):
     X = pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
                       'all_null': [None, None, None, None, None],
+                      'also_all_null': [None, None, None, None, None],
                       'no_null': [1, 2, 3, 4, 5]})
-    data_checks = BasicDataChecks()
+    data_checks = DefaultDataChecks()
     messages = data_checks.validate(X)
-    assert messages == [DataCheckWarning("Columns 'all_null' are more than 95.0% null", "DetectHighlyNullDataCheck")]
+    assert messages == [DataCheckWarning("Column 'all_null' is 95.0% or more null", "DetectHighlyNullDataCheck"),
+                        DataCheckWarning("Column 'also_all_null' is 95.0% or more null", "DetectHighlyNullDataCheck")]

From d9d7a13aa657fc0dced3976d9a98ce028f5fe835 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 14:36:26 -0400
Subject: [PATCH 08/14] add edge cases

---
 evalml/data_checks/detect_highly_null_data_check.py | 13 +++++++++++--
 evalml/tests/data_checks_tests/test_data_check.py   |  7 +++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 1c622f2ac8..1d5bfa9123 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -39,6 +39,15 @@ def validate(self, X, y=None):
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
         percent_null = (X.isnull().mean()).to_dict()
-        highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
-        warning_msg = "Column '{}' is {}% or more null"
+        if self.percent_threshold == 0.0:
+            has_null_cols = {key: value for key, value in percent_null.items() if value > self.percent_threshold}
+            warning_msg = "Column '{}' is more than 0% null"
+            return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in has_null_cols]
+        elif self.percent_threshold == 1.0:
+            all_null_cols = {key: value for key, value in percent_null.items() if value == self.percent_threshold}
+            warning_msg = "Column '{}' is 100% null"
+            return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in all_null_cols]
+        else:
+            highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
+            warning_msg = "Column '{}' is {}% or more null"
         return [DataCheckWarning(warning_msg.format(col_name, self.percent_threshold * 100), self.name) for col_name in highly_null_cols]
diff --git a/evalml/tests/data_checks_tests/test_data_check.py b/evalml/tests/data_checks_tests/test_data_check.py
index a92d5ccc90..45e6aa3037 100644
--- a/evalml/tests/data_checks_tests/test_data_check.py
+++ b/evalml/tests/data_checks_tests/test_data_check.py
@@ -92,14 +92,13 @@ def test_highly_null_data_check_warnings():
                          'all_null': [None, None, None, None, None],
                          'no_null': [1, 2, 3, 4, 5]})
     no_null_check = DetectHighlyNullDataCheck(percent_threshold=0.0)
-    assert no_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 0.0% or more null", "DetectHighlyNullDataCheck"),
-                                            DataCheckWarning("Column 'all_null' is 0.0% or more null", "DetectHighlyNullDataCheck"),
-                                            DataCheckWarning("Column 'no_null' is 0.0% or more null", "DetectHighlyNullDataCheck")]
+    assert no_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is more than 0% null", "DetectHighlyNullDataCheck"),
+                                            DataCheckWarning("Column 'all_null' is more than 0% null", "DetectHighlyNullDataCheck")]
     some_null_check = DetectHighlyNullDataCheck(percent_threshold=0.5)
     assert some_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 50.0% or more null", "DetectHighlyNullDataCheck"),
                                               DataCheckWarning("Column 'all_null' is 50.0% or more null", "DetectHighlyNullDataCheck")]
     all_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
-    assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100.0% or more null", "DetectHighlyNullDataCheck")]
+    assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100% null", "DetectHighlyNullDataCheck")]
 
 
 def test_highly_null_data_check_input_formats():

From 0389f6baa1926a6fc825e4f2f29219df3583fcc5 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 14:43:41 -0400
Subject: [PATCH 09/14] cleanup via pr

---
 .../detect_highly_null_data_check.py          | 24 +++++++--------
 evalml/guardrails/utils.py                    |  8 ++---
 .../data_checks_tests/test_data_check.py      | 30 +++++++++----------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 1d5bfa9123..64e2503b18 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -6,17 +6,17 @@
 
 class DetectHighlyNullDataCheck(DataCheck):
 
-    def __init__(self, percent_threshold=0.95):
+    def __init__(self, pct_null_threshold=0.95):
         """Checks if there are any highly-null columns in the input.
 
         Arguments:
-            percent_threshold(float): If the percentage of values in an input feature exceeds this amount,
+            pct_null_threshold(float): If the percentage of values in an input feature exceeds this amount,
                 that feature will be considered highly-null. Defaults to 0.95.
 
         """
-        if percent_threshold < 0 or percent_threshold > 1:
-            raise ValueError("percent_threshold must be a float between 0 and 1, inclusive.")
-        self.percent_threshold = percent_threshold
+        if pct_null_threshold < 0 or pct_null_threshold > 1:
+            raise ValueError("pct_null_threshold must be a float between 0 and 1, inclusive.")
+        self.pct_null_threshold = pct_null_threshold
 
     def validate(self, X, y=None):
         """Checks if there are any highly-null columns in the input.
@@ -33,21 +33,21 @@ def validate(self, X, y=None):
             ...    'lots_of_null': [None, None, None, None, 5],
             ...    'no_null': [1, 2, 3, 4, 5]
             ... })
-            >>> null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
+            >>> null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.8)
             >>> assert null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
         """
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
         percent_null = (X.isnull().mean()).to_dict()
-        if self.percent_threshold == 0.0:
-            has_null_cols = {key: value for key, value in percent_null.items() if value > self.percent_threshold}
+        if self.pct_null_threshold == 0.0:
+            has_null_cols = {key: value for key, value in percent_null.items() if value > self.pct_null_threshold}
             warning_msg = "Column '{}' is more than 0% null"
             return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in has_null_cols]
-        elif self.percent_threshold == 1.0:
-            all_null_cols = {key: value for key, value in percent_null.items() if value == self.percent_threshold}
+        elif self.pct_null_threshold == 1.0:
+            all_null_cols = {key: value for key, value in percent_null.items() if value == self.pct_null_threshold}
             warning_msg = "Column '{}' is 100% null"
             return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in all_null_cols]
         else:
-            highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.percent_threshold}
+            highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.pct_null_threshold}
             warning_msg = "Column '{}' is {}% or more null"
-        return [DataCheckWarning(warning_msg.format(col_name, self.percent_threshold * 100), self.name) for col_name in highly_null_cols]
+        return [DataCheckWarning(warning_msg.format(col_name, self.pct_null_threshold * 100), self.name) for col_name in highly_null_cols]
diff --git a/evalml/guardrails/utils.py b/evalml/guardrails/utils.py
index 5d246df92f..385a1fe72c 100644
--- a/evalml/guardrails/utils.py
+++ b/evalml/guardrails/utils.py
@@ -37,12 +37,12 @@ def detect_label_leakage(X, y, threshold=.95):
     return corrs
 
 
-def detect_highly_null(X, percent_threshold=.95):
+def detect_highly_null(X, pct_null_threshold=.95):
     """ Checks if there are any highly-null columns in a dataframe.
 
     Args:
         X (pd.DataFrame) : features
-        percent_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to .95
+        pct_null_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to .95
 
     Returns:
         A dictionary of features with column name or index and their percentage of null values
@@ -52,14 +52,14 @@ def detect_highly_null(X, percent_threshold=.95):
         ...    'lots_of_null': [None, None, None, None, 5],
         ...    'no_null': [1, 2, 3, 4, 5]
         ... })
-        >>> detect_highly_null(df, percent_threshold=0.8)
+        >>> detect_highly_null(df, pct_null_threshold=0.8)
         {'lots_of_null': 0.8}
     """
     if not isinstance(X, pd.DataFrame):
         X = pd.DataFrame(X)
 
     percent_null = (X.isnull().mean()).to_dict()
-    highly_null_cols = {key: value for key, value in percent_null.items() if value >= percent_threshold}
+    highly_null_cols = {key: value for key, value in percent_null.items() if value >= pct_null_threshold}
     return highly_null_cols
 
 
diff --git a/evalml/tests/data_checks_tests/test_data_check.py b/evalml/tests/data_checks_tests/test_data_check.py
index 45e6aa3037..9f771f4aa7 100644
--- a/evalml/tests/data_checks_tests/test_data_check.py
+++ b/evalml/tests/data_checks_tests/test_data_check.py
@@ -70,39 +70,39 @@ def validate(self, X, y=None):
 
 def test_highly_null_data_check_init():
     highly_null_check = DetectHighlyNullDataCheck()
-    assert highly_null_check.percent_threshold == 0.95
+    assert highly_null_check.pct_null_threshold == 0.95
 
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.0)
-    assert highly_null_check.percent_threshold == 0
+    highly_null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.0)
+    assert highly_null_check.pct_null_threshold == 0
 
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.5)
-    assert highly_null_check.percent_threshold == 0.5
+    highly_null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.5)
+    assert highly_null_check.pct_null_threshold == 0.5
 
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
-    assert highly_null_check.percent_threshold == 1.0
+    highly_null_check = DetectHighlyNullDataCheck(pct_null_threshold=1.0)
+    assert highly_null_check.pct_null_threshold == 1.0
 
-    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
-        DetectHighlyNullDataCheck(percent_threshold=-0.1)
-    with pytest.raises(ValueError, match="percent_threshold must be a float between 0 and 1, inclusive."):
-        DetectHighlyNullDataCheck(percent_threshold=1.1)
+    with pytest.raises(ValueError, match="pct_null_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(pct_null_threshold=-0.1)
+    with pytest.raises(ValueError, match="pct_null_threshold must be a float between 0 and 1, inclusive."):
+        DetectHighlyNullDataCheck(pct_null_threshold=1.1)
 
 
 def test_highly_null_data_check_warnings():
     data = pd.DataFrame({'lots_of_null': [None, None, None, None, 5],
                          'all_null': [None, None, None, None, None],
                          'no_null': [1, 2, 3, 4, 5]})
-    no_null_check = DetectHighlyNullDataCheck(percent_threshold=0.0)
+    no_null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.0)
     assert no_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is more than 0% null", "DetectHighlyNullDataCheck"),
                                             DataCheckWarning("Column 'all_null' is more than 0% null", "DetectHighlyNullDataCheck")]
-    some_null_check = DetectHighlyNullDataCheck(percent_threshold=0.5)
+    some_null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.5)
     assert some_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 50.0% or more null", "DetectHighlyNullDataCheck"),
                                               DataCheckWarning("Column 'all_null' is 50.0% or more null", "DetectHighlyNullDataCheck")]
-    all_null_check = DetectHighlyNullDataCheck(percent_threshold=1.0)
+    all_null_check = DetectHighlyNullDataCheck(pct_null_threshold=1.0)
     assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100% null", "DetectHighlyNullDataCheck")]
 
 
 def test_highly_null_data_check_input_formats():
-    highly_null_check = DetectHighlyNullDataCheck(percent_threshold=0.8)
+    highly_null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.8)
 
     # test empty pd.DataFrame
     messages = highly_null_check.validate(pd.DataFrame())

From e3b97f105add97e88784f862e2cc983e07bdd2dd Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 15:45:41 -0400
Subject: [PATCH 10/14] oops, revert to master

---
 evalml/guardrails/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evalml/guardrails/utils.py b/evalml/guardrails/utils.py
index 385a1fe72c..5d246df92f 100644
--- a/evalml/guardrails/utils.py
+++ b/evalml/guardrails/utils.py
@@ -37,12 +37,12 @@ def detect_label_leakage(X, y, threshold=.95):
     return corrs
 
 
-def detect_highly_null(X, pct_null_threshold=.95):
+def detect_highly_null(X, percent_threshold=.95):
     """ Checks if there are any highly-null columns in a dataframe.
 
     Args:
         X (pd.DataFrame) : features
-        pct_null_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to .95
+        percent_threshold(float): Require that percentage of null values to be considered "highly-null", defaults to .95
 
     Returns:
         A dictionary of features with column name or index and their percentage of null values
@@ -52,14 +52,14 @@ def detect_highly_null(X, pct_null_threshold=.95):
         ...    'lots_of_null': [None, None, None, None, 5],
         ...    'no_null': [1, 2, 3, 4, 5]
         ... })
-        >>> detect_highly_null(df, pct_null_threshold=0.8)
+        >>> detect_highly_null(df, percent_threshold=0.8)
         {'lots_of_null': 0.8}
     """
     if not isinstance(X, pd.DataFrame):
         X = pd.DataFrame(X)
 
     percent_null = (X.isnull().mean()).to_dict()
-    highly_null_cols = {key: value for key, value in percent_null.items() if value >= pct_null_threshold}
+    highly_null_cols = {key: value for key, value in percent_null.items() if value >= percent_threshold}
     return highly_null_cols
 
 
From 0f8f160aa35d5fdbd72379df049c1b3789dfd5af Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 19:02:33 -0400
Subject: [PATCH 11/14] clean up docstr

---
 evalml/data_checks/detect_highly_null_data_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 64e2503b18..97bce77158 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -34,7 +34,7 @@ def validate(self, X, y=None):
             ...    'no_null': [1, 2, 3, 4, 5]
             ... })
             >>> null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.8)
-            >>> assert null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' are more than 80.0% null", "DetectHighlyNullDataCheck")]
+            >>> assert null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' is 80.0% or more null", "DetectHighlyNullDataCheck")]
         """
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)

From 59dcb4d191533880607adc4e943cb3b663698187 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 23:37:32 -0400
Subject: [PATCH 12/14] cleanup

---
 evalml/data_checks/default_data_checks.py           | 2 +-
 evalml/data_checks/detect_highly_null_data_check.py | 9 +++------
 evalml/tests/data_checks_tests/test_data_check.py   | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/evalml/data_checks/default_data_checks.py b/evalml/data_checks/default_data_checks.py
index 8bd8d352be..d057fcc7d9 100644
--- a/evalml/data_checks/default_data_checks.py
+++ b/evalml/data_checks/default_data_checks.py
@@ -5,7 +5,7 @@
 class DefaultDataChecks(DataChecks):
     def __init__(self, data_checks=None):
         """
-        A collection of data checks.
+        A collection of basic data checks.
 
         Arguments:
             data_checks (list (DataCheck)): Ignored.
diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 97bce77158..3e48118269 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -40,14 +40,11 @@ def validate(self, X, y=None):
             X = pd.DataFrame(X)
         percent_null = (X.isnull().mean()).to_dict()
         if self.pct_null_threshold == 0.0:
-            has_null_cols = {key: value for key, value in percent_null.items() if value > self.pct_null_threshold}
+            all_null_cols = {key: value for key, value in percent_null.items() if value > 0.0}
             warning_msg = "Column '{}' is more than 0% null"
-            return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in has_null_cols]
-        elif self.pct_null_threshold == 1.0:
-            all_null_cols = {key: value for key, value in percent_null.items() if value == self.pct_null_threshold}
-            warning_msg = "Column '{}' is 100% null"
             return [DataCheckWarning(warning_msg.format(col_name), self.name) for col_name in all_null_cols]
         else:
             highly_null_cols = {key: value for key, value in percent_null.items() if value >= self.pct_null_threshold}
             warning_msg = "Column '{}' is {}% or more null"
-        return [DataCheckWarning(warning_msg.format(col_name, self.pct_null_threshold * 100), self.name) for col_name in highly_null_cols]
+
+            return [DataCheckWarning(warning_msg.format(col_name, self.pct_null_threshold * 100), self.name) for col_name in highly_null_cols]
diff --git a/evalml/tests/data_checks_tests/test_data_check.py b/evalml/tests/data_checks_tests/test_data_check.py
index 9f771f4aa7..d683a2545f 100644
--- a/evalml/tests/data_checks_tests/test_data_check.py
+++ b/evalml/tests/data_checks_tests/test_data_check.py
@@ -98,7 +98,7 @@ def test_highly_null_data_check_warnings():
     assert some_null_check.validate(data) == [DataCheckWarning("Column 'lots_of_null' is 50.0% or more null", "DetectHighlyNullDataCheck"),
                                               DataCheckWarning("Column 'all_null' is 50.0% or more null", "DetectHighlyNullDataCheck")]
     all_null_check = DetectHighlyNullDataCheck(pct_null_threshold=1.0)
-    assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100% null", "DetectHighlyNullDataCheck")]
+    assert all_null_check.validate(data) == [DataCheckWarning("Column 'all_null' is 100.0% or more null", "DetectHighlyNullDataCheck")]
 
 
 def test_highly_null_data_check_input_formats():

From 865bce31499992163d7fd5d1e44dd6a2f9662b43 Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Thu, 7 May 2020 23:47:51 -0400
Subject: [PATCH 13/14] cleanup

---
 evalml/data_checks/detect_highly_null_data_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index 3e48118269..fb10fe6804 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -34,7 +34,7 @@ def validate(self, X, y=None):
             ...    'no_null': [1, 2, 3, 4, 5]
             ... })
             >>> null_check = DetectHighlyNullDataCheck(pct_null_threshold=0.8)
-            >>> assert null_check.validate(df) == [DataCheckWarning("Columns 'lots_of_null' is 80.0% or more null", "DetectHighlyNullDataCheck")]
+            >>> assert null_check.validate(df) == [DataCheckWarning("Column 'lots_of_null' is 80.0% or more null", "DetectHighlyNullDataCheck")]
         """
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)

From 41fc203a962b00e17477a603c35d0945fc084dbd Mon Sep 17 00:00:00 2001
From: Angela Lin <angela97lin@gmail.com>
Date: Fri, 8 May 2020 14:11:19 -0400
Subject: [PATCH 14/14] update docstr

---
 evalml/data_checks/detect_highly_null_data_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evalml/data_checks/detect_highly_null_data_check.py b/evalml/data_checks/detect_highly_null_data_check.py
index fb10fe6804..079c77fba6 100644
--- a/evalml/data_checks/detect_highly_null_data_check.py
+++ b/evalml/data_checks/detect_highly_null_data_check.py
@@ -10,7 +10,7 @@ def __init__(self, pct_null_threshold=0.95):
         """Checks if there are any highly-null columns in the input.
 
         Arguments:
-            pct_null_threshold(float): If the percentage of values in an input feature exceeds this amount,
+            pct_null_threshold(float): If the fraction of NaN values in an input feature (between 0 and 1) meets or exceeds this amount,
                 that feature will be considered highly-null. Defaults to 0.95.
 
         """