alteryx · angela97lin · Nov 22, 2021 · Nov 19, 2021 · Nov 19, 2021 · Nov 21, 2021
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -16,6 +16,7 @@ Release Notes
     * Changes
         * Delete ``predict_uses_y`` estimator attribute :pr:`3069`
         * Change ``DateTimeFeaturizer`` to use corresponding Featuretools primitives :pr:`3081`
+        * Updated ``TargetDistributionDataCheck`` to return metadata details as floats rather than strings :pr:`3085`
     * Documentation Changes
         * Updated docs to use data check action methods rather than manually cleaning data :pr:`3050`
     * Testing Changes

diff --git a/evalml/data_checks/target_distribution_data_check.py b/evalml/data_checks/target_distribution_data_check.py
@@ -38,7 +38,7 @@ def validate(self, X, y):
             ...                   "data_check_name": "TargetDistributionDataCheck",
             ...                   "level": "warning",
             ...                   "code": "TARGET_LOGNORMAL_DISTRIBUTION",
-            ...                   "details": {"shapiro-statistic/pvalue": '0.8/0.045', "columns": None, "rows": None}}],
+            ...                   "details": {"normalization_method": "shapiro", "statistic": 0.8, "p-value": 0.045, "columns": None, "rows": None}}],
             ...     "actions": [{'code': 'TRANSFORM_TARGET',
             ...                  "data_check_name": "TargetDistributionDataCheck",
             ...                  'metadata': {'transformation_strategy': 'lognormal',
@@ -95,32 +95,16 @@ def validate(self, X, y):
             )
             return results
 
-        normalization_test = shapiro if len(y) <= 5000 else jarque_bera
-        normalization_test_string = "shapiro" if len(y) <= 5000 else "jarque_bera"
-        # Check if a normal distribution is detected with p-value above 0.05
-        if normalization_test(y).pvalue >= 0.05:
-            return results
-
-        y_new = round(y, 6)
-        if any(y <= 0):
-            y_new = y + abs(y.min()) + 1
-
-        y_new = y_new[
-            y_new < (y_new.mean() + 3 * round(y.std(), 3))
-        ]  # Drop values greater than 3 standard deviations
-        norm_test_og = normalization_test(y_new)
-        norm_test_log = normalization_test(np.log(y_new))
-
-        log_detected = False
-
-        # If the p-value of the log transformed target is greater than or equal to the p-value of the original target
-        # with outliers dropped, then it would imply that the log transformed target has more of a normal distribution
-        if norm_test_log.pvalue >= norm_test_og.pvalue:
-            log_detected = True
-
-        if log_detected:
+        (
+            is_log_distribution,
+            normalization_test_string,
+            norm_test_og,
+        ) = _detect_log_distribution_helper(y)
+        if is_log_distribution:
             details = {
-                f"{normalization_test_string}-statistic/pvalue": f"{round(norm_test_og.statistic, 1)}/{round(norm_test_og.pvalue, 3)}"
+                "normalization_method": normalization_test_string,
+                "statistic": round(norm_test_og.statistic, 1),
+                "p-value": round(norm_test_og.pvalue, 3),
             }
             results["warnings"].append(
                 DataCheckWarning(
@@ -142,3 +126,27 @@ def validate(self, X, y):
             )
 
         return results
+
+
+def _detect_log_distribution_helper(y):
+    """Detect whether ``y`` looks log-distributed. Returns a boolean flag, the name of the normality test used, and that test's result on the shifted/trimmed target (None if ``y`` already passes the normality test)."""
+    normalization_test = shapiro if len(y) <= 5000 else jarque_bera
+    normalization_test_string = "shapiro" if len(y) <= 5000 else "jarque_bera"
+    # Target already looks normal (p-value at or above 0.05): report no log distribution and skip further testing
+    if normalization_test(y).pvalue >= 0.05:
+        return False, normalization_test_string, None
+
+    y_new = round(y, 6)
+    if any(y <= 0):
+        y_new = y + abs(y.min()) + 1  # shift so every value is >= 1 and np.log below is defined
+    y_new = y_new[
+        y_new < (y_new.mean() + 3 * round(y.std(), 3))
+    ]  # Keep only values below the mean plus 3 standard deviations (upper-tail outliers are dropped)
+    norm_test_og = normalization_test(y_new)
+    norm_test_log = normalization_test(np.log(y_new))
+
+    # If the p-value of the log transformed target is greater than or equal to the p-value of the original target
+    # with outliers dropped, then it would imply that the log transformed target has more of a normal distribution
+    if norm_test_log.pvalue >= norm_test_og.pvalue:
+        return True, normalization_test_string, norm_test_og
+    return False, normalization_test_string, norm_test_og
diff --git a/evalml/tests/data_checks_tests/test_target_distribution_data_check.py b/evalml/tests/data_checks_tests/test_target_distribution_data_check.py
@@ -120,7 +120,9 @@ def test_target_distribution_data_check_warning_action(
         test_og = statistic(y)
 
         details = {
-            f"{name}-statistic/pvalue": f"{round(test_og.statistic, 1)}/{round(test_og.pvalue, 3)}"
+            "normalization_method": name,
+            "statistic": round(test_og.statistic, 1),
+            "p-value": round(test_og.pvalue, 3),
         }
         assert target_dist_ == {
             "warnings": [