Skip to content

Commit

Permalink
Refactoring test_no_variance_data_checks to reuse some of the messages.
Browse files Browse the repository at this point in the history
  • Loading branch information
freddyaboulton committed Jun 29, 2020
1 parent 7f21f71 commit b5a75bd
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions evalml/tests/data_checks_tests/test_no_variance_data_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,26 @@
all_null_y = pd.Series([None] * 4)
two_distinct_with_nulls_y = pd.Series(([1] * 2) + ([None] * 2))

# Expected NoVarianceDataCheck errors, named once so the entries in `cases`
# can reuse them instead of repeating the full DataCheckError construction.
# "feature_*" covers the feature column message; "labels_*" covers the labels
# message; the numeric part is the unique-value count stated in the message.
feature_0_unique = DataCheckError("Column feature has 0 unique value.", "NoVarianceDataCheck")
feature_1_unique = DataCheckError("Column feature has 1 unique value.", "NoVarianceDataCheck")
labels_0_unique = DataCheckError("The Labels have 0 unique value.", "NoVarianceDataCheck")
labels_1_unique = DataCheckError("The Labels have 1 unique value.", "NoVarianceDataCheck")

cases = [(all_distinct_X, all_distinct_y, True, []),
([1, 2, 3, 4], [1, 2, 3, 2], False, []),
(np.arange(12).reshape(4, 3), [1, 2, 3], True, []),
(all_null_X, all_distinct_y, False, [DataCheckError("Column feature has 0 unique value.", "NoVarianceDataCheck")]),
(all_null_X, [1] * 4, False, [DataCheckError("Column feature has 0 unique value.", "NoVarianceDataCheck"),
DataCheckError("The Labels have 1 unique value.", "NoVarianceDataCheck")]),
(all_null_X, all_distinct_y, True, [DataCheckError("Column feature has 1 unique value.", "NoVarianceDataCheck")]),
(all_distinct_X, all_null_y, True, [DataCheckError("The Labels have 1 unique value.", "NoVarianceDataCheck")]),
(all_distinct_X, all_null_y, False, [DataCheckError("The Labels have 0 unique value.", "NoVarianceDataCheck")]),
(all_null_X, all_distinct_y, False, [feature_0_unique]),
(all_null_X, [1] * 4, False, [feature_0_unique, labels_1_unique]),
(all_null_X, all_distinct_y, True, [feature_1_unique]),
(all_distinct_X, all_null_y, True, [labels_1_unique]),
(all_distinct_X, all_null_y, False, [labels_0_unique]),
(two_distinct_with_nulls_X, two_distinct_with_nulls_y, True,
[DataCheckWarning("Column feature has two unique values including nulls. Consider encoding the nulls for "
"this column to be useful for machine learning.", "NoVarianceDataCheck"),
DataCheckWarning("The Labels have two unique values including nulls. Consider encoding the nulls for "
"this column to be useful for machine learning.", "NoVarianceDataCheck")
]),
(two_distinct_with_nulls_X, two_distinct_with_nulls_y, False,
[DataCheckError("Column feature has 1 unique value.", "NoVarianceDataCheck"),
DataCheckError("The Labels have 1 unique value.", "NoVarianceDataCheck")])
(two_distinct_with_nulls_X, two_distinct_with_nulls_y, False, [feature_1_unique, labels_1_unique])
]


Expand Down

0 comments on commit b5a75bd

Please sign in to comment.