/
test_data_checks.py
56 lines (44 loc) · 2.2 KB
/
test_data_checks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
from evalml.data_checks.data_check import DataCheck
from evalml.data_checks.data_check_message import (
DataCheckError,
DataCheckWarning
)
from evalml.data_checks.data_checks import DataChecks
from evalml.data_checks.default_data_checks import DefaultDataChecks
from evalml.data_checks.utils import EmptyDataChecks
def test_data_checks(X_y):
    """Check that DataChecks.validate returns the messages of all its checks.

    Four mock checks are registered: one that reports nothing, one that
    reports a warning, one that reports an error, and one that reports an
    error followed by a warning. The combined result is compared against
    the messages listed in registration order.
    """
    features, target = X_y

    # The mock class names must stay as they are: the expected messages
    # below refer to each check by its class name.
    class MockDataCheck(DataCheck):
        def validate(self, X, y):
            return []

    class MockDataCheckWarning(DataCheck):
        def validate(self, X, y):
            return [DataCheckWarning("warning one", self.name)]

    class MockDataCheckError(DataCheck):
        def validate(self, X, y):
            return [DataCheckError("error one", self.name)]

    class MockDataCheckErrorAndWarning(DataCheck):
        def validate(self, X, y):
            return [
                DataCheckError("error two", self.name),
                DataCheckWarning("warning two", self.name),
            ]

    checks = DataChecks(data_checks=[
        MockDataCheck(),
        MockDataCheckWarning(),
        MockDataCheckError(),
        MockDataCheckErrorAndWarning(),
    ])

    expected = [
        DataCheckWarning("warning one", "MockDataCheckWarning"),
        DataCheckError("error one", "MockDataCheckError"),
        DataCheckError("error two", "MockDataCheckErrorAndWarning"),
        DataCheckWarning("warning two", "MockDataCheckErrorAndWarning"),
    ]
    messages = checks.validate(features, target)
    assert messages == expected
def test_empty_data_checks(X_y):
    """Check that EmptyDataChecks.validate returns an empty message list."""
    features, target = X_y
    assert EmptyDataChecks().validate(features, target) == []
def test_default_data_checks(X_y):
    """Check the warnings DefaultDataChecks emits for mostly/fully null columns.

    The frame has one column with four of five values null, two columns
    that are entirely null, and one column with no nulls. Only the two
    entirely-null columns are expected to be reported.
    """
    # NOTE: the X_y fixture is requested but not used; this test builds
    # its own frame and calls validate() with X only.
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'all_null': [None] * 5,
        'also_all_null': [None] * 5,
        'no_null': [1, 2, 3, 4, 5],
    })

    expected = [
        DataCheckWarning("Column 'all_null' is 95.0% or more null", "DetectHighlyNullDataCheck"),
        DataCheckWarning("Column 'also_all_null' is 95.0% or more null", "DetectHighlyNullDataCheck"),
    ]
    messages = DefaultDataChecks().validate(frame)
    assert messages == expected