-
Notifications
You must be signed in to change notification settings - Fork 246
/
under_annotated_segments_test.py
151 lines (124 loc) · 6.92 KB
/
under_annotated_segments_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""Test for the NLP UnderAnnotatedSegments check"""
import numpy as np
from hamcrest import assert_that, calling, close_to, equal_to, has_items, raises
from deepchecks.core.errors import DeepchecksProcessError
from deepchecks.nlp.checks import UnderAnnotatedMetaDataSegments, UnderAnnotatedPropertySegments
from tests.base.utils import equal_condition_result
def test_tweet_emotion_properties(tweet_emotion_train_test_textdata):
    """Run UnderAnnotatedPropertySegments on tweet data whose trailing labels are removed.

    Roughly the first half of the labels is kept and the rest is replaced by
    ``None``; the 50% annotation-ratio condition is then expected to fail.
    """
    # Arrange
    _, test = tweet_emotion_train_test_textdata
    test = test.copy()
    # Derive the number of missing labels from the total instead of calling
    # round() a second time: round() rounds halves to the nearest even number,
    # so two rounded halves of an odd length never add up to that length.
    n_samples = len(test._label)
    n_labeled = round(n_samples / 2)
    n_missing = n_samples - n_labeled
    test._label = np.asarray(list(test._label[:n_labeled]) + [None] * n_missing, dtype=object)
    check = UnderAnnotatedPropertySegments().add_condition_segments_annotation_ratio_greater_than(0.5)

    # Act
    result = check.run(test)
    condition_result = check.conditions_decision(result)

    # Assert
    expected_condition = equal_condition_result(
        is_pass=False,
        details=r'Most under annotated segment has annotation ratio of 31.43%.',
        name=r'In all segments annotation ratio should be greater than 50%.',
    )
    assert_that(condition_result, has_items(expected_condition))
    assert_that(result.value['avg_score'], close_to(0.5, 0.001))
    weak_segments = result.value['weak_segments_list']
    # The segment count is allowed to deviate by one from 33.
    assert_that(len(weak_segments), close_to(33, 1))
    assert_that(weak_segments.iloc[0, 0], close_to(0.314, 0.01))
def test_tweet_emotion_metadata(tweet_emotion_train_test_textdata):
    """Run UnderAnnotatedMetaDataSegments on tweet data whose trailing labels are removed.

    Roughly the first half of the labels is kept and the rest is replaced by
    ``None``; the relative-performance condition is then expected to fail.
    """
    # Arrange
    _, test = tweet_emotion_train_test_textdata
    test = test.copy()
    # Derive the number of missing labels from the total instead of calling
    # round() a second time: round() rounds halves to the nearest even number,
    # so two rounded halves of an odd length never add up to that length.
    n_samples = len(test._label)
    n_labeled = round(n_samples / 2)
    n_missing = n_samples - n_labeled
    test._label = np.asarray(list(test._label[:n_labeled]) + [None] * n_missing, dtype=object)
    check = UnderAnnotatedMetaDataSegments().add_condition_segments_relative_performance_greater_than()

    # Act
    result = check.run(test)
    condition_result = check.conditions_decision(result)

    # Assert
    expected_condition = equal_condition_result(
        is_pass=False,
        details='Found a segment with annotation ratio of 0.366 in comparison to an average '
                'score of 0.5 in sampled data.',
        name='The relative performance of weakest segment is greater than 80% of average model '
             'performance.',
    )
    assert_that(condition_result, has_items(expected_condition))
    assert_that(result.value['avg_score'], close_to(0.5, 0.001))
    weak_segments = result.value['weak_segments_list']
    assert_that(len(weak_segments), equal_to(5))
    # First row: column 0 is compared with the ratio reported in the condition
    # details, column 1 with the expected feature name.
    assert_that(weak_segments.iloc[0, 0], close_to(0.366, 0.01))
    assert_that(weak_segments.iloc[0, 1], equal_to('user_age'))
def test_tweet_emotion_metadata_interesting_segment(tweet_emotion_train_test_textdata):
    """Remove the labels of one metadata-defined segment and run UnderAnnotatedMetaDataSegments.

    Labels are set to ``None`` for rows with ``user_age`` above 30 and
    ``user_region`` equal to ``'Europe'``; the first reported weak segment is
    expected to have a value near 0 and to reference ``user_age``.
    """
    # Arrange
    _, test = tweet_emotion_train_test_textdata
    test = test.copy()
    older_than_30 = test.metadata['user_age'] > 30
    from_europe = test.metadata['user_region'] == 'Europe'
    idx_to_change = test.metadata[older_than_30 & from_europe].index
    # Work on an object-typed copy so that None can be stored in place of a label.
    stripped_labels = test._label.copy().astype(object)
    stripped_labels[idx_to_change] = None
    test._label = stripped_labels

    # Act
    check = UnderAnnotatedMetaDataSegments()
    result = check.run(test)

    # Assert
    assert_that(result.value['avg_score'], close_to(0.844, 0.001))
    weak_segments = result.value['weak_segments_list']
    assert_that(len(weak_segments), equal_to(6))
    assert_that(weak_segments.iloc[0, 0], close_to(0, 0.01))
    assert_that(weak_segments.iloc[0, 1], equal_to('user_age'))
def test_tweet_emotion_metadata_fully_annotated(tweet_emotion_train_test_textdata):
    """Running the check on the unmodified test split must raise DeepchecksProcessError."""
    # Arrange
    _, test = tweet_emotion_train_test_textdata
    check = UnderAnnotatedMetaDataSegments().add_condition_segments_relative_performance_greater_than()
    expected_message = ('Check was unable to find under annotated segments. This is expected if '
                        'your data is well annotated. If this is not the case, try increasing '
                        'n_samples or supply more metadata.')

    # Act & Assert
    # calling(...) defers the run so that the raises matcher can observe the exception.
    deferred_run = calling(check.run).with_args(test)
    assert_that(deferred_run, raises(DeepchecksProcessError, expected_message))
def test_token_classification_dataset(small_wikiann_train_test_text_data):
    """Run UnderAnnotatedPropertySegments on wikiann data with 40 kept labels and 10 ``None`` labels.

    The relative-performance condition is expected to fail.
    """
    # Arrange
    data, _ = small_wikiann_train_test_text_data
    data = data.copy()
    kept_labels = list(data._label[:40])
    missing_labels = [None] * 10
    data._label = np.asarray(kept_labels + missing_labels, dtype=object)
    # Properties are calculated explicitly here, skipping the long-running ones.
    data.calculate_default_properties(include_long_calculation_properties=False)
    check = UnderAnnotatedPropertySegments().add_condition_segments_relative_performance_greater_than()

    # Act
    result = check.run(data)
    condition_result = check.conditions_decision(result)

    # Assert
    expected_condition = equal_condition_result(
        is_pass=False,
        details='Found a segment with annotation ratio of 0.375 in comparison to an '
                'average score of 0.8 in sampled data.',
        name='The relative performance of weakest segment is greater than 80% of average model '
             'performance.',
    )
    assert_that(condition_result, has_items(expected_condition))
    assert_that(result.value['avg_score'], close_to(0.8, 0.001))
    weak_segments = result.value['weak_segments_list']
    assert_that(len(weak_segments), equal_to(15))
    assert_that(weak_segments.iloc[0, 0], close_to(0.375, 0.01))
def test_multilabel_dataset(multilabel_mock_dataset_and_probabilities):
    """Run UnderAnnotatedMetaDataSegments on multilabel data whose trailing labels are removed.

    Roughly the first half of the labels is kept and the rest is replaced by
    ``None``; the relative-performance condition is then expected to fail.
    """
    # Arrange
    data, _ = multilabel_mock_dataset_and_probabilities
    data = data.copy()
    # Guard: the fixture must really be a multilabel dataset for this test to be meaningful.
    assert_that(data.is_multi_label_classification(), equal_to(True))
    # Derive the number of missing labels from the total instead of calling
    # round() a second time: round() rounds halves to the nearest even number,
    # so two rounded halves of an odd length never add up to that length.
    n_samples = len(data._label)
    n_labeled = round(n_samples / 2)
    n_missing = n_samples - n_labeled
    data._label = np.asarray(list(data._label[:n_labeled]) + [None] * n_missing, dtype=object)
    check = UnderAnnotatedMetaDataSegments().add_condition_segments_relative_performance_greater_than()

    # Act
    result = check.run(data)
    condition_result = check.conditions_decision(result)

    # Assert
    expected_condition = equal_condition_result(
        is_pass=False,
        details='Found a segment with annotation ratio of 0.326 in comparison to an average '
                'score of 0.5 in sampled data.',
        name='The relative performance of weakest segment is greater than 80% of average model '
             'performance.',
    )
    assert_that(condition_result, has_items(expected_condition))
    assert_that(result.value['avg_score'], close_to(0.5, 0.001))
    weak_segments = result.value['weak_segments_list']
    assert_that(len(weak_segments), equal_to(5))
    assert_that(weak_segments.iloc[0, 0], close_to(0.326, 0.01))