Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing value of sys_candidate feature when only one duckling entity is found #317

Merged
merged 2 commits into from Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions mindmeld/models/query_features.py
Expand Up
@@ -556,9 +556,9 @@ def _extractor(query, resources):
         for token_features in feat_seq:
             for feature, value in token_features.items():
                 if feature.endswith('log_len'):
-                    token_features[feature] = math.log(float(sum(value)) / len(value))
+                    token_features[feature] = math.log((float(sum(value)) / len(value)) + 1)
                 else:
-                    token_features[feature] = math.log(value)
+                    token_features[feature] = math.log(value + 1) # Adjust value to be greater than 0
         return feat_seq

     return _extractor
Expand Down
26 changes: 13 additions & 13 deletions tests/test_query_features.py
Expand Up
@@ -254,7 +254,7 @@ def test_sentiment_query_feature(
 "sys_candidate|type:sys_interval|granularity:hour|pos:0|log_len",
 "sys_candidate|type:sys_time|granularity:hour|pos:0|log_len",
 ],
-[math.log(37 / 3), math.log(2)],
+[math.log((37 / 3) + 1), math.log(2 + 1)],
 -1,
 ),
 # Test for sys_candidate features for normalized text
Expand All
@@ -264,7 +264,7 @@ def test_sentiment_query_feature(
 "sys_candidate|type:sys_interval|granularity:hour|pos:0|log_len",
 "sys_candidate|type:sys_time|granularity:hour|pos:0|log_len",
 ],
-[math.log(37 / 3), math.log(2)],
+[math.log((37 / 3) + 1), math.log(2 + 1)],
 -1,
 ),
 ],
Expand Down
Expand Up
@@ -440,8 +440,8 @@ def test_entity_query_features(
 "00",
 "el",
 "lm",
-0.0,
 0.6931471805599453,
+1.0986122886681098,
 ],
 4,
 ),
Expand Down
Expand Up
@@ -545,21 +545,21 @@ def test_query_token_span_features(kwik_e_mart_nlp):
     for feat in unexpected_features:
         assert feat not in output_features[0]

-    assert output_features[0][expected_features[1]] == math.log(len('$2'))
-    assert math.isclose(output_features[0][expected_features[0]], math.log(1.5), rel_tol=1e-04)
+    assert output_features[0][expected_features[1]] == math.log(len('$2') + 1)
+    assert math.isclose(output_features[0][expected_features[0]], math.log(1.5 + 1), rel_tol=1e-04)

     output_features = er.view_extracted_features('$20 5')

-    assert output_features[0][f'{feature_name}:0'] == math.log(6)
+    assert output_features[0][f'{feature_name}:0'] == math.log(6 + 1)
     assert math.isclose(output_features[0][f'{feature_name}:0|log_len'],
-                        math.log(3.833), rel_tol=1e-04)
-    assert output_features[0][f'{feature_name}:1'] == math.log(5)
+                        math.log(3.833 + 1), rel_tol=1e-04)
+    assert output_features[0][f'{feature_name}:1'] == math.log(5 + 1)
     assert math.isclose(output_features[0][f'{feature_name}:1|log_len'],
-                        math.log(3.8), rel_tol=1e-04)
+                        math.log(3.8 + 1), rel_tol=1e-04)

-    assert output_features[1][f'{feature_name}:-1'] == math.log(6)
+    assert output_features[1][f'{feature_name}:-1'] == math.log(6 + 1)
     assert math.isclose(output_features[1][f'{feature_name}:-1|log_len'],
-                        math.log(3.833), rel_tol=1e-04)
-    assert output_features[1][f'{feature_name}:0'] == math.log(5)
+                        math.log(3.833 + 1), rel_tol=1e-04)
+    assert output_features[1][f'{feature_name}:0'] == math.log(5 + 1)
     assert math.isclose(output_features[1][f'{feature_name}:0|log_len'],
-                        math.log(3.8), rel_tol=1e-04)
+                        math.log(3.8 + 1), rel_tol=1e-04)