Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add global test for NaturalLanguage primitives #2429

Merged
merged 48 commits into from Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
fb2d20f
update docstrings
sbadithe Dec 23, 2022
d12a828
release notes
sbadithe Dec 23, 2022
18d546e
add all primitives to test
sbadithe Dec 23, 2022
d9cdfc3
update conftest to include strings that have triggered backtracking b…
sbadithe Dec 23, 2022
25ebff9
update conftest to include blank string
sbadithe Dec 23, 2022
ca3a7cc
lint and typo fix
sbadithe Dec 23, 2022
0f3d4f6
don't run on windows
sbadithe Dec 23, 2022
ca9cb82
lint
sbadithe Dec 23, 2022
8068586
Merge branch 'main' into add-randomized-test-for-natlangs
sbadithe Jan 5, 2023
9d99ad1
Update release_notes.rst
sbadithe Jan 5, 2023
41c3037
remove randomized tests
sbadithe Jan 6, 2023
a557abf
refactor fixtures + PR comments
sbadithe Jan 6, 2023
f5572a0
programmatically get primitives instead of manually importing them all
sbadithe Jan 6, 2023
72e2e86
typo
sbadithe Jan 6, 2023
b09799b
refactor
sbadithe Jan 6, 2023
87a467a
lowercase "Linux" to get it to run
sbadithe Jan 6, 2023
a9e833e
lint
sbadithe Jan 6, 2023
75bc136
use skip-if
sbadithe Jan 6, 2023
1aa0e8c
revert changes to utilities to see if that fixes the tests
sbadithe Jan 6, 2023
fdb2d67
Merge branch 'main' into add-randomized-test-for-natlangs
sbadithe Jan 6, 2023
f09efc2
typo fix
sbadithe Jan 6, 2023
20c0019
Merge remote-tracking branch 'origin/add-randomized-test-for-natlangs…
sbadithe Jan 6, 2023
deb7563
cleanup + refactoring
sbadithe Jan 6, 2023
e871b5c
refactor
sbadithe Jan 6, 2023
9939c22
lint
sbadithe Jan 6, 2023
ea385d6
cleanup
sbadithe Jan 6, 2023
3aac880
Merge branch 'main' into add-randomized-test-for-natlangs
sbadithe Jan 7, 2023
7c7dd8f
move method to private and fix skip boolean
sbadithe Jan 9, 2023
96b99e5
fix helper function, use pytest-timeout
sbadithe Jan 10, 2023
17d3e60
Merge remote-tracking branch 'origin/add-randomized-test-for-natlangs…
sbadithe Jan 10, 2023
7294328
Merge branch 'main' into add-randomized-test-for-natlangs
sbadithe Jan 10, 2023
792a77c
Update test_natural_language_primitives_terminate.py
sbadithe Jan 10, 2023
37d9aaa
debug run
sbadithe Jan 10, 2023
2b92b17
Merge remote-tracking branch 'origin/add-randomized-test-for-natlangs…
sbadithe Jan 10, 2023
529205c
debug run, remove skipif
sbadithe Jan 10, 2023
dd1e23a
debug run, try to fix test failures
sbadithe Jan 10, 2023
f37aaf6
debug run, try to fix test failures
sbadithe Jan 10, 2023
8dfcb6c
try removing pytest-timeout
sbadithe Jan 10, 2023
0a8632e
try sorting test params
sbadithe Jan 10, 2023
5b836df
update primitives assignment to make sure it takes primitive objects
sbadithe Jan 10, 2023
03c6d01
add back timeout
sbadithe Jan 10, 2023
11cb2f1
don't sort .values
sbadithe Jan 10, 2023
7b5f068
don't sort .values
sbadithe Jan 10, 2023
815c2e0
use tuple unpacking
sbadithe Jan 10, 2023
53df7b9
add explanatory comment; update variable naming
sbadithe Jan 10, 2023
7b18fe8
fix typo in release notes
sbadithe Jan 10, 2023
f7b2b40
delete space
sbadithe Jan 10, 2023
b7eb0e4
Merge branch 'main' into add-randomized-test-for-natlangs
sbadithe Jan 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/source/release_notes.rst
Expand Up @@ -11,12 +11,13 @@ Future Release
* Documentation Changes
* Minor fix to release notes (:pr:`2444`)
* Testing Changes
* Add test that checks for Natural Language primitives timing out against edge-case input (:pr:`2429`)
* Fix test compatibility with composeml 0.10 (:pr:`2439`)
* Minimum dependency unit test jobs do not abort if one job fails (:pr:`2437`)
* Run Looking Glass performance tests on merge to main (:pr:`2440`, :pr:`2441`)

Thanks to the following people for contributing to this release:
:user:`gsheni`, :user:`rwedge`, :user: `thehomebrewnerd`
:user:`gsheni`, :user:`rwedge`, :user:`sbadithe`, :user:`thehomebrewnerd`

v1.20.0 Jan 5, 2023
===================
Expand Down
62 changes: 46 additions & 16 deletions featuretools/primitives/utils.py
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
from woodwork import list_logical_types, list_semantic_tags
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import NaturalLanguage

import featuretools
from featuretools.primitives import NumberOfCommonWords
Expand All @@ -17,28 +18,57 @@
from featuretools.utils.gen_utils import Library, find_descendents


# returns all aggregation primitives, regardless of compatibility
def get_aggregation_primitives():
aggregation_primitives = set([])
def _get_primitives(primitive_kind):
"""Helper function that selects all primitives
that are subclasses of `primitive_kind`.

Scans every attribute of ``featuretools.primitives`` and keeps the
classes that pass ``issubclass(attribute, primitive_kind)`` and have a
truthy ``name``.

Args:
primitive_kind (class): Primitive base class to match against.

Returns:
dict: Maps each selected primitive's lowercased ``name`` to its class.
"""
primitives = set()
for attribute_string in dir(featuretools.primitives):
attribute = getattr(featuretools.primitives, attribute_string)
if isclass(attribute):
# NOTE(review): the next four lines use `aggregation_primitives`, which this
# function never defines - they look like pre-refactor lines shown by the
# diff view rather than part of the new body; confirm against the merged file.
if issubclass(attribute, featuretools.primitives.AggregationPrimitive):
if attribute.name:
aggregation_primitives.add(attribute)
return {prim.name.lower(): prim for prim in aggregation_primitives}
if issubclass(attribute, primitive_kind) and attribute.name:
primitives.add(attribute)
return {prim.name.lower(): prim for prim in primitives}


def get_aggregation_primitives():
    """Collect every aggregation primitive, with no compatibility filtering.

    Delegates to ``_get_primitives`` using the ``AggregationPrimitive``
    base class as the kind to match.
    """
    aggregation_base = featuretools.primitives.AggregationPrimitive
    return _get_primitives(aggregation_base)


# returns all transform primitives, regardless of compatibility
def get_transform_primitives():
transform_primitives = set([])
for attribute_string in dir(featuretools.primitives):
attribute = getattr(featuretools.primitives, attribute_string)
if isclass(attribute):
if issubclass(attribute, featuretools.primitives.TransformPrimitive):
if attribute.name:
transform_primitives.add(attribute)
return {prim.name.lower(): prim for prim in transform_primitives}
"""Returns all transform primitives, regardless
of compatibility
"""
return _get_primitives(featuretools.primitives.TransformPrimitive)


def _get_natural_language_primitives():
    """Select the transform primitives that accept Natural Language input.

    A primitive qualifies when any entry of its ``input_types`` -- or any
    column schema inside a nested list entry -- has a ``NaturalLanguage``
    logical type. No compatibility filtering is applied.

    Returns:
        dict: The qualifying entries of ``get_transform_primitives()``,
            under the same keys.
    """

    def _accepts_natural_language(candidate):
        # An entry is either a single column schema or a list of them;
        # treat both shapes uniformly.
        for entry in candidate.input_types:
            schemas = entry if isinstance(entry, list) else [entry]
            for schema in schemas:
                if isinstance(schema.logical_type, NaturalLanguage):
                    return True
        return False

    selected = {}
    for key, candidate in get_transform_primitives().items():
        if _accepts_natural_language(candidate):
            selected[key] = candidate
    return selected


def list_primitives():
Expand Down
8 changes: 8 additions & 0 deletions featuretools/tests/conftest.py
Expand Up @@ -864,3 +864,11 @@ class TestTransform(TransformPrimitive):
stack_on = []

return TestTransform


@pytest.fixture
def strings_that_have_triggered_errors_before():
    """Provide the regression strings: a single space and a long quoted text."""
    single_space = " "
    long_quoted_text = '"This Borderlands game here"" is the perfect conclusion to the ""Borderlands 3"" line, which focuses on the fans ""favorite character and gives the players the opportunity to close for a long time some very important questions about\'s character and the memorable scenery with which the players interact.'
    return [single_space, long_quoted_text]
@@ -0,0 +1,22 @@
import pandas as pd
import pytest

from featuretools.primitives.utils import _get_natural_language_primitives

# Limit handed to the `pytest.mark.timeout` marker on the test below
# (pytest-timeout interprets the value as seconds).
TIMEOUT_THRESHOLD = 20


class TestNaturalLanguagePrimitivesTerminate:
    """Runs every Natural Language primitive on known-problematic strings."""

    # pytest hits a collection error unless the primitives are sorted first
    primitives = sorted(_get_natural_language_primitives().items())

    @pytest.mark.timeout(TIMEOUT_THRESHOLD)
    @pytest.mark.parametrize(
        "primitive",
        [primitive_class for _name, primitive_class in primitives],
    )
    def test_natlang_primitive_does_not_timeout(
        self,
        strings_that_have_triggered_errors_before,
        primitive,
    ):
        """Apply the primitive's function to each problematic string in turn."""
        for problem_text in strings_that_have_triggered_errors_before:
            compute = primitive().get_function()
            compute(pd.Series(problem_text))
1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -71,6 +71,7 @@ test = [
"pytest-xdist >= 2.5.0",
"smart-open >= 5.0.0",
"urllib3 >= 1.26.5",
"pytest-timeout >= 2.1.0"
]
spark = [
"woodwork[spark] >= 0.18.0",
Expand Down