Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename the `is_english` text property ('Is English') to `english_text` ('English Text') #2648

Merged
merged 1 commit into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions deepchecks/nlp/utils/text_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def language(
return language_code


def is_english(
def english_text(
text: str,
lang_certainty_threshold: float = 0.8,
fasttext_model: Optional[Dict[object, Any]] = None,
Expand Down Expand Up @@ -512,7 +512,7 @@ class TextProperty(TypedDict):

ALL_PROPERTIES: Tuple[TextProperty, ...] = \
(
{'name': 'Is English', 'method': is_english, 'output_type': 'categorical'},
{'name': 'English Text', 'method': english_text, 'output_type': 'categorical'},
{'name': 'URLs Count', 'method': urls_count, 'output_type': 'numeric'},
{'name': 'Email Addresses Count', 'method': email_addresses_count, 'output_type': 'numeric'},
{'name': 'Unique URLs Count', 'method': unique_urls_count, 'output_type': 'numeric'},
Expand Down Expand Up @@ -563,7 +563,7 @@ class TextProperty(TypedDict):
'URLs Count': 'Number of URLS per text sample',
'Email Addresses Count': 'Number of email addresses per text sample',
'Unique URLs Count': 'Number of unique URLS per text sample',
'Is English': 'Whether the text is in English (1) or not (0)',
'English Text': 'Whether the text is in English (1) or not (0)',
'Unique Email Addresses Count': 'Number of unique email addresses per text sample',
'Unique Syllables Count': 'Number of unique syllables per text sample',
'Reading Time': 'Time taken in seconds to read a text sample',
Expand Down
12 changes: 6 additions & 6 deletions tests/nlp/utils/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from deepchecks.core.errors import DeepchecksValueError
from deepchecks.nlp.utils.text_properties import (_sample_for_property, calculate_builtin_properties,
is_english)
english_text)
from deepchecks.nlp.utils.text_properties_models import MODELS_STORAGE, _get_transformer_model


Expand Down Expand Up @@ -344,15 +344,15 @@ def test_calculate_average_syllable_count(tweet_emotion_train_test_textdata):
assert_that(result_none_text['Average Syllable Length'], equal_to([np.nan]))


def test_calcualte_is_english_property():
def test_calcualte_english_text_property():
data = ['This is a sentence in English.', 'Это предложение на русском языке.']
result = calculate_builtin_properties(data, include_properties=['Is English'])[0]
assert_that(result['Is English'], equal_to([True, False]))
result = calculate_builtin_properties(data, include_properties=['English Text'])[0]
assert_that(result['English Text'], equal_to([True, False]))


def test_calcualte_is_english_property_without_language_precalculation():
def test_calcualte_english_text_property_without_language_precalculation():
data = ['This is a sentence in English.', 'Это предложение на русском языке.']
assert_that([is_english(data[0]), is_english(data[1])], equal_to([True, False]))
assert_that([english_text(data[0]), english_text(data[1])], equal_to([True, False]))


def test_include_properties():
Expand Down