Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add isna, notna functions #210

Merged
merged 4 commits into from May 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/conf.py
Expand Up @@ -66,11 +66,11 @@

extlinks = {
"pandas_api_docs": (
"https://pandas.pydata.org/pandas-docs/version/0.25.3/reference/api/%s.html",
"https://pandas.pydata.org/pandas-docs/stable/reference/api/%s.html",
"",
),
"pandas_user_guide": (
"https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html",
"https://pandas.pydata.org/pandas-docs/stable/user_guide/%s.html",
"Pandas User Guide/",
),
"es_api_docs": (
Expand Down
6 changes: 6 additions & 0 deletions docs/source/reference/api/eland.Series.isna.rst
@@ -0,0 +1,6 @@
eland.Series.isna
==================

.. currentmodule:: eland

.. automethod:: Series.isna
6 changes: 6 additions & 0 deletions docs/source/reference/api/eland.Series.notna.rst
@@ -0,0 +1,6 @@
eland.Series.notna
==================

.. currentmodule:: eland

.. automethod:: Series.notna
2 changes: 2 additions & 0 deletions docs/source/reference/series.rst
Expand Up @@ -74,6 +74,8 @@ Reindexing / selection / label manipulation
:toctree: api/

Series.rename
Series.isna
Series.notna

Plotting
~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion eland/filter.py
Expand Up @@ -141,7 +141,7 @@ def __init__(self, field: str, value: str) -> None:
class IsNull(BooleanFilter):
    """Boolean filter matching documents in which ``field`` has no value."""

    def __init__(self, field: str) -> None:
        """
        Parameters
        ----------
        field : str
            Name of the field to test for absence.
        """
        super().__init__()
        # Absence is expressed as a negated "exists" query. The earlier form,
        # {"missing": {"field": field}}, was dropped: it was immediately
        # overwritten here, and the standalone "missing" query is no longer
        # offered by current Elasticsearch releases.
        self._filter = {"bool": {"must_not": {"exists": {"field": field}}}}


class NotNull(BooleanFilter):
Expand Down
37 changes: 37 additions & 0 deletions eland/series.py
Expand Up @@ -39,6 +39,8 @@
LessEqual,
ScriptFilter,
IsIn,
IsNull,
NotNull,
)


Expand Down Expand Up @@ -468,6 +470,41 @@ def isin(self, other):
else:
raise NotImplementedError(other, type(other))

def isna(self):
    """
    Detect missing values.

    Returns
    -------
    IsNull
        Boolean filter on this Series' field that selects the elements
        whose value is missing (NA). It can be used as a row mask, e.g.
        ``df[df["field"].isna()]``, and inverted with ``~``.

    See Also
    --------
    :pandas_api_docs:`pandas.Series.isna`
    """
    return IsNull(field=self.name)

isnull = isna

def notna(self):
    """
    Detect existing (non-missing) values.

    Returns
    -------
    NotNull
        Boolean filter on this Series' field that selects the elements
        whose value is not missing (not NA). It can be used as a row mask,
        e.g. ``df[df["field"].notna()]``.

    See Also
    --------
    :pandas_api_docs:`pandas.Series.notna`
    """
    return NotNull(field=self.name)

notnull = notna

@property
def ndim(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion eland/tests/operators/test_operators_pytest.py
Expand Up @@ -31,7 +31,7 @@ def test_leaf_boolean_filter(self):
assert Like("a", "a*b").build() == {"wildcard": {"a": "a*b"}}
assert Rlike("a", "a*b").build() == {"regexp": {"a": "a*b"}}
assert Startswith("a", "jj").build() == {"prefix": {"a": "jj"}}
assert IsNull("a").build() == {"missing": {"field": "a"}}
assert IsNull("a").build() == {"bool": {"must_not": {"exists": {"field": "a"}}}}
assert NotNull("a").build() == {"exists": {"field": "a"}}
assert ScriptFilter(
'doc["num1"].value > params.param1', lang="painless", params={"param1": 5}
Expand Down
44 changes: 44 additions & 0 deletions eland/tests/series/test_na_pytest.py
@@ -0,0 +1,44 @@
# Licensed to Elasticsearch B.V under one or more agreements.
# Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
# See the LICENSE file in the project root for more information

from eland import eland_to_pandas
from eland.tests.common import TestData
from eland.tests.common import assert_pandas_eland_frame_equal


class TestSeriesNA(TestData):
    """Compare ``isna`` / ``notna`` row selection between eland and pandas."""

    # Columns exercised by the looped tests below.
    columns = [
        "currency",
        "customer_full_name",
        "geoip.country_iso_code",
        "geoip.region_name",
    ]

    def test_not_isna(self):
        """Selecting with ``~isna()`` yields equal frames for every listed column."""
        ed_df = self.ed_ecommerce()
        pd_df = eland_to_pandas(ed_df)

        for name in self.columns:
            ed_selected = ed_df[~ed_df[name].isna()]
            pd_selected = pd_df[~pd_df[name].isna()]
            assert_pandas_eland_frame_equal(pd_selected, ed_selected)

    def test_isna(self):
        """Selecting with ``isna()`` on ``geoip.region_name`` yields equal frames."""
        ed_df = self.ed_ecommerce()
        pd_df = eland_to_pandas(ed_df)

        ed_missing = ed_df[ed_df["geoip.region_name"].isna()]
        pd_missing = pd_df[pd_df["geoip.region_name"].isna()]
        assert_pandas_eland_frame_equal(pd_missing, ed_missing)

    def test_notna(self):
        """Selecting with ``notna()`` yields equal frames for every listed column."""
        ed_df = self.ed_ecommerce()
        pd_df = eland_to_pandas(ed_df)

        for name in self.columns:
            ed_present = ed_df[ed_df[name].notna()]
            pd_present = pd_df[pd_df[name].notna()]
            assert_pandas_eland_frame_equal(pd_present, ed_present)