Remove dropna and fillna

has2k1 · Nov 2, 2017 · commit 3f50e96
1 parent: 7410868
Show file tree

Hide file tree

Showing 5 changed files with 11 additions and 284 deletions.
diff --git a/doc/api.rst b/doc/api.rst
@@ -19,8 +19,6 @@ One table verbs
    define
    distinct
    do
-   dropna
-   fillna
    group_by
    group_indices
    head

diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -76,12 +76,17 @@ API Changes
 - Using internal function for :class:`~plydata.one_table_verbs.summarize` that
   counts the number of elements in the current group changed from
   ``{n}`` to ``n()``.
+
 - You can now use piping with the two table verbs (the joins).
 
 - ``modify_where`` and ``define_where`` helper verbs have been removed.
   Using the new expression helper functions :class:`~plydata.expressions.case_when`
   and :class:`~plydata.expressions.if_else` is more readable.
 
+- Removed ``dropna`` and ``fillna`` in favour of using
+  :class:`~plydata.helper_verbs.call` with :meth:`pandas.DataFrame.dropna` and
+  :meth:`pandas.DataFrame.fillna`.
+
 
 v0.2.1
 ------

diff --git a/plydata/dataframe/one_table.py b/plydata/dataframe/one_table.py
@@ -7,16 +7,15 @@
 import pandas as pd
 
 from ..types import GroupedDataFrame
-from ..options import get_option, options
+from ..options import get_option
 from ..utils import Q, get_empty_env, regular_index, unique
 from .common import Evaluator, Selector
 from .common import _get_groups, _get_base_dataframe
 
 __all__ = ['arrange', 'create', 'define', 'distinct', 'do',
-           'dropna', 'fillna', 'group_by', 'group_indices',
-           'head',  'mutate', 'query', 'rename', 'sample_frac',
-           'sample_n', 'select', 'summarize', 'tail', 'ungroup',
-           'unique']
+           'group_by', 'group_indices', 'head',  'mutate',
+           'query', 'rename', 'sample_frac', 'sample_n',
+           'select', 'summarize', 'tail', 'ungroup', 'unique']
 
 
 def define(verb):
@@ -194,29 +193,6 @@ def tail(verb):
     return data
 
 
-def dropna(verb):
-    result = verb.data.dropna(
-        axis=verb.axis,
-        how=verb.how,
-        thresh=verb.thresh,
-        subset=verb.subset
-    )
-    return result
-
-
-def fillna(verb):
-    inplace = get_option('modify_input_data')
-    result = verb.data.fillna(
-        value=verb.value,
-        method=verb.method,
-        axis=verb.axis,
-        limit=verb.limit,
-        downcast=verb.downcast,
-        inplace=inplace
-    )
-    return result if not inplace else verb.data
-
-
 # Aggregations functions
 
 def _nth(arr, n):

diff --git a/plydata/one_table_verbs.py b/plydata/one_table_verbs.py
@@ -9,7 +9,7 @@
 __all__ = ['define', 'create', 'sample_n', 'sample_frac', 'select',
            'rename', 'distinct', 'unique', 'arrange', 'group_by',
            'ungroup', 'group_indices', 'summarize',
-           'query', 'do', 'head', 'tail', 'dropna', 'fillna',
+           'query', 'do', 'head', 'tail',
            # Aliases
            'summarise', 'mutate', 'transmute',
            ]
@@ -987,238 +987,6 @@ def __init__(self, n=5):
         self.n = n
 
 
-class dropna(DataOperator):
-    """
-    Remove rows or columns with missing values
-
-    This is a wrapper around :meth:`pandas.DataFrame.dropna`. It
-    is useful because you cannot :class:`query` ``NaN`` values.
-
-    Parameters
-    ----------
-    axis : {0 or 'index', 1 or 'columns'}, or tuple/list thereof
-        Pass tuple or list to drop on multiple axes
-    how : {'any', 'all'}
-        * any : if any NA values are present, drop that label
-        * all : if all values are NA, drop that label
-    thresh : int, default None
-        int value : require that many non-NA values
-    subset : array-like
-        Labels along other axis to consider, e.g. if you are
-        dropping rows these would be a list of columns to include
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> import numpy as np
-    >>> df = pd.DataFrame({
-    ...     'w': [1, 2, np.nan, 4, 5],
-    ...     'x': [np.nan, 2, np.nan, 4, 5],
-    ...     'y': [np.nan] * 4 + [5],
-    ...     'z': [np.nan] * 5
-    ... })
-    >>> df
-         w    x    y   z
-    0  1.0  NaN  NaN NaN
-    1  2.0  2.0  NaN NaN
-    2  NaN  NaN  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Drop rows with any ``NaN`` values
-
-    >>> df >> dropna()
-    Empty DataFrame
-    Columns: [w, x, y, z]
-    Index: []
-
-    Drop rows with all ``NaN`` values
-
-    >>> df >> dropna(how='all')
-         w    x    y   z
-    0  1.0  NaN  NaN NaN
-    1  2.0  2.0  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Drop rows with ``NaN`` values in the *x* column.
-
-    >>> df >> dropna(subset=['x'])
-         w    x    y   z
-    1  2.0  2.0  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Drop and keep rows atleast 3 ``non-NaN`` values
-
-    >>> df >> dropna(thresh=3)
-         w    x    y   z
-    4  5.0  5.0  5.0 NaN
-
-    Drop columns with all ``NaN`` values
-
-    >>> df >> dropna(axis=1, how='all')
-         w    x    y
-    0  1.0  NaN  NaN
-    1  2.0  2.0  NaN
-    2  NaN  NaN  NaN
-    3  4.0  4.0  NaN
-    4  5.0  5.0  5.0
-
-    Drop columns with any ``NaN`` values in row 3.
-
-    >>> df >> dropna(axis=1, subset=[3])
-         w    x
-    0  1.0  NaN
-    1  2.0  2.0
-    2  NaN  NaN
-    3  4.0  4.0
-    4  5.0  5.0
-    """
-
-    def __init__(self, axis=0, how='any', thresh=None, subset=None):
-        self.axis = axis
-        self.how = how
-        self.thresh = thresh
-        self.subset = subset
-
-
-class fillna(DataOperator):
-    """
-    Fill NA/NaN values using the specified method
-
-    This is a wrapper around :meth:`pandas.DataFrame.fillna`. It
-    is useful because you cannot :class:`modify_where` ``NaN``
-    values.
-
-    Parameters
-    ----------
-    value : scalar, dict, Series, or DataFrame
-        Value to use to fill holes (e.g. 0), alternately a
-        dict/Series/DataFrame of values specifying which value to
-        use for each index (for a Series) or column (for a DataFrame).
-        (values not in the dict/Series/DataFrame will not be filled).
-        This value cannot be a list.
-    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
-        Method to use for filling holes in reindexed Series
-        pad / ffill: propagate last valid observation forward to next
-        valid backfill / bfill: use NEXT valid observation to fill gap
-    axis : {0 or 'index', 1 or 'columns'}
-    inplace : boolean, default False
-        If True, fill in place. Note: this will modify any
-        other views on this object, (e.g. a no-copy slice for a column
-        in a DataFrame).
-    limit : int, default None
-        If method is specified, this is the maximum number of
-        consecutive NaN values to forward/backward fill. In other
-        words, if there is a gap with more than this number of
-        consecutive NaNs, it will only be partially filled. If method
-        is not specified, this is the maximum number of entries along
-        the entire axis where NaNs will be filled. Must be greater
-        than 0 if not None.
-    downcast : dict, default is None
-        a dict of item->dtype of what to downcast if possible, or the
-        string 'infer' which will try to downcast to an appropriate
-        equal type (e.g. float64 to int64 if possible)
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> import numpy as np
-    >>> df = pd.DataFrame({
-    ...     'w': [1, 2, np.nan, 4, 5],
-    ...     'x': [np.nan, 2, np.nan, 4, 5],
-    ...     'y': [np.nan] * 4 + [5],
-    ...     'z': [np.nan] * 5
-    ... })
-    >>> df
-         w    x    y   z
-    0  1.0  NaN  NaN NaN
-    1  2.0  2.0  NaN NaN
-    2  NaN  NaN  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Replace all ``NaN`` values with -1.
-
-    >>> df >> fillna(-1)
-         w    x    y    z
-    0  1.0 -1.0 -1.0 -1.0
-    1  2.0  2.0 -1.0 -1.0
-    2 -1.0 -1.0 -1.0 -1.0
-    3  4.0  4.0 -1.0 -1.0
-    4  5.0  5.0  5.0 -1.0
-
-    Replace all ``NaN`` values with the first ``non-NaN`` value *above
-    in column*
-
-    >>> df >> fillna(method='ffill')
-         w    x    y   z
-    0  1.0  NaN  NaN NaN
-    1  2.0  2.0  NaN NaN
-    2  2.0  2.0  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Replace all ``NaN`` values with the first ``non-NaN`` value *below
-    in column*
-
-    >>> df >> fillna(method='bfill')
-         w    x    y   z
-    0  1.0  2.0  5.0 NaN
-    1  2.0  2.0  5.0 NaN
-    2  4.0  4.0  5.0 NaN
-    3  4.0  4.0  5.0 NaN
-    4  5.0  5.0  5.0 NaN
-
-    Replace atmost 2 ``NaN`` values with the first ``non-NaN`` value
-    *below in column*
-
-    >>> df >> fillna(method='bfill', limit=2)
-         w    x    y   z
-    0  1.0  2.0  NaN NaN
-    1  2.0  2.0  NaN NaN
-    2  4.0  4.0  5.0 NaN
-    3  4.0  4.0  5.0 NaN
-    4  5.0  5.0  5.0 NaN
-
-    Replace all ``NaN`` values with the first ``non-NaN`` value to the
-    *left in the row*
-
-    >>> df >> fillna(method='ffill', axis=1)
-         w    x    y    z
-    0  1.0  1.0  1.0  1.0
-    1  2.0  2.0  2.0  2.0
-    2  NaN  NaN  NaN  NaN
-    3  4.0  4.0  4.0  4.0
-    4  5.0  5.0  5.0  5.0
-
-    Replace all ``NaN`` values with the first ``non-NaN`` value to the
-    *right in the row*
-
-    >>> df >> fillna(method='bfill', axis=1)
-         w    x    y   z
-    0  1.0  NaN  NaN NaN
-    1  2.0  2.0  NaN NaN
-    2  NaN  NaN  NaN NaN
-    3  4.0  4.0  NaN NaN
-    4  5.0  5.0  5.0 NaN
-
-    Note
-    ----
-    If :obj:`plydata.options.modify_input_data` is ``True``,
-    :class:`modify_where` will modify the original dataframe.
-    """
-
-    def __init__(self, value=None, method=None, axis=None, limit=None,
-                 downcast=None):
-        self.value = value
-        self.method = method
-        self.axis = axis
-        self.limit = limit
-        self.downcast = downcast
-
-
 # Aliases
 mutate = define
 transmute = create

diff --git a/plydata/tests/test_dataframe.py b/plydata/tests/test_dataframe.py
@@ -10,7 +10,7 @@
                      rename, distinct, arrange, group_by, ungroup,
                      group_indices, summarize, query, do, head, tail,
                      tally, count, add_tally, add_count,
-                     fillna, call,
+                     call,
                      arrange_all, arrange_at, arrange_if,
                      create_all, create_at, create_if,
                      group_by_all, group_by_at, group_by_if,
@@ -761,16 +761,6 @@ def test_add_count():
     assert isinstance(result, GroupedDataFrame)
 
 
-def test_dropna():
-    # wraps around pandas and doctests are sufficient
-    pass
-
-
-def test_fillna():
-    # wraps around pandas and doctests are adequate
-    pass
-
-
 def test_call():
     def remove_column_a(df):
         _df = df.copy()
@@ -847,11 +837,6 @@ def test_data_mutability():
     df >> group_by(z='x**2')
     assert 'z' not in df
 
-    df2 = df.copy()
-    df2['x'] = np.nan
-    df2 >> fillna(-1)
-    assert all(df2['x'].isnull())
-
     set_option('modify_input_data', True)
 
     df2 = df.copy()
@@ -862,11 +847,6 @@ def test_data_mutability():
     df2 >> group_by(z='x**2')
     assert 'z' in df2
 
-    df2 = df.copy()
-    df2['x'] = np.nan
-    df2 >> fillna(-1)
-    assert all(df2['x'] == -1)
-
     # Not mutable
     df2 = df.copy()
     df2 >> create(z='x**2')