MAINT: Split package to scripts (#475)
* MAINT: split package into scripts

* lint code

* move code outside

* use __len__ instead of len

* lint code

* split package into scripts

* split test_both.py into separate scripts

* Simplify a bit

* BOT: auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* lint code

* import pandas

* Drop unused import

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Zeroto521 and pre-commit-ci[bot] authored Apr 2, 2022
1 parent 43ae25f commit b1a4873
Showing 40 changed files with 2,933 additions and 2,726 deletions.
895 changes: 0 additions & 895 deletions dtoolkit/accessor/dataframe.py

This file was deleted.

9 changes: 9 additions & 0 deletions dtoolkit/accessor/dataframe/__init__.py
@@ -0,0 +1,9 @@
from dtoolkit.accessor.dataframe.cols import cols # noqa
from dtoolkit.accessor.dataframe.drop_inf import drop_inf # noqa
from dtoolkit.accessor.dataframe.expand import expand # noqa
from dtoolkit.accessor.dataframe.filter_in import filter_in # noqa
from dtoolkit.accessor.dataframe.repeat import repeat # noqa
from dtoolkit.accessor.dataframe.to_series import to_series # noqa
from dtoolkit.accessor.dataframe.top_n import top_n # noqa
from dtoolkit.accessor.dataframe.unique_counts import unique_counts # noqa
from dtoolkit.accessor.dataframe.values_to_dict import values_to_dict # noqa
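The package ``__init__`` above re-exports every accessor, so importing ``dtoolkit.accessor`` still registers each method on ``pandas.DataFrame`` exactly as the old single-module ``dataframe.py`` did. A minimal usage sketch (assuming only that pandas and dtoolkit are installed):

import pandas as pd

import dtoolkit.accessor  # noqa: F401 - imported for its registration side effect

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
print(df.cols())  # ['a', 'b'], provided by dtoolkit.accessor.dataframe.cols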
30 changes: 30 additions & 0 deletions dtoolkit/accessor/dataframe/cols.py
@@ -0,0 +1,30 @@
from __future__ import annotations

from textwrap import dedent
from typing import TYPE_CHECKING

import pandas as pd
from pandas.util._decorators import doc

from dtoolkit.accessor.register import register_dataframe_method
from dtoolkit.accessor.series import cols as s_cols


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
@doc(
    s_cols,
    returns=dedent(
        """
        Returns
        -------
        list of str or int
            The column names.
        """,
    ),
)
def cols(df: pd.DataFrame) -> list[IntOrStr]:
    return df.columns.tolist()
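``cols`` borrows its docstring from the Series-level ``cols`` accessor: ``@doc(s_cols, returns=...)`` treats the Series docstring as a template and fills in the ``returns`` section, so both accessors are documented from a single source. A rough, simplified illustration of that templating idea (not pandas' actual ``doc`` implementation; the ``{returns}`` placeholder and helper names below are assumptions for illustration only):

# Simplified stand-in for pandas.util._decorators.doc: borrow another
# function's docstring and substitute named placeholders such as {returns}.
def fill_doc(template_func, **sections):
    def decorator(func):
        func.__doc__ = (template_func.__doc__ or "").format(**sections)
        return func
    return decorator


def series_cols():
    """Get the column name(s).

    {returns}
    """


@fill_doc(series_cols, returns="list of str or int -- the column names.")
def frame_cols():
    ...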
151 changes: 151 additions & 0 deletions dtoolkit/accessor/dataframe/drop_inf.py
@@ -0,0 +1,151 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from pandas.util._validators import validate_bool_kwarg

from dtoolkit.accessor._util import get_inf_range
from dtoolkit.accessor._util import get_mask
from dtoolkit.accessor.register import register_dataframe_method


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
def drop_inf(
    df: pd.DataFrame,
    axis: IntOrStr = 0,
    how: str = "any",
    inf: str = "all",
    subset: list[str] = None,
    inplace: bool = False,
) -> pd.DataFrame | None:
    """
    Remove ``inf`` values.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine if rows or columns which contain ``inf`` values are
        removed.

        * 0, or 'index' : Drop rows which contain ``inf`` values.
        * 1, or 'columns' : Drop columns which contain ``inf`` values.

    how : {'any', 'all'}, default 'any'
        Determine if row or column is removed from :obj:`~pandas.DataFrame`,
        when we have at least one ``inf`` or all ``inf``.

        * 'any' : If any ``inf`` values are present, drop that row or column.
        * 'all' : If all values are ``inf``, drop that row or column.

    inf : {'all', 'pos', 'neg'}, default 'all'
        * 'all' : Remove ``inf`` and ``-inf``.
        * 'pos' : Only remove ``inf``.
        * 'neg' : Only remove ``-inf``.

    subset : array-like, optional
        Labels along the other axis to consider, e.g. if you are dropping
        rows these would be a list of columns to include.

    inplace : bool, default False
        If True, do the operation in place and return None.

    Returns
    -------
    DataFrame or None
        DataFrame with ``inf`` entries dropped from it, or None if
        ``inplace=True``.

    See Also
    --------
    dtoolkit.accessor.series.drop_inf
        :obj:`~pandas.Series` drops ``inf`` values.

    Examples
    --------
    >>> import dtoolkit.accessor
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
    ...                    "toy": [np.inf, 'Batmobile', 'Bullwhip'],
    ...                    "born": [np.inf, pd.Timestamp("1940-04-25"),
    ...                             -np.inf]})
    >>> df
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is inf or -inf.

    >>> df.drop_inf()
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00

    Drop the columns where at least one element is inf or -inf.

    >>> df.drop_inf(axis='columns')
           name
    0    Alfred
    1    Batman
    2  Catwoman

    Drop the rows where all elements are inf or -inf.

    >>> df.drop_inf(how='all')
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is -inf.

    >>> df.drop_inf(inf='neg')
         name        toy                 born
    0  Alfred        inf                  inf
    1  Batman  Batmobile  1940-04-25 00:00:00

    Define in which columns to look for inf and -inf values.

    >>> df.drop_inf(subset=['name', 'toy'])
           name        toy                 born
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Keep the DataFrame with valid entries in the same variable.

    >>> df.drop_inf(inplace=True)
    >>> df
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00
    """

    inplace = validate_bool_kwarg(inplace, "inplace")

    axis = df._get_axis_number(axis)
    agg_axis = 1 - axis

    agg_obj = df
    if subset is not None:
        ax = df._get_axis(agg_axis)
        indices = ax.get_indexer_for(subset)
        check = indices == -1
        if check.any():
            raise KeyError(list(np.compress(check, subset)))

        agg_obj = df.take(indices, axis=agg_axis)

    inf_range = get_inf_range(inf)
    mask = agg_obj.isin(inf_range)
    mask = get_mask(how, mask, agg_axis)
    result = df.loc(axis=axis)[~mask]

    if not inplace:
        return result

    df._update_inplace(result)
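``drop_inf`` leans on two helpers from ``dtoolkit/accessor/_util.py`` that are not part of this diff: ``get_inf_range`` maps the ``inf`` keyword to the concrete values to match, and ``get_mask`` reduces the element-wise boolean mask along the aggregation axis. A hedged sketch of what they presumably look like, reconstructed only from how they are called here (the real implementations may differ):

# Sketch of the helpers drop_inf relies on; the actual versions live in
# dtoolkit/accessor/_util.py, which is not shown in this commit excerpt.
from __future__ import annotations

import numpy as np
import pandas as pd


def get_inf_range(inf: str = "all") -> list[float]:
    # Map the ``inf`` keyword to the infinity values to drop.
    ranges = {
        "all": [np.inf, -np.inf],
        "pos": [np.inf],
        "neg": [-np.inf],
    }
    if inf not in ranges:
        raise ValueError(f"invalid inf option: {inf!r}")
    return ranges[inf]


def get_mask(how: str, mask: pd.DataFrame, axis: int) -> pd.Series:
    # Collapse the element-wise boolean mask along ``axis`` with any/all,
    # producing one flag per row (or column) to drop.
    if how == "any":
        return mask.any(axis=axis)
    if how == "all":
        return mask.all(axis=axis)
    raise ValueError(f"invalid how option: {how!r}")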
97 changes: 97 additions & 0 deletions dtoolkit/accessor/dataframe/expand.py
@@ -0,0 +1,97 @@
from __future__ import annotations

from textwrap import dedent
from typing import TYPE_CHECKING

import pandas as pd
from pandas.util._decorators import doc

from dtoolkit.accessor.register import register_dataframe_method
from dtoolkit.accessor.series import expand as s_expand


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
@doc(
    s_expand,
    see_also=dedent(
        """
        See Also
        --------
        dtoolkit.accessor.series.expand
            Transform each element of a list-like to a column.
        pandas.DataFrame.explode
            Transform each element of a list-like to a row.
        """,
    ),
    examples=dedent(
        """
        Examples
        --------
        >>> import dtoolkit.accessor
        >>> import pandas as pd
        >>> import numpy as np

        Expand the *list-like* elements.

        >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
        ...                    'B': 1,
        ...                    'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
        >>> df.expand()
            A_0  A_1  A_2  B   C_0   C_1   C_2
        0     0  1.0  2.0  1     a     b     c
        1   foo  NaN  NaN  1   NaN  None  None
        2  None  NaN  NaN  1  None  None  None
        3     3  4.0  NaN  1     d     e  None

        Expand elements whose *sub-elements* are also list-like.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", "b"), (3, (5, 6))]})
        >>> df.expand(flatten=True)
           col1 col2_0 col2_1 col2_2
        0     1      a      b    NaN
        1     2      3      5    6.0

        Set the column names via ``suffix`` and ``delimiter``.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", 3), ("b", 4)]})
        >>> df.expand(suffix=["index", "value"], delimiter="-")
           col1 col2-index col2-value
        0     1          a          3
        1     2          b          4

        It can also handle **different lengths** of elements and suffix lists.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [(3, 4), (5, 6, 7)]})
        >>> df.expand()
           col1  col2_0  col2_1  col2_2
        0     1       3       4     NaN
        1     2       5       6     7.0
        >>> df.expand(suffix=["a", "b", "c", "d"])
           col1  col2_a  col2_b  col2_c
        0     1       3       4     NaN
        1     2       5       6     7.0
        """,
    ),
)
def expand(
    df: pd.DataFrame,
    suffix: list[IntOrStr] = None,
    delimiter: str = "_",
    flatten: bool = False,
) -> pd.DataFrame:

    return pd.concat(
        (
            df.get(column).expand(
                suffix=suffix,
                delimiter=delimiter,
                flatten=flatten,
            )
            for column in df
        ),
        axis=1,
    )
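The DataFrame method delegates each column to the Series-level ``expand`` accessor (imported as ``s_expand`` but not part of this diff) and concatenates the results column-wise. A rough sketch of the per-column behaviour it appears to rely on, ignoring the ``suffix`` and ``flatten`` handling (``expand_column`` is a hypothetical name; the real Series accessor is more capable):

# Assumption-laden sketch of a per-column expand: list-like elements become
# extra columns named ``<name>_<i>``; purely scalar columns pass through.
from __future__ import annotations

import pandas as pd


def expand_column(s: pd.Series, delimiter: str = "_") -> pd.DataFrame | pd.Series:
    # Pass the column through untouched if no element is list-like.
    if not s.apply(pd.api.types.is_list_like).any():
        return s

    # Wrap scalars so every row is a list, then let DataFrame pad short rows with NaN.
    rows = [x if pd.api.types.is_list_like(x) else [x] for x in s]
    expanded = pd.DataFrame(rows, index=s.index)
    expanded.columns = [f"{s.name}{delimiter}{i}" for i in expanded.columns]
    return expanded

With a helper like this, ``df.expand()`` is roughly ``pd.concat([expand_column(df[c]) for c in df], axis=1)``, which is exactly the shape of the function above.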