MAINT: Split package to scripts (#475)
* MAINT: split package into scripts

* lint code

* move code outside

* use __len__ instead of len

* lint code

* split package into scripts

* split test_both.py into separate scripts

* Simplify a bit

* BOT: auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* lint code

* import pandas

* Drop unused import

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Zeroto521 and pre-commit-ci[bot] authored Apr 2, 2022
1 parent 43ae25f commit b1a4873
Showing 40 changed files with 2,933 additions and 2,726 deletions.
895 changes: 0 additions & 895 deletions dtoolkit/accessor/dataframe.py

This file was deleted.

9 changes: 9 additions & 0 deletions dtoolkit/accessor/dataframe/__init__.py
@@ -0,0 +1,9 @@
from dtoolkit.accessor.dataframe.cols import cols # noqa
from dtoolkit.accessor.dataframe.drop_inf import drop_inf # noqa
from dtoolkit.accessor.dataframe.expand import expand # noqa
from dtoolkit.accessor.dataframe.filter_in import filter_in # noqa
from dtoolkit.accessor.dataframe.repeat import repeat # noqa
from dtoolkit.accessor.dataframe.to_series import to_series # noqa
from dtoolkit.accessor.dataframe.top_n import top_n # noqa
from dtoolkit.accessor.dataframe.unique_counts import unique_counts # noqa
from dtoolkit.accessor.dataframe.values_to_dict import values_to_dict # noqa
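The package ``__init__`` above re-exports every accessor, so importing ``dtoolkit.accessor`` still registers each method on ``pandas.DataFrame`` exactly as the old single-module ``dataframe.py`` did. A minimal usage sketch (assuming only that pandas and dtoolkit are installed):

import pandas as pd

import dtoolkit.accessor  # noqa: F401 - imported for its registration side effect

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
print(df.cols())  # ['a', 'b'], provided by dtoolkit.accessor.dataframe.cols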
30 changes: 30 additions & 0 deletions dtoolkit/accessor/dataframe/cols.py
@@ -0,0 +1,30 @@
from __future__ import annotations

from textwrap import dedent
from typing import TYPE_CHECKING

import pandas as pd
from pandas.util._decorators import doc

from dtoolkit.accessor.register import register_dataframe_method
from dtoolkit.accessor.series import cols as s_cols


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
@doc(
    s_cols,
    returns=dedent(
        """
        Returns
        -------
        list of str or int
            The column names.
        """,
    ),
)
def cols(df: pd.DataFrame) -> list[IntOrStr]:
    return df.columns.tolist()
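``cols`` borrows its docstring from the Series-level ``cols`` accessor: ``@doc(s_cols, returns=...)`` treats the Series docstring as a template and fills in the ``returns`` section, so both accessors are documented from a single source. A rough, simplified illustration of that templating idea (not pandas' actual ``doc`` implementation; the ``{returns}`` placeholder and helper names below are assumptions for illustration only):

# Simplified stand-in for pandas.util._decorators.doc: borrow another
# function's docstring and substitute named placeholders such as {returns}.
def fill_doc(template_func, **sections):
    def decorator(func):
        func.__doc__ = (template_func.__doc__ or "").format(**sections)
        return func
    return decorator


def series_cols():
    """Get the column name(s).

    {returns}
    """


@fill_doc(series_cols, returns="list of str or int -- the column names.")
def frame_cols():
    ...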
151 changes: 151 additions & 0 deletions dtoolkit/accessor/dataframe/drop_inf.py
@@ -0,0 +1,151 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from pandas.util._validators import validate_bool_kwarg

from dtoolkit.accessor._util import get_inf_range
from dtoolkit.accessor._util import get_mask
from dtoolkit.accessor.register import register_dataframe_method


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
def drop_inf(
    df: pd.DataFrame,
    axis: IntOrStr = 0,
    how: str = "any",
    inf: str = "all",
    subset: list[str] = None,
    inplace: bool = False,
) -> pd.DataFrame | None:
    """
    Remove ``inf`` values.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine if rows or columns which contain ``inf`` values are
        removed.

        * 0, or 'index' : Drop rows which contain ``inf`` values.
        * 1, or 'columns' : Drop columns which contain ``inf`` values.

    how : {'any', 'all'}, default 'any'
        Determine if row or column is removed from :obj:`~pandas.DataFrame`,
        when we have at least one ``inf`` or all ``inf``.

        * 'any' : If any ``inf`` values are present, drop that row or column.
        * 'all' : If all values are ``inf``, drop that row or column.

    inf : {'all', 'pos', 'neg'}, default 'all'
        * 'all' : Remove ``inf`` and ``-inf``.
        * 'pos' : Only remove ``inf``.
        * 'neg' : Only remove ``-inf``.

    subset : array-like, optional
        Labels along the other axis to consider, e.g. if you are dropping
        rows these would be a list of columns to include.

    inplace : bool, default False
        If True, do the operation in place and return None.

    Returns
    -------
    DataFrame or None
        DataFrame with ``inf`` entries dropped from it, or None if
        ``inplace=True``.

    See Also
    --------
    dtoolkit.accessor.series.drop_inf
        :obj:`~pandas.Series` drops ``inf`` values.

    Examples
    --------
    >>> import dtoolkit.accessor
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
    ...                    "toy": [np.inf, 'Batmobile', 'Bullwhip'],
    ...                    "born": [np.inf, pd.Timestamp("1940-04-25"),
    ...                             -np.inf]})
    >>> df
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is inf or -inf.

    >>> df.drop_inf()
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00

    Drop the columns where at least one element is inf or -inf.

    >>> df.drop_inf(axis='columns')
           name
    0    Alfred
    1    Batman
    2  Catwoman

    Drop the rows where all elements are inf or -inf.

    >>> df.drop_inf(how='all')
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is -inf.

    >>> df.drop_inf(inf='neg')
         name        toy                 born
    0  Alfred        inf                  inf
    1  Batman  Batmobile  1940-04-25 00:00:00

    Define in which columns to look for inf and -inf values.

    >>> df.drop_inf(subset=['name', 'toy'])
           name        toy                 born
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Keep the DataFrame with valid entries in the same variable.

    >>> df.drop_inf(inplace=True)
    >>> df
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00
    """

    inplace = validate_bool_kwarg(inplace, "inplace")

    axis = df._get_axis_number(axis)
    agg_axis = 1 - axis

    agg_obj = df
    if subset is not None:
        ax = df._get_axis(agg_axis)
        indices = ax.get_indexer_for(subset)
        check = indices == -1
        if check.any():
            raise KeyError(list(np.compress(check, subset)))

        agg_obj = df.take(indices, axis=agg_axis)

    inf_range = get_inf_range(inf)
    mask = agg_obj.isin(inf_range)
    mask = get_mask(how, mask, agg_axis)
    result = df.loc(axis=axis)[~mask]

    if not inplace:
        return result

    df._update_inplace(result)
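``drop_inf`` leans on two helpers from ``dtoolkit/accessor/_util.py`` that are not part of this diff: ``get_inf_range`` maps the ``inf`` keyword to the concrete values to match, and ``get_mask`` reduces the element-wise boolean mask along the aggregation axis. A hedged sketch of what they presumably look like, reconstructed only from how they are called here (the real implementations may differ):

# Sketch of the helpers drop_inf relies on; the actual versions live in
# dtoolkit/accessor/_util.py, which is not shown in this commit excerpt.
from __future__ import annotations

import numpy as np
import pandas as pd


def get_inf_range(inf: str = "all") -> list[float]:
    # Map the ``inf`` keyword to the infinity values to drop.
    ranges = {
        "all": [np.inf, -np.inf],
        "pos": [np.inf],
        "neg": [-np.inf],
    }
    if inf not in ranges:
        raise ValueError(f"invalid inf option: {inf!r}")
    return ranges[inf]


def get_mask(how: str, mask: pd.DataFrame, axis: int) -> pd.Series:
    # Collapse the element-wise boolean mask along ``axis`` with any/all,
    # producing one flag per row (or column) to drop.
    if how == "any":
        return mask.any(axis=axis)
    if how == "all":
        return mask.all(axis=axis)
    raise ValueError(f"invalid how option: {how!r}")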
97 changes: 97 additions & 0 deletions dtoolkit/accessor/dataframe/expand.py
@@ -0,0 +1,97 @@
from __future__ import annotations

from textwrap import dedent
from typing import TYPE_CHECKING

import pandas as pd
from pandas.util._decorators import doc

from dtoolkit.accessor.register import register_dataframe_method
from dtoolkit.accessor.series import expand as s_expand


if TYPE_CHECKING:
    from dtoolkit._typing import IntOrStr


@register_dataframe_method
@doc(
    s_expand,
    see_also=dedent(
        """
        See Also
        --------
        dtoolkit.accessor.series.expand
            Transform each element of a list-like to a column.
        pandas.DataFrame.explode
            Transform each element of a list-like to a row.
        """,
    ),
    examples=dedent(
        """
        Examples
        --------
        >>> import dtoolkit.accessor
        >>> import pandas as pd
        >>> import numpy as np

        Expand the *list-like* elements.

        >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
        ...                    'B': 1,
        ...                    'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
        >>> df.expand()
            A_0  A_1  A_2  B   C_0   C_1   C_2
        0     0  1.0  2.0  1     a     b     c
        1   foo  NaN  NaN  1   NaN  None  None
        2  None  NaN  NaN  1  None  None  None
        3     3  4.0  NaN  1     d     e  None

        Expand elements whose *sub-elements* are also list-like.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", "b"), (3, (5, 6))]})
        >>> df.expand(flatten=True)
           col1 col2_0 col2_1 col2_2
        0     1      a      b    NaN
        1     2      3      5    6.0

        Set the column names via ``suffix`` and ``delimiter``.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", 3), ("b", 4)]})
        >>> df.expand(suffix=["index", "value"], delimiter="-")
           col1 col2-index col2-value
        0     1          a          3
        1     2          b          4

        It can also handle **different lengths** of elements and suffix lists.

        >>> df = pd.DataFrame({"col1": [1, 2], "col2": [(3, 4), (5, 6, 7)]})
        >>> df.expand()
           col1  col2_0  col2_1  col2_2
        0     1       3       4     NaN
        1     2       5       6     7.0
        >>> df.expand(suffix=["a", "b", "c", "d"])
           col1  col2_a  col2_b  col2_c
        0     1       3       4     NaN
        1     2       5       6     7.0
        """,
    ),
)
def expand(
    df: pd.DataFrame,
    suffix: list[IntOrStr] = None,
    delimiter: str = "_",
    flatten: bool = False,
) -> pd.DataFrame:

    return pd.concat(
        (
            df.get(column).expand(
                suffix=suffix,
                delimiter=delimiter,
                flatten=flatten,
            )
            for column in df
        ),
        axis=1,
    )
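The DataFrame method delegates each column to the Series-level ``expand`` accessor (imported as ``s_expand`` but not part of this diff) and concatenates the results column-wise. A rough sketch of the per-column behaviour it appears to rely on, ignoring the ``suffix`` and ``flatten`` handling (``expand_column`` is a hypothetical name; the real Series accessor is more capable):

# Assumption-laden sketch of a per-column expand: list-like elements become
# extra columns named ``<name>_<i>``; purely scalar columns pass through.
from __future__ import annotations

import pandas as pd


def expand_column(s: pd.Series, delimiter: str = "_") -> pd.DataFrame | pd.Series:
    # Pass the column through untouched if no element is list-like.
    if not s.apply(pd.api.types.is_list_like).any():
        return s

    # Wrap scalars so every row is a list, then let DataFrame pad short rows with NaN.
    rows = [x if pd.api.types.is_list_like(x) else [x] for x in s]
    expanded = pd.DataFrame(rows, index=s.index)
    expanded.columns = [f"{s.name}{delimiter}{i}" for i in expanded.columns]
    return expanded

With a helper like this, ``df.expand()`` is roughly ``pd.concat([expand_column(df[c]) for c in df], axis=1)``, which is exactly the shape of the function above.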