-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MAINT: Split package to scripts (#475)
* MAINT: split package to scripts * lint codes * move codes to outside * use __len__ replace len * lint codes * split package to scripts * split test_both.py to different scripts * Simplify a bit * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * lint codes * import pandas * Drop useless importing Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
43ae25f
commit b1a4873
Showing
40 changed files
with
2,933 additions
and
2,726 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from dtoolkit.accessor.dataframe.cols import cols # noqa | ||
from dtoolkit.accessor.dataframe.drop_inf import drop_inf # noqa | ||
from dtoolkit.accessor.dataframe.expand import expand # noqa | ||
from dtoolkit.accessor.dataframe.filter_in import filter_in # noqa | ||
from dtoolkit.accessor.dataframe.repeat import repeat # noqa | ||
from dtoolkit.accessor.dataframe.to_series import to_series # noqa | ||
from dtoolkit.accessor.dataframe.top_n import top_n # noqa | ||
from dtoolkit.accessor.dataframe.unique_counts import unique_counts # noqa | ||
from dtoolkit.accessor.dataframe.values_to_dict import values_to_dict # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from __future__ import annotations | ||
|
||
from textwrap import dedent | ||
from typing import TYPE_CHECKING | ||
|
||
import pandas as pd | ||
from pandas.util._decorators import doc | ||
|
||
from dtoolkit.accessor.register import register_dataframe_method | ||
from dtoolkit.accessor.series import cols as s_cols | ||
|
||
|
||
if TYPE_CHECKING: | ||
from dtoolkit._typing import IntOrStr | ||
|
||
|
||
# DataFrame flavour of ``cols``: returns the column labels as a plain list.
#
# The docstring is intentionally NOT written on the function. It is produced
# by pandas' ``doc`` decorator from the Series-level ``cols`` (``s_cols``),
# with the ``returns`` text below supplied as a keyword (presumably filling a
# ``{returns}`` placeholder in the shared docstring template -- confirm against
# ``dtoolkit.accessor.series.cols``).
# NOTE: ``doc`` also folds in the decorated function's own ``__doc__`` when one
# exists, so adding a literal docstring here would change the rendered docs.
#
# ``register_dataframe_method`` is a project helper; judging by its name it
# exposes this function as a method on ``pandas.DataFrame`` -- verify in
# ``dtoolkit.accessor.register``.
@register_dataframe_method | ||
@doc( | ||
s_cols, | ||
returns=dedent( | ||
""" | ||
Returns | ||
------- | ||
list of str or int | ||
The column names. | ||
""", | ||
), | ||
) | ||
def cols(df: pd.DataFrame) -> list[IntOrStr]: | ||
# ``Index.tolist()`` converts the column index into a plain Python list.
return df.columns.tolist() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from pandas.util._validators import validate_bool_kwarg | ||
|
||
from dtoolkit.accessor._util import get_inf_range | ||
from dtoolkit.accessor._util import get_mask | ||
from dtoolkit.accessor.register import register_dataframe_method | ||
|
||
|
||
if TYPE_CHECKING: | ||
from dtoolkit._typing import IntOrStr | ||
|
||
|
||
@register_dataframe_method
def drop_inf(
    df: pd.DataFrame,
    axis: IntOrStr = 0,
    how: str = "any",
    inf: str = "all",
    subset: str | list[str] | None = None,
    inplace: bool = False,
) -> pd.DataFrame | None:
    """
    Remove ``inf`` values.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine if rows or columns which contain ``inf`` values are
        removed.

        * 0, or 'index' : Drop rows which contain ``inf`` values.
        * 1, or 'columns' : Drop columns which contain ``inf`` values.

    how : {'any', 'all'}, default 'any'
        Determine if row or column is removed from :obj:`~pandas.DataFrame`,
        when we have at least one ``inf`` or all ``inf``.

        * 'any' : If any ``inf`` values are present, drop that row or column.
        * 'all' : If all values are ``inf``, drop that row or column.

    inf : {'all', 'pos', 'neg'}, default 'all'
        * 'all' : Remove ``inf`` and ``-inf``.
        * 'pos' : Only remove ``inf``.
        * 'neg' : Only remove ``-inf``.

    subset : label or array-like, optional
        Labels along other axis to consider, e.g. if you are dropping rows
        these would be a list of columns to include. A single label is
        treated as a one-element list.

    inplace : bool, default False
        If True, do operation inplace and return None.

    Returns
    -------
    DataFrame or None
        DataFrame with ``inf`` entries dropped from it or None if
        ``inplace=True``.

    Raises
    ------
    KeyError
        If any label in ``subset`` is not found along the other axis.

    See Also
    --------
    dtoolkit.accessor.series.drop_inf
        :obj:`~pandas.Series` drops ``inf`` values.

    Examples
    --------
    >>> import dtoolkit.accessor
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
    ...                    "toy": [np.inf, 'Batmobile', 'Bullwhip'],
    ...                    "born": [np.inf, pd.Timestamp("1940-04-25"),
    ...                             -np.inf]})
    >>> df
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is inf or -inf.

    >>> df.drop_inf()
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00

    Drop the columns where at least one element is inf or -inf.

    >>> df.drop_inf(axis='columns')
           name
    0    Alfred
    1    Batman
    2  Catwoman

    Drop the rows where all elements are inf or -inf.

    >>> df.drop_inf(how='all')
           name        toy                 born
    0    Alfred        inf                  inf
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Drop the rows where at least one element is -inf.

    >>> df.drop_inf(inf='neg')
         name        toy                 born
    0  Alfred        inf                  inf
    1  Batman  Batmobile  1940-04-25 00:00:00

    Define in which columns to look for inf and -inf values.

    >>> df.drop_inf(subset=['name', 'toy'])
           name        toy                 born
    1    Batman  Batmobile  1940-04-25 00:00:00
    2  Catwoman   Bullwhip                 -inf

    Keep the DataFrame with valid entries in the same variable.

    >>> df.drop_inf(inplace=True)
    >>> df
         name        toy                 born
    1  Batman  Batmobile  1940-04-25 00:00:00
    """

    inplace = validate_bool_kwarg(inplace, "inplace")

    # ``axis`` is the axis entries are dropped from; the ``inf`` test is
    # aggregated along the *other* axis (``agg_axis``).
    axis = df._get_axis_number(axis)
    agg_axis = 1 - axis

    agg_obj = df
    if subset is not None:
        # Allow a bare label (e.g. ``subset="toy"``), mirroring
        # ``DataFrame.dropna``; previously this failed inside
        # ``get_indexer_for`` because a scalar is not a collection.
        if not pd.api.types.is_list_like(subset):
            subset = [subset]

        ax = df._get_axis(agg_axis)
        indices = ax.get_indexer_for(subset)
        # ``-1`` marks labels that do not exist on the aggregation axis.
        check = indices == -1
        if check.any():
            raise KeyError(list(np.compress(check, subset)))

        # Only the requested labels take part in the ``inf`` test; the
        # final selection below is still applied to the full ``df``.
        agg_obj = df.take(indices, axis=agg_axis)

    inf_range = get_inf_range(inf)
    mask = agg_obj.isin(inf_range)
    mask = get_mask(how, mask, agg_axis)
    result = df.loc(axis=axis)[~mask]

    if not inplace:
        return result

    # In-place mode: swap the filtered data into ``df`` and return None.
    df._update_inplace(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
from __future__ import annotations | ||
|
||
from textwrap import dedent | ||
from typing import TYPE_CHECKING | ||
|
||
import pandas as pd | ||
from pandas.util._decorators import doc | ||
|
||
from dtoolkit.accessor.register import register_dataframe_method | ||
from dtoolkit.accessor.series import expand as s_expand | ||
|
||
|
||
if TYPE_CHECKING: | ||
from dtoolkit._typing import IntOrStr | ||
|
||
|
||
# DataFrame flavour of ``expand``: applies the Series-level ``expand`` to every
# column and joins the results column-wise.
#
# The docstring is intentionally NOT written on the function. It is produced
# by pandas' ``doc`` decorator from the Series-level ``expand`` (``s_expand``),
# with the ``see_also`` and ``examples`` texts below supplied as keywords
# (presumably filling same-named placeholders in the shared docstring
# template -- confirm against ``dtoolkit.accessor.series.expand``).
# NOTE: ``doc`` also folds in the decorated function's own ``__doc__`` when one
# exists, so adding a literal docstring here would change the rendered docs.
@register_dataframe_method | ||
@doc( | ||
s_expand, | ||
see_also=dedent( | ||
""" | ||
See Also | ||
-------- | ||
dtoolkit.accessor.series.expand | ||
Transform each element of a list-like to a column. | ||
pandas.DataFrame.explode | ||
Transform each element of a list-like to a row. | ||
""", | ||
), | ||
examples=dedent( | ||
""" | ||
Examples | ||
-------- | ||
>>> import dtoolkit.accessor | ||
>>> import pandas as pd | ||
>>> import numpy as np | ||
Expand the *list-like* element. | ||
>>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]], | ||
... 'B': 1, | ||
... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]}) | ||
>>> df.expand() | ||
A_0 A_1 A_2 B C_0 C_1 C_2 | ||
0 0 1.0 2.0 1 a b c | ||
1 foo NaN NaN 1 NaN None None | ||
2 None NaN NaN 1 None None None | ||
3 3 4.0 NaN 1 d e None | ||
Expand *sub-element* type is list-like. | ||
>>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", "b"), (3, (5, 6))]}) | ||
>>> df.expand(flatten=True) | ||
col1 col2_0 col2_1 col2_2 | ||
0 1 a b NaN | ||
1 2 3 5 6.0 | ||
Set the columns of name. | ||
>>> df = pd.DataFrame({"col1": [1, 2], "col2": [("a", 3), ("b", 4)]}) | ||
>>> df.expand(suffix=["index", "value"], delimiter="-") | ||
col1 col2-index col2-value | ||
0 1 a 3 | ||
1 2 b 4 | ||
Also could handle **different lengths** of element and suffix list. | ||
>>> df = pd.DataFrame({"col1": [1, 2], "col2": [(3, 4), (5, 6, 7)]}) | ||
>>> df.expand() | ||
col1 col2_0 col2_1 col2_2 | ||
0 1 3 4 NaN | ||
1 2 5 6 7.0 | ||
>>> df.expand(suffix=["a", "b", "c", "d"]) | ||
col1 col2_a col2_b col2_c | ||
0 1 3 4 NaN | ||
1 2 5 6 7.0 | ||
""", | ||
), | ||
) | ||
def expand( | ||
df: pd.DataFrame, | ||
suffix: list[IntOrStr] = None, | ||
delimiter: str = "_", | ||
flatten: bool = False, | ||
) -> pd.DataFrame: | ||
|
||
# Iterating ``df`` yields its column labels. Each column is fetched with
# ``df.get`` and expanded through the ``expand`` method available on it,
# forwarding ``suffix``/``delimiter``/``flatten`` unchanged.
# ``pd.concat(..., axis=1)`` then places the per-column results side by side.
# NOTE(review): a frame with no columns hands ``pd.concat`` an empty iterable,
# which raises ``ValueError`` -- confirm whether that is the intended contract.
return pd.concat( | ||
( | ||
df.get(column).expand( | ||
suffix=suffix, | ||
delimiter=delimiter, | ||
flatten=flatten, | ||
) | ||
for column in df | ||
), | ||
axis=1, | ||
) |
Oops, something went wrong.