Skip to content

Commit

Permalink
Updated isort and pre-commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
adocquin committed Apr 12, 2023
1 parent 4cdf3c1 commit a205104
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 119 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ __pycache__
.vscode
.coverage
.python-version
.pytest_cache
cb_model.onnx
encoding.json
htmlcov
dist
coolpandas.egg-info
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ repos:
"--rcfile=pyproject.toml",
]
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 23.3.0
hooks:
- id: black
language_version: python3.11
args: [--config=pyproject.toml]
entry: black --check .
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.12.0
hooks:
- id: isort
language_version: python3.11
Expand Down
98 changes: 49 additions & 49 deletions coolpandas/eda/duplicates.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
"""DataFrames cleaning module."""
import pandas as pd


def duplicated_rows(
    data_frame: pd.DataFrame, display_summary: bool = True, drop: bool = False
) -> pd.DataFrame:
    """Return the duplicated rows of a DataFrame, optionally dropping them.

    Args:
        data_frame (pd.DataFrame): DataFrame to inspect. It is modified in
            place when ``drop`` is True.
        display_summary (bool, optional): Whether to print a summary.
            Defaults to True.
        drop (bool, optional): Whether to drop duplicated rows from
            ``data_frame``. Defaults to False.

    Returns:
        pd.DataFrame: Every row that is part of a duplicate group (all
            occurrences, because ``keep=False`` is used for the lookup).
    """
    # keep=False marks every member of a duplicate group, not only the repeats.
    is_repeated = data_frame.duplicated(keep=False)
    repeated_rows: pd.DataFrame = data_frame[is_repeated]

    if display_summary:
        print(f"Number of duplicated rows: {repeated_rows.shape[0]}")

    if not drop:
        return repeated_rows

    # The caller's DataFrame is mutated; the returned frame was taken before.
    data_frame.drop_duplicates(inplace=True)
    if display_summary:
        print("DataFrame shape after dropping duplicated rows:")
        print(data_frame.shape)
    return repeated_rows


def duplicated_columns(
    data_frame: pd.DataFrame, display_summary: bool = True, drop: bool = False
) -> pd.DataFrame:
    """Return the duplicated columns of a DataFrame, optionally dropping them.

    A column counts as duplicated only when all of its values match those of
    a single earlier column, position by position. The first column of each
    group of identical columns is kept and is not reported.

    Args:
        data_frame (pd.DataFrame): DataFrame to inspect. It is modified in
            place when ``drop`` is True.
        display_summary (bool, optional): Whether to print a summary.
            Defaults to True.
        drop (bool, optional): Whether to drop the duplicated columns from
            ``data_frame``. Defaults to False.

    Returns:
        pd.DataFrame: The duplicated columns (empty when there are none or
            when ``data_frame`` itself is empty).
    """
    if data_frame.empty:
        # With no rows or no columns there is nothing to compare.
        duplicate_labels = []
    else:
        # Transposing turns every column into a row, so DataFrame.duplicated
        # compares whole columns against each other. Checking row by row
        # whether a value repeats an earlier one would also flag a column
        # that matches *different* columns in different rows.
        column_is_duplicate: pd.Series = data_frame.T.duplicated()
        duplicate_labels = list(column_is_duplicate[column_is_duplicate].index)

    duplicated_data_frame: pd.DataFrame = data_frame[duplicate_labels]
    if display_summary:
        print(f"Number of duplicated columns: {duplicated_data_frame.shape[1]}")
    if drop:
        # The caller's DataFrame is mutated; the returned frame was taken before.
        data_frame.drop(columns=duplicated_data_frame.columns, inplace=True)
    if display_summary and drop:
        print("DataFrame shape after dropping duplicated columns:")
        print(data_frame.shape)
    return duplicated_data_frame
"""DataFrames cleaning module."""
import pandas as pd


def duplicated_rows(
    data_frame: pd.DataFrame, display_summary: bool = True, drop: bool = False
) -> pd.DataFrame:
    """Return the duplicated rows of a DataFrame, optionally dropping them.

    Args:
        data_frame (pd.DataFrame): DataFrame to inspect. It is modified in
            place when ``drop`` is True.
        display_summary (bool, optional): Whether to print a summary.
            Defaults to True.
        drop (bool, optional): Whether to drop duplicated rows from
            ``data_frame``. Defaults to False.

    Returns:
        pd.DataFrame: Every row that is part of a duplicate group (all
            occurrences, because ``keep=False`` is used for the lookup).
    """
    # keep=False marks every member of a duplicate group, not only the repeats.
    is_repeated = data_frame.duplicated(keep=False)
    repeated_rows: pd.DataFrame = data_frame[is_repeated]

    if display_summary:
        print(f"Number of duplicated rows: {repeated_rows.shape[0]}")

    if not drop:
        return repeated_rows

    # The caller's DataFrame is mutated; the returned frame was taken before.
    data_frame.drop_duplicates(inplace=True)
    if display_summary:
        print("DataFrame shape after dropping duplicated rows:")
        print(data_frame.shape)
    return repeated_rows


def duplicated_columns(
    data_frame: pd.DataFrame, display_summary: bool = True, drop: bool = False
) -> pd.DataFrame:
    """Return the duplicated columns of a DataFrame, optionally dropping them.

    A column counts as duplicated only when all of its values match those of
    a single earlier column, position by position. The first column of each
    group of identical columns is kept and is not reported.

    Args:
        data_frame (pd.DataFrame): DataFrame to inspect. It is modified in
            place when ``drop`` is True.
        display_summary (bool, optional): Whether to print a summary.
            Defaults to True.
        drop (bool, optional): Whether to drop the duplicated columns from
            ``data_frame``. Defaults to False.

    Returns:
        pd.DataFrame: The duplicated columns (empty when there are none or
            when ``data_frame`` itself is empty).
    """
    if data_frame.empty:
        # With no rows or no columns there is nothing to compare.
        duplicate_labels = []
    else:
        # Transposing turns every column into a row, so DataFrame.duplicated
        # compares whole columns against each other. Checking row by row
        # whether a value repeats an earlier one would also flag a column
        # that matches *different* columns in different rows.
        column_is_duplicate: pd.Series = data_frame.T.duplicated()
        duplicate_labels = list(column_is_duplicate[column_is_duplicate].index)

    duplicated_data_frame: pd.DataFrame = data_frame[duplicate_labels]
    if display_summary:
        print(f"Number of duplicated columns: {duplicated_data_frame.shape[1]}")
    if drop:
        # The caller's DataFrame is mutated; the returned frame was taken before.
        data_frame.drop(columns=duplicated_data_frame.columns, inplace=True)
    if display_summary and drop:
        print("DataFrame shape after dropping duplicated columns:")
        print(data_frame.shape)
    return duplicated_data_frame
1 change: 1 addition & 0 deletions coolpandas/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from .barplot import barplot
from .distplot import distplot
from .geoplot import geoplot
from .lineplot import lineplot
from .mapplot import mapplot
40 changes: 40 additions & 0 deletions coolpandas/plot/lineplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Lineplot module."""
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from .style import custom_template, format_title


def lineplot(
    data_frame: pd.DataFrame,
    x_axis: str,
    y_axis: str,
    title: str,
    subtitle: str = None,
    **kwargs,
) -> go.Figure:
    """Create a line plot.

    Args:
        data_frame (pd.DataFrame): DataFrame to plot.
        x_axis (str): Column to use as x axis.
        y_axis (str): Column to use as y axis.
        title (str): Title of the plot.
        subtitle (str, optional): Subtitle of the plot. Defaults to None.
        **kwargs: Keyword arguments to pass to plotly.express.line. ``width``
            (default 800), ``height`` (default 400) and ``template`` (default
            ``custom_template``) may be overridden here.

    Returns:
        go.Figure: Line plot figure.
    """
    # Apply the house defaults only when the caller did not choose a value.
    # Passing them as explicit keywords next to **kwargs would raise
    # "got multiple values for keyword argument" on any override.
    kwargs.setdefault("template", custom_template)
    kwargs.setdefault("width", 800)
    kwargs.setdefault("height", 400)
    return px.line(
        data_frame,
        x=x_axis,
        y=y_axis,
        title=format_title(title, subtitle=subtitle),
        **kwargs,
    )
24 changes: 12 additions & 12 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
pandas==1.5.1
scipy==1.9.3
scikit-learn==1.1.3
plotly==5.11.0
nbformat==5.7.0
ipython==8.6.0
openrouteservice==2.3.3
pytest-cov==4.0.0
black==22.10.0
pylint==2.15.5
isort==5.10.1
pre-commit==2.20.0
pandas>=1.5.1
scipy>=1.9.3
scikit-learn>=1.1.3
plotly>=5.11.0
nbformat>=5.7.0
ipython>=8.6.0
openrouteservice>=2.3.3
pytest-cov>=4.0.0
black>=22.10.0
pylint>=2.15.5
isort>=5.12.0
pre-commit>=2.20.0
112 changes: 56 additions & 56 deletions tests/eda/test_duplicates.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
"""Test duplicates.py functions."""
from unittest.mock import MagicMock, patch

import pandas as pd
from numpy.testing import assert_equal

from coolpandas import eda


@patch("builtins.print")
def test_duplicated_rows(mock_print: MagicMock, test_dataframe: pd.DataFrame) -> None:
    """Check duplicated_rows detection, in-place dropping and summary output."""
    # The untouched fixture is expected to contain no repeated rows.
    assert eda.duplicated_rows(
        test_dataframe, display_summary=False, drop=False
    ).empty

    # Appending the head of the frame to itself creates repeated rows.
    frame_with_repeats: pd.DataFrame = pd.concat(
        [test_dataframe, test_dataframe.head()]
    )
    repeated: pd.DataFrame = eda.duplicated_rows(
        frame_with_repeats, display_summary=False, drop=False
    )
    assert len(repeated) == 8

    # One print for the summary-only call, three for summary plus drop.
    eda.duplicated_rows(frame_with_repeats, display_summary=True, drop=False)
    eda.duplicated_rows(frame_with_repeats, display_summary=True, drop=True)

    # After the in-place drop the values must match the original fixture.
    expected_values = test_dataframe.fillna(0).values
    actual_values = frame_with_repeats.fillna(0).values
    assert_equal(actual_values, expected_values)
    assert mock_print.call_count == 4


@patch("builtins.print")
def test_duplicated_columns(
    mock_print: MagicMock, test_dataframe: pd.DataFrame
) -> None:
    """Check duplicated_columns detection, in-place dropping and summary output."""
    # The untouched fixture is expected to contain no repeated columns.
    assert eda.duplicated_columns(
        test_dataframe, display_summary=False, drop=False
    ).empty

    # Copy an existing column under a new name to create one duplicate.
    frame_with_repeats: pd.DataFrame = test_dataframe.copy()
    frame_with_repeats["test"] = frame_with_repeats["Animal"]
    repeated: pd.DataFrame = eda.duplicated_columns(
        frame_with_repeats, display_summary=False, drop=False
    )
    assert_equal(repeated.columns.values, ["test"])

    # One print for the summary-only call, three for summary plus drop.
    eda.duplicated_columns(frame_with_repeats, display_summary=True, drop=False)
    eda.duplicated_columns(frame_with_repeats, display_summary=True, drop=True)

    # After the in-place drop only the fixture's own columns remain.
    expected_columns = test_dataframe.columns.values
    assert_equal(frame_with_repeats.columns.values, expected_columns)
    assert mock_print.call_count == 4
"""Test duplicates.py functions."""
from unittest.mock import MagicMock, patch

import pandas as pd
from numpy.testing import assert_equal

from coolpandas import eda


@patch("builtins.print")
def test_duplicated_rows(mock_print: MagicMock, test_dataframe: pd.DataFrame) -> None:
    """Check duplicated_rows detection, in-place dropping and summary output."""
    # The untouched fixture is expected to contain no repeated rows.
    assert eda.duplicated_rows(
        test_dataframe, display_summary=False, drop=False
    ).empty

    # Appending the head of the frame to itself creates repeated rows.
    frame_with_repeats: pd.DataFrame = pd.concat(
        [test_dataframe, test_dataframe.head()]
    )
    repeated: pd.DataFrame = eda.duplicated_rows(
        frame_with_repeats, display_summary=False, drop=False
    )
    assert len(repeated) == 8

    # One print for the summary-only call, three for summary plus drop.
    eda.duplicated_rows(frame_with_repeats, display_summary=True, drop=False)
    eda.duplicated_rows(frame_with_repeats, display_summary=True, drop=True)

    # After the in-place drop the values must match the original fixture.
    expected_values = test_dataframe.fillna(0).values
    actual_values = frame_with_repeats.fillna(0).values
    assert_equal(actual_values, expected_values)
    assert mock_print.call_count == 4


@patch("builtins.print")
def test_duplicated_columns(
    mock_print: MagicMock, test_dataframe: pd.DataFrame
) -> None:
    """Check duplicated_columns detection, in-place dropping and summary output."""
    # The untouched fixture is expected to contain no repeated columns.
    assert eda.duplicated_columns(
        test_dataframe, display_summary=False, drop=False
    ).empty

    # Copy an existing column under a new name to create one duplicate.
    frame_with_repeats: pd.DataFrame = test_dataframe.copy()
    frame_with_repeats["test"] = frame_with_repeats["Animal"]
    repeated: pd.DataFrame = eda.duplicated_columns(
        frame_with_repeats, display_summary=False, drop=False
    )
    assert_equal(repeated.columns.values, ["test"])

    # One print for the summary-only call, three for summary plus drop.
    eda.duplicated_columns(frame_with_repeats, display_summary=True, drop=False)
    eda.duplicated_columns(frame_with_repeats, display_summary=True, drop=True)

    # After the in-place drop only the fixture's own columns remain.
    expected_columns = test_dataframe.columns.values
    assert_equal(frame_with_repeats.columns.values, expected_columns)
    assert mock_print.call_count == 4

0 comments on commit a205104

Please sign in to comment.