Skip to content

Commit

Permalink
update categorical profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
atl1502 committed Apr 23, 2024
1 parent 503efa2 commit 0751d86
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 51 deletions.
10 changes: 8 additions & 2 deletions dataprofiler/profilers/categorical_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from typing import cast

import datasketches
from pandas import DataFrame, Series
import pandas as pd
import polars as pl
from polars import DataFrame, Series

from .. import dp_logging
from . import profiler_utils
Expand Down Expand Up @@ -601,7 +603,8 @@ def _get_categories_full(self, df_series) -> dict:
:return: dict of counts for each unique value
:rtype: dict
"""
category_count: dict = df_series.value_counts(dropna=False).to_dict()
value_counts = df_series.value_counts(sort=True)
category_count: dict = dict(value_counts.iter_rows())
return category_count

@BaseColumnProfiler._timeit(name="categories")
Expand Down Expand Up @@ -678,6 +681,9 @@ def update(self, df_series: Series) -> CategoricalColumn:
:return: updated CategoricalColumn
:rtype: CategoricalColumn
"""
# TODO: remove this pandas-to-polars conversion once the profiler builder is updated
if type(df_series) == pd.Series:
df_series = pl.from_pandas(df_series) # type: ignore
# If condition for limiting profile calculations
if len(df_series) == 0 or self._stop_condition_is_met:
return self
Expand Down
Loading

0 comments on commit 0751d86

Please sign in to comment.