Skip to content

Commit

Permalink
FIX-modin-project#6899: Avoid sending lazy categorical proxies to workers
Browse files Browse the repository at this point in the history

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Jan 31, 2024
1 parent c130e13 commit 6b7fc09
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3852,7 +3852,22 @@ def apply_func(df): # pragma: no cover
# 2. The second one works slower, but only gathers light pandas.Index objects,
# so there should be less stress on the network.
if add_missing_cats or not IsRayCluster.get():
original_dtypes = self.dtypes if self.has_materialized_dtypes else None
if self.has_materialized_dtypes:
original_dtypes = pandas.Series(
{
# lazy proxies hold a reference to another Modin DataFrame, which can be
# a problem during serialization; in this scenario we don't need the actual
# categorical values, so a "category" string is enough
name: (
"category"
if isinstance(dtype, LazyProxyCategoricalDtype)
else dtype
)
for name, dtype in self.dtypes.items()
}
)
else:
original_dtypes = None

def compute_aligned_columns(*dfs, initial_columns=None):
"""Take row partitions, filter empty ones, and return joined columns for them."""
Expand Down

0 comments on commit 6b7fc09

Please sign in to comment.