Skip to content

Commit

Permalink
FIX-modin-project#2751: speed up dtypes
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Feb 18, 2021
1 parent 61c6e99 commit 33a4b5b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 9 deletions.
11 changes: 3 additions & 8 deletions modin/engines/base/frame/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,9 @@ def _compute_dtypes(self):
def dtype_builder(df):
return df.apply(lambda col: find_common_type(col.values), axis=0)

- map_func = self._build_mapreduce_func(0, lambda df: df.dtypes)
- reduce_func = self._build_mapreduce_func(0, dtype_builder)
# For now we will use a pandas Series for the dtypes.
if len(self.columns) > 0:
- dtypes = self._map_reduce(0, map_func, reduce_func).to_pandas().iloc[0]
+ dtypes = self._map_reduce(0, lambda df: df.dtypes, dtype_builder)
else:
dtypes = pandas.Series([])
# reset name to None because we use "__reduced__" internally
Expand Down Expand Up @@ -1149,13 +1147,10 @@ def _map_reduce(self, axis, map_func, reduce_func=None, preserve_index=True):
else:
reduce_func = self._build_mapreduce_func(axis, reduce_func)

- map_parts = self._frame_mgr_cls.map_partitions(self._partitions, map_func)
reduce_parts = self._frame_mgr_cls.map_axis_partitions(
- axis, map_parts, reduce_func
- )
- return self._compute_map_reduce_metadata(
- axis, reduce_parts, preserve_index=preserve_index
+ 0, self._partitions, lambda df: df.dtypes
)
+ return pandas.concat([x.to_pandas() for x in reduce_parts[0]])

def _map(self, func, dtypes=None, validate_index=False, validate_columns=False):
"""Perform a function that maps across the entire dataset.
Expand Down
2 changes: 1 addition & 1 deletion modin/engines/base/frame/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def broadcast_axis_partitions(
elif lengths:
num_splits = len(lengths)
else:
- num_splits = NPartitions.get()
+ num_splits = 1 # NPartitions.get()
preprocessed_map_func = cls.preprocess_func(apply_func)
left_partitions = cls.axis_partition(left, axis)
right_partitions = None if right is None else cls.axis_partition(right, axis)
Expand Down

0 comments on commit 33a4b5b

Please sign in to comment.