Skip to content

Commit

Permalink
FIX-modin-project#7250: Revert "PERF-modin-project#6666: Avoid internal reset_index for left merge"
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed May 10, 2024
1 parent 06699a8 commit 3692720
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 32 deletions.
27 changes: 3 additions & 24 deletions modin/core/storage_formats/pandas/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,23 +144,8 @@ def should_keep_index(left, right):
)
return keep_index

def map_func(
left, right, *axis_lengths, kwargs=kwargs, **service_kwargs
): # pragma: no cover
df = pandas.merge(left, right, **kwargs)

if kwargs["how"] == "left":
partition_idx = service_kwargs["partition_idx"]
if len(axis_lengths):
if not should_keep_index(left, right):
# Doesn't work for "inner" case, since the partition sizes of the
# left dataframe may change
start = sum(axis_lengths[:partition_idx])
stop = sum(axis_lengths[: partition_idx + 1])

df.index = pandas.RangeIndex(start, stop)

return df
def map_func(left, right): # pragma: no cover
return pandas.merge(left, right, **kwargs)

# Want to ensure that these are python lists
if left_on is not None and right_on is not None:
Expand Down Expand Up @@ -188,7 +173,6 @@ def map_func(
left._modin_frame.broadcast_apply_full_axis(
axis=1,
func=map_func,
enumerate_partitions=how == "left",
other=right_to_broadcast,
# We're going to explicitly change the shape across the 1-axis,
# so we want for partitioning to adapt as well
Expand All @@ -199,7 +183,6 @@ def map_func(
new_columns=new_columns,
sync_labels=False,
dtypes=new_dtypes,
pass_axis_lengths_to_partitions=how == "left",
)
)

Expand Down Expand Up @@ -238,11 +221,7 @@ def map_func(
else new_left.sort_rows_by_column_values(on)
)

return (
new_left.reset_index(drop=True)
if not keep_index and (kwargs["how"] != "left" or sort)
else new_left
)
return new_left if keep_index else new_left.reset_index(drop=True)
else:
return left.default_to_pandas(pandas.DataFrame.merge, right, **kwargs)

Expand Down
16 changes: 8 additions & 8 deletions modin/tests/pandas/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,20 +230,20 @@ def test_join_6602():
"test_data, test_data2",
[
(
np.random.uniform(0, 100, size=(2**6, 2**6)),
np.random.uniform(0, 100, size=(2**7, 2**6)),
np.random.randint(0, 100, size=(64, 64)),
np.random.randint(0, 100, size=(128, 64)),
),
(
np.random.uniform(0, 100, size=(2**7, 2**6)),
np.random.uniform(0, 100, size=(2**6, 2**6)),
np.random.randint(0, 100, size=(128, 64)),
np.random.randint(0, 100, size=(64, 64)),
),
(
np.random.uniform(0, 100, size=(2**6, 2**6)),
np.random.uniform(0, 100, size=(2**6, 2**7)),
np.random.randint(0, 100, size=(64, 64)),
np.random.randint(0, 100, size=(64, 128)),
),
(
np.random.uniform(0, 100, size=(2**6, 2**7)),
np.random.uniform(0, 100, size=(2**6, 2**6)),
np.random.randint(0, 100, size=(64, 128)),
np.random.randint(0, 100, size=(64, 64)),
),
],
)
Expand Down

0 comments on commit 3692720

Please sign in to comment.