Skip to content

Commit

Permalink
REF: dont alter self in _validate_specification (pandas-dev#48051)
Browse files Browse the repository at this point in the history
* REF: dont alter self in _validate_specification

* validate_specification->validate_left_right_on
  • Loading branch information
jbrockmendel authored and YYYasin19 committed Aug 23, 2022
1 parent 9ed31f3 commit afe0400
Showing 1 changed file with 48 additions and 44 deletions.
92 changes: 48 additions & 44 deletions pandas/core/reshape/merge.py
Expand Up @@ -640,8 +640,6 @@ def __init__(
self.axis = 1 - axis if self.left.ndim == 2 else 0

self.on = com.maybe_make_list(on)
self.left_on = com.maybe_make_list(left_on)
self.right_on = com.maybe_make_list(right_on)

self.copy = copy
self.suffixes = suffixes
Expand Down Expand Up @@ -674,7 +672,7 @@ def __init__(
msg, FutureWarning, stacklevel=find_stack_level(inspect.currentframe())
)

self._validate_specification()
self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)

cross_col = None
if self.how == "cross":
Expand Down Expand Up @@ -1077,7 +1075,7 @@ def _get_merge_keys(self):
# a pd.merge_asof(left_index=True, left_by=...) will result in a
# self.left_on array with a None in the middle of it. This requires
# a work-around as designated in the code below.
# See _validate_specification() for where this happens.
# See _validate_left_right_on() for where this happens.

# ugh, spaghetti re #733
if _any(self.left_on) and _any(self.right_on):
Expand Down Expand Up @@ -1325,25 +1323,27 @@ def _create_cross_configuration(
cross_col,
)

def _validate_specification(self) -> None:
def _validate_left_right_on(self, left_on, right_on):
left_on = com.maybe_make_list(left_on)
right_on = com.maybe_make_list(right_on)

if self.how == "cross":
if (
self.left_index
or self.right_index
or self.right_on is not None
or self.left_on is not None
or right_on is not None
or left_on is not None
or self.on is not None
):
raise MergeError(
"Can not pass on, right_on, left_on or set right_index=True or "
"left_index=True"
)
return
# Hm, any way to make this logic less complicated??
elif self.on is None and self.left_on is None and self.right_on is None:
elif self.on is None and left_on is None and right_on is None:

if self.left_index and self.right_index:
self.left_on, self.right_on = (), ()
left_on, right_on = (), ()
elif self.left_index:
raise MergeError("Must pass right_on or right_index=True")
elif self.right_index:
Expand All @@ -1356,8 +1356,8 @@ def _validate_specification(self) -> None:
if len(common_cols) == 0:
raise MergeError(
"No common columns to perform merge on. "
f"Merge options: left_on={self.left_on}, "
f"right_on={self.right_on}, "
f"Merge options: left_on={left_on}, "
f"right_on={right_on}, "
f"left_index={self.left_index}, "
f"right_index={self.right_index}"
)
Expand All @@ -1366,9 +1366,9 @@ def _validate_specification(self) -> None:
or not right_cols.join(common_cols, how="inner").is_unique
):
raise MergeError(f"Data columns not unique: {repr(common_cols)}")
self.left_on = self.right_on = common_cols
left_on = right_on = common_cols
elif self.on is not None:
if self.left_on is not None or self.right_on is not None:
if left_on is not None or right_on is not None:
raise MergeError(
'Can only pass argument "on" OR "left_on" '
'and "right_on", not a combination of both.'
Expand All @@ -1378,40 +1378,42 @@ def _validate_specification(self) -> None:
'Can only pass argument "on" OR "left_index" '
'and "right_index", not a combination of both.'
)
self.left_on = self.right_on = self.on
elif self.left_on is not None:
left_on = right_on = self.on
elif left_on is not None:
if self.left_index:
raise MergeError(
'Can only pass argument "left_on" OR "left_index" not both.'
)
if not self.right_index and self.right_on is None:
if not self.right_index and right_on is None:
raise MergeError('Must pass "right_on" OR "right_index".')
n = len(self.left_on)
n = len(left_on)
if self.right_index:
if len(self.left_on) != self.right.index.nlevels:
if len(left_on) != self.right.index.nlevels:
raise ValueError(
"len(left_on) must equal the number "
'of levels in the index of "right"'
)
self.right_on = [None] * n
elif self.right_on is not None:
right_on = [None] * n
elif right_on is not None:
if self.right_index:
raise MergeError(
'Can only pass argument "right_on" OR "right_index" not both.'
)
if not self.left_index and self.left_on is None:
if not self.left_index and left_on is None:
raise MergeError('Must pass "left_on" OR "left_index".')
n = len(self.right_on)
n = len(right_on)
if self.left_index:
if len(self.right_on) != self.left.index.nlevels:
if len(right_on) != self.left.index.nlevels:
raise ValueError(
"len(right_on) must equal the number "
'of levels in the index of "left"'
)
self.left_on = [None] * n
if self.how != "cross" and len(self.right_on) != len(self.left_on):
left_on = [None] * n
if self.how != "cross" and len(right_on) != len(left_on):
raise ValueError("len(right_on) must equal len(left_on)")

return left_on, right_on

def _validate(self, validate: str) -> None:

# Check uniqueness of each
Expand Down Expand Up @@ -1769,14 +1771,14 @@ def __init__(
fill_method=fill_method,
)

def _validate_specification(self) -> None:
super()._validate_specification()
def _validate_left_right_on(self, left_on, right_on):
left_on, right_on = super()._validate_left_right_on(left_on, right_on)

# we only allow on to be a single item for on
if len(self.left_on) != 1 and not self.left_index:
if len(left_on) != 1 and not self.left_index:
raise MergeError("can only asof on a key for left")

if len(self.right_on) != 1 and not self.right_index:
if len(right_on) != 1 and not self.right_index:
raise MergeError("can only asof on a key for right")

if self.left_index and isinstance(self.left.index, MultiIndex):
Expand All @@ -1797,27 +1799,27 @@ def _validate_specification(self) -> None:

# GH#29130 Check that merge keys do not have dtype object
if not self.left_index:
left_on = self.left_on[0]
if is_array_like(left_on):
lo_dtype = left_on.dtype
left_on_0 = left_on[0]
if is_array_like(left_on_0):
lo_dtype = left_on_0.dtype
else:
lo_dtype = (
self.left[left_on].dtype
if left_on in self.left.columns
else self.left.index.get_level_values(left_on)
self.left[left_on_0].dtype
if left_on_0 in self.left.columns
else self.left.index.get_level_values(left_on_0)
)
else:
lo_dtype = self.left.index.dtype

if not self.right_index:
right_on = self.right_on[0]
if is_array_like(right_on):
ro_dtype = right_on.dtype
right_on_0 = right_on[0]
if is_array_like(right_on_0):
ro_dtype = right_on_0.dtype
else:
ro_dtype = (
self.right[right_on].dtype
if right_on in self.right.columns
else self.right.index.get_level_values(right_on)
self.right[right_on_0].dtype
if right_on_0 in self.right.columns
else self.right.index.get_level_values(right_on_0)
)
else:
ro_dtype = self.right.index.dtype
Expand All @@ -1839,13 +1841,15 @@ def _validate_specification(self) -> None:
if len(self.left_by) != len(self.right_by):
raise MergeError("left_by and right_by must be same length")

self.left_on = self.left_by + list(self.left_on)
self.right_on = self.right_by + list(self.right_on)
left_on = self.left_by + list(left_on)
right_on = self.right_by + list(right_on)

# check 'direction' is valid
if self.direction not in ["backward", "forward", "nearest"]:
raise MergeError(f"direction invalid: {self.direction}")

return left_on, right_on

def _get_merge_keys(self):

# note this function has side effects
Expand Down

0 comments on commit afe0400

Please sign in to comment.