diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 659cb1d2f68387..9298142d0ec75d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -640,8 +640,6 @@ def __init__( self.axis = 1 - axis if self.left.ndim == 2 else 0 self.on = com.maybe_make_list(on) - self.left_on = com.maybe_make_list(left_on) - self.right_on = com.maybe_make_list(right_on) self.copy = copy self.suffixes = suffixes @@ -674,7 +672,7 @@ def __init__( msg, FutureWarning, stacklevel=find_stack_level(inspect.currentframe()) ) - self._validate_specification() + self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) cross_col = None if self.how == "cross": @@ -1077,7 +1075,7 @@ def _get_merge_keys(self): # a pd.merge_asof(left_index=True, left_by=...) will result in a # self.left_on array with a None in the middle of it. This requires # a work-around as designated in the code below. - # See _validate_specification() for where this happens. + # See _validate_left_right_on() for where this happens. # ugh, spaghetti re #733 if _any(self.left_on) and _any(self.right_on): @@ -1325,25 +1323,27 @@ def _create_cross_configuration( cross_col, ) - def _validate_specification(self) -> None: + def _validate_left_right_on(self, left_on, right_on): + left_on = com.maybe_make_list(left_on) + right_on = com.maybe_make_list(right_on) + if self.how == "cross": if ( self.left_index or self.right_index - or self.right_on is not None - or self.left_on is not None + or right_on is not None + or left_on is not None or self.on is not None ): raise MergeError( "Can not pass on, right_on, left_on or set right_index=True or " "left_index=True" ) - return # Hm, any way to make this logic less complicated?? - elif self.on is None and self.left_on is None and self.right_on is None: + elif self.on is None and left_on is None and right_on is None: if self.left_index and self.right_index: - self.left_on, self.right_on = (), () + left_on, right_on = (), () elif self.left_index: raise MergeError("Must pass right_on or right_index=True") elif self.right_index: @@ -1356,8 +1356,8 @@ def _validate_specification(self) -> None: if len(common_cols) == 0: raise MergeError( "No common columns to perform merge on. " - f"Merge options: left_on={self.left_on}, " - f"right_on={self.right_on}, " + f"Merge options: left_on={left_on}, " + f"right_on={right_on}, " f"left_index={self.left_index}, " f"right_index={self.right_index}" ) @@ -1366,9 +1366,9 @@ def _validate_specification(self) -> None: or not right_cols.join(common_cols, how="inner").is_unique ): raise MergeError(f"Data columns not unique: {repr(common_cols)}") - self.left_on = self.right_on = common_cols + left_on = right_on = common_cols elif self.on is not None: - if self.left_on is not None or self.right_on is not None: + if left_on is not None or right_on is not None: raise MergeError( 'Can only pass argument "on" OR "left_on" ' 'and "right_on", not a combination of both.' @@ -1378,40 +1378,42 @@ def _validate_specification(self) -> None: 'Can only pass argument "on" OR "left_index" ' 'and "right_index", not a combination of both.' ) - self.left_on = self.right_on = self.on - elif self.left_on is not None: + left_on = right_on = self.on + elif left_on is not None: if self.left_index: raise MergeError( 'Can only pass argument "left_on" OR "left_index" not both.' ) - if not self.right_index and self.right_on is None: + if not self.right_index and right_on is None: raise MergeError('Must pass "right_on" OR "right_index".') - n = len(self.left_on) + n = len(left_on) if self.right_index: - if len(self.left_on) != self.right.index.nlevels: + if len(left_on) != self.right.index.nlevels: raise ValueError( "len(left_on) must equal the number " 'of levels in the index of "right"' ) - self.right_on = [None] * n - elif self.right_on is not None: + right_on = [None] * n + elif right_on is not None: if self.right_index: raise MergeError( 'Can only pass argument "right_on" OR "right_index" not both.' ) - if not self.left_index and self.left_on is None: + if not self.left_index and left_on is None: raise MergeError('Must pass "left_on" OR "left_index".') - n = len(self.right_on) + n = len(right_on) if self.left_index: - if len(self.right_on) != self.left.index.nlevels: + if len(right_on) != self.left.index.nlevels: raise ValueError( "len(right_on) must equal the number " 'of levels in the index of "left"' ) - self.left_on = [None] * n - if self.how != "cross" and len(self.right_on) != len(self.left_on): + left_on = [None] * n + if self.how != "cross" and len(right_on) != len(left_on): raise ValueError("len(right_on) must equal len(left_on)") + return left_on, right_on + def _validate(self, validate: str) -> None: # Check uniqueness of each @@ -1769,14 +1771,14 @@ def __init__( fill_method=fill_method, ) - def _validate_specification(self) -> None: - super()._validate_specification() + def _validate_left_right_on(self, left_on, right_on): + left_on, right_on = super()._validate_left_right_on(left_on, right_on) # we only allow on to be a single item for on - if len(self.left_on) != 1 and not self.left_index: + if len(left_on) != 1 and not self.left_index: raise MergeError("can only asof on a key for left") - if len(self.right_on) != 1 and not self.right_index: + if len(right_on) != 1 and not self.right_index: raise MergeError("can only asof on a key for right") if self.left_index and isinstance(self.left.index, MultiIndex): @@ -1797,27 +1799,27 @@ def _validate_specification(self) -> None: # GH#29130 Check that merge keys do not have dtype object if not self.left_index: - left_on = self.left_on[0] - if is_array_like(left_on): - lo_dtype = left_on.dtype + left_on_0 = left_on[0] + if is_array_like(left_on_0): + lo_dtype = left_on_0.dtype else: lo_dtype = ( - self.left[left_on].dtype - if left_on in self.left.columns - else self.left.index.get_level_values(left_on) + self.left[left_on_0].dtype + if left_on_0 in self.left.columns + else self.left.index.get_level_values(left_on_0) ) else: lo_dtype = self.left.index.dtype if not self.right_index: - right_on = self.right_on[0] - if is_array_like(right_on): - ro_dtype = right_on.dtype + right_on_0 = right_on[0] + if is_array_like(right_on_0): + ro_dtype = right_on_0.dtype else: ro_dtype = ( - self.right[right_on].dtype - if right_on in self.right.columns - else self.right.index.get_level_values(right_on) + self.right[right_on_0].dtype + if right_on_0 in self.right.columns + else self.right.index.get_level_values(right_on_0) ) else: ro_dtype = self.right.index.dtype @@ -1839,13 +1841,15 @@ def _validate_specification(self) -> None: if len(self.left_by) != len(self.right_by): raise MergeError("left_by and right_by must be same length") - self.left_on = self.left_by + list(self.left_on) - self.right_on = self.right_by + list(self.right_on) + left_on = self.left_by + list(left_on) + right_on = self.right_by + list(right_on) # check 'direction' is valid if self.direction not in ["backward", "forward", "nearest"]: raise MergeError(f"direction invalid: {self.direction}") + return left_on, right_on + def _get_merge_keys(self): # note this function has side effects