Skip to content

Commit

Permalink
Remove upperbound pandas 1.2 (#2141)
Browse files Browse the repository at this point in the history
Fix several behaviors in `iLocIndexer` that were incompatible with pandas, so that the pandas<1.2 upper bound can be removed.
  • Loading branch information
itholic committed Apr 8, 2021
1 parent 0fd088e commit c8f803d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 53 deletions.
19 changes: 19 additions & 0 deletions databricks/koalas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,25 @@ def _select_cols_else(
)

def __setitem__(self, key, value):
if is_list_like(value) and not isinstance(value, spark.Column):
iloc_item = self[key]
if not is_list_like(key) or not is_list_like(iloc_item):
raise ValueError("setting an array element with a sequence.")
else:
shape_iloc_item = iloc_item.shape
len_iloc_item = shape_iloc_item[0]
len_value = len(value)
if len_iloc_item != len_value:
if self._is_series:
raise ValueError(
"cannot set using a list-like indexer with a different length than "
"the value"
)
else:
raise ValueError(
"shape mismatch: value array of shape ({},) could not be broadcast "
"to indexing result of shape {}".format(len_value, shape_iloc_item)
)
super().__setitem__(key, value)
# Update again with resolved_copy to drop extra columns.
self._kdf._update_internal_frame(
Expand Down
15 changes: 7 additions & 8 deletions databricks/koalas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,7 @@ def test_frame_iloc_setitem(self):
kdf.iloc[0, 1] = 50
self.assert_eq(kdf, pdf)

with self.assertRaisesRegex(ValueError, "Incompatible indexer with Series"):
with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."):
kdf.iloc[0, 0] = -kdf.max_speed
with self.assertRaisesRegex(ValueError, "shape mismatch"):
kdf.iloc[:, [1, 0]] = -kdf.max_speed
Expand Down Expand Up @@ -1226,14 +1226,13 @@ def test_series_iloc_setitem(self):
self.assert_eq(kser, pser)
self.assert_eq(kdf, pdf)

# TODO: matching the behavior with pandas 1.2 and uncomment below test.
# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser.iloc[[1]] = -kdf.b
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser.iloc[[1]] = -kdf.b

with self.assertRaisesRegex(ValueError, "Incompatible indexer with DataFrame"):
with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."):
kser.iloc[1] = kdf[["b"]]

def test_iloc_raises(self):
Expand Down
76 changes: 33 additions & 43 deletions databricks/koalas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,25 +1149,17 @@ def test_frame_iloc_setitem(self):
pdf.iloc[[0, 1, 2], 1] = -pdf.max_speed
self.assert_eq(kdf, pdf)

# TODO: matching the behavior with pandas 1.2 and uncomment below test
# with self.assertRaisesRegex(
# ValueError,
# "shape mismatch: value array of shape (3,) could not be broadcast to indexing "
# "result of shape (2,1)",
# ):
# kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed
with self.assertRaisesRegex(
ValueError, "shape mismatch",
):
kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed

kdf.iloc[[0, 1, 2], 1] = 10 * another_kdf.max_speed
pdf.iloc[[0, 1, 2], 1] = 10 * pdf.max_speed
self.assert_eq(kdf, pdf)

# TODO: matching the behavior with pandas 1.2 and uncomment below test
# with self.assertRaisesRegex(
# ValueError,
# "shape mismatch: value array of shape (3,) could not be broadcast to indexing "
# "result of shape (1,)",
# ):
# kdf.iloc[[0], 1] = 10 * another_kdf.max_speed
with self.assertRaisesRegex(ValueError, "shape mismatch"):
kdf.iloc[[0], 1] = 10 * another_kdf.max_speed

def test_series_loc_setitem(self):
pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
Expand Down Expand Up @@ -1267,36 +1259,35 @@ def test_series_iloc_setitem(self):
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# TODO: matching the behavior with pandas 1.2 and uncomment below test.
# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser.iloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser.iloc[[1, 2]] = -kser_another

kser.iloc[[0, 1, 2]] = 10 * kser_another
pser.iloc[[0, 1, 2]] = 10 * pser_another
self.assert_eq(kser, pser)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser.iloc[[0]] = 10 * kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser.iloc[[0]] = 10 * kser_another

kser1.iloc[[0, 1, 2]] = -kser_another
pser1.iloc[[0, 1, 2]] = -pser_another
self.assert_eq(kser1, pser1)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser1.iloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser1.iloc[[1, 2]] = -kser_another

pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
kdf = ks.from_pandas(pdf)
Expand All @@ -1315,24 +1306,23 @@ def test_series_iloc_setitem(self):
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# TODO: matching the behavior with pandas 1.2 and uncomment below test.
# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kiloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kiloc[[1, 2]] = -kser_another

kiloc[[0, 1, 2]] = 10 * kser_another
piloc[[0, 1, 2]] = 10 * pser_another
self.assert_eq(kser, pser)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kiloc[[0]] = 10 * kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kiloc[[0]] = 10 * kser_another

def test_update(self):
pdf = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
Expand Down Expand Up @@ -1861,7 +1851,7 @@ def test_frame_iloc_setitem(self):
another_kdf = ks.DataFrame(pdf)

with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
kdf.iloc[[1, 2], [1]] = another_kdf.max_speed
kdf.iloc[[1, 2], [1]] = another_kdf.max_speed.iloc[[1, 2]]

def test_series_loc_setitem(self):
pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
Expand All @@ -1887,7 +1877,7 @@ def test_series_iloc_setitem(self):
kser_another = ks.from_pandas(pser_another)

with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
kser.iloc[[1]] = -kser_another
kser.iloc[[1]] = -kser_another.iloc[[1]]

def test_where(self):
pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]})
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Dependencies in Koalas. When you update don't forget to update setup.py and install.rst in docs.
pandas>=0.23.2,<1.2.0
pandas>=0.23.2
pyarrow>=0.10
numpy>=1.14,<1.20.0

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
},
python_requires='>=3.5,<3.9',
install_requires=[
'pandas>=0.23.2,<1.2.0',
'pandas>=0.23.2',
'pyarrow>=0.10',
'numpy>=1.14,<1.20.0',
],
Expand Down

0 comments on commit c8f803d

Please sign in to comment.