Skip to content

Commit

Permalink
BUG: 2D ndarray of dtype 'object' is always copied upon construction (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
irgolic committed Jan 19, 2021
1 parent edbd450 commit a11e99f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
28 changes: 19 additions & 9 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
constructors before passing them to a BlockManager.
"""
from collections import abc
from itertools import groupby
from operator import itemgetter
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -240,18 +242,26 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
if values.ndim == 2 and values.shape[0] != 1:
# transpose and separate blocks

dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
for n in range(len(dvals_list)):
if isinstance(dvals_list[n], np.ndarray):
dvals_list[n] = dvals_list[n].reshape(1, -1)
dvals_list = (maybe_infer_to_datetimelike(row) for row in values)

from pandas.core.internals.blocks import make_block

# TODO: What about re-joining object columns?
block_values = [
make_block(dvals_list[n], placement=[n], ndim=2)
for n in range(len(dvals_list))
]
i = 0
block_values = []
for is_object, group in groupby(dvals_list,
lambda row: is_object_dtype(row.dtype)):
group = list(group)
ei = i + len(group)
if is_object:
block_values.append(values[i:ei])
else:
block_values.extend(
make_block(row.reshape(1, -1) if isinstance(row, np.ndarray)
else row,
placement=[i + incr], ndim=2)
for incr, row in enumerate(group)
)
i = ei

else:
datelike_vals = maybe_infer_to_datetimelike(values)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2267,6 +2267,15 @@ def test_nested_dict_construction(self):
)
tm.assert_frame_equal(result, expected)

def test_object_array_does_not_copy(self):
a = np.array(['a', 'b'], dtype='object')
b = np.array([['a', 'b'],
['c', 'd']], dtype='object')
df = pd.DataFrame(a)
assert np.shares_memory(df.values, a)
df2 = pd.DataFrame(b)
assert np.shares_memory(df2.values, b)

def test_from_tzaware_object_array(self):
# GH#26825 2D object array of tzaware timestamps should not raise
dti = date_range("2016-04-05 04:30", periods=3, tz="UTC")
Expand Down

0 comments on commit a11e99f

Please sign in to comment.