Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions python/doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,13 @@ Scalar Value Types
StringValue
FixedSizeBinaryValue

Array Types
-----------
Array Types and Constructors
----------------------------

.. autosummary::
:toctree: generated/

array
Array
NumericArray
IntegerArray
Expand Down
17 changes: 8 additions & 9 deletions python/doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,26 +90,26 @@ using the default system install location will work, but for now we are being
explicit:

.. code-block:: bash

export ARROW_HOME=$HOME/local

Now, we build Arrow:

.. code-block:: bash

cd arrow/cpp

mkdir dev-build
cd dev-build

cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME ..

make

# Use sudo here if $ARROW_HOME requires it
make install

To get the optional Parquet support, you should also build and install
To get the optional Parquet support, you should also build and install
`parquet-cpp <https://github.com/apache/parquet-cpp/blob/master/README.md>`_.

Install `pyarrow`
Expand Down Expand Up @@ -138,15 +138,14 @@ Install `pyarrow`


.. code-block:: python

In [1]: import pyarrow

In [2]: pyarrow.from_pylist([1,2,3])
In [2]: pyarrow.array([1,2,3])
Out[2]:
<pyarrow.array.Int64Array object at 0x7f899f3e60e8>
[
1,
2,
3
]

2 changes: 1 addition & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
DataType, FixedSizeBinaryType,
Field, Schema, schema,
Array, Tensor,
from_pylist,
array,
from_numpy_dtype,
NumericArray, IntegerArray, FloatingPointArray,
BooleanArray,
Expand Down
69 changes: 36 additions & 33 deletions python/pyarrow/_array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,42 @@ cdef maybe_coerce_datetime64(values, dtype, DataType type,
return values, type



def array(object sequence, DataType type=None, MemoryPool memory_pool=None):
    """
    Build a pyarrow.Array from a Python sequence.

    Parameters
    ----------
    sequence : sequence-like object of Python objects
        The values to convert.
    type : pyarrow.DataType, optional
        Explicit Arrow type for the result. If not passed, the type is
        inferred from the data.
    memory_pool : pyarrow.MemoryPool, optional
        Pool used for allocations. If not passed, memory is allocated from
        the currently-set default memory pool.

    Returns
    -------
    array : pyarrow.Array
    """
    cdef shared_ptr[CArray] converted
    cdef CMemoryPool* c_pool = maybe_unbox_memory_pool(memory_pool)

    if type is not None:
        # Caller fixed the target type: forward it to the converter.
        check_status(
            pyarrow.ConvertPySequence(
                sequence, c_pool, &converted, type.sp_type
            )
        )
    else:
        # No explicit type: use the overload without a type argument.
        check_status(pyarrow.ConvertPySequence(sequence, c_pool, &converted))

    return box_array(converted)



cdef class Array:

cdef init(self, const shared_ptr[CArray]& sp_array):
Expand Down Expand Up @@ -891,36 +927,6 @@ cdef class Array:

return box_array(out)

@staticmethod
def from_list(object list_obj, DataType type=None,
              MemoryPool memory_pool=None):
    """
    Convert a Python list to an Arrow array.

    Parameters
    ----------
    list_obj : array_like
        The Python values to convert.
    type : DataType, optional
        Explicit Arrow type for the result. When None, the converter is
        called without a type argument.
    memory_pool : MemoryPool, optional
        Memory pool handed to the converter after being unboxed by
        maybe_unbox_memory_pool.

    Returns
    -------
    pyarrow.array.Array
    """
    cdef shared_ptr[CArray] converted
    cdef CMemoryPool* c_pool = maybe_unbox_memory_pool(memory_pool)

    if type is not None:
        # Caller fixed the target type: forward it to the converter.
        check_status(
            pyarrow.ConvertPySequence(
                list_obj, c_pool, &converted, type.sp_type
            )
        )
    else:
        # No explicit type: use the overload without a type argument.
        check_status(pyarrow.ConvertPySequence(list_obj, c_pool, &converted))

    return box_array(converted)

property null_count:

def __get__(self):
Expand Down Expand Up @@ -1363,6 +1369,3 @@ cdef object get_series_values(object obj):
result = PandasSeries(obj).values

return result


from_pylist = Array.from_list
4 changes: 2 additions & 2 deletions python/pyarrow/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,9 @@ def dictionary(self):
# Only integer and string partition types are supported right now
try:
integer_keys = [int(x) for x in self.keys]
dictionary = _array.from_pylist(integer_keys)
dictionary = _array.array(integer_keys)
except ValueError:
dictionary = _array.from_pylist(self.keys)
dictionary = _array.array(self.keys)

self._dictionary = dictionary
return dictionary
Expand Down
18 changes: 9 additions & 9 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ def test_repr_on_pre_init_array():


def test_getitem_NA():
arr = pa.from_pylist([1, None, 2])
arr = pa.array([1, None, 2])
assert arr[1] is pa.NA


def test_list_format():
arr = pa.from_pylist([[1], None, [2, 3, None]])
arr = pa.array([[1], None, [2, 3, None]])
result = fmt.array_format(arr)
expected = """\
[
Expand All @@ -55,7 +55,7 @@ def test_list_format():


def test_string_format():
arr = pa.from_pylist(['', None, 'foo'])
arr = pa.array(['', None, 'foo'])
result = fmt.array_format(arr)
expected = """\
[
Expand All @@ -67,7 +67,7 @@ def test_string_format():


def test_long_array_format():
arr = pa.from_pylist(range(100))
arr = pa.array(range(100))
result = fmt.array_format(arr, window=2)
expected = """\
[
Expand All @@ -83,7 +83,7 @@ def test_long_array_format():
def test_to_pandas_zero_copy():
import gc

arr = pa.from_pylist(range(10))
arr = pa.array(range(10))

for i in range(10):
np_arr = arr.to_pandas()
Expand All @@ -93,7 +93,7 @@ def test_to_pandas_zero_copy():
assert sys.getrefcount(arr) == 2

for i in range(10):
arr = pa.from_pylist(range(10))
arr = pa.array(range(10))
np_arr = arr.to_pandas()
arr = None
gc.collect()
Expand All @@ -108,14 +108,14 @@ def test_to_pandas_zero_copy():


def test_array_slice():
arr = pa.from_pylist(range(10))
arr = pa.array(range(10))

sliced = arr.slice(2)
expected = pa.from_pylist(range(2, 10))
expected = pa.array(range(2, 10))
assert sliced.equals(expected)

sliced2 = arr.slice(2, 4)
expected2 = pa.from_pylist(range(2, 6))
expected2 = pa.array(range(2, 6))
assert sliced2.equals(expected2)

# 0 offset
Expand Down
Loading