Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on

```bash
pip install -r doc/requirements.txt
python setup.py build_sphinx
python setup.py build_sphinx -s doc/source
```

[1]: https://github.com/apache/parquet-cpp
12 changes: 12 additions & 0 deletions python/doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,15 @@ Interprocess Communication and Messaging
FileWriter
StreamReader
StreamWriter

Memory Pools
------------

.. autosummary::
:toctree: generated/

MemoryPool
default_memory_pool
jemalloc_memory_pool
total_allocated_bytes
set_memory_pool
8 changes: 2 additions & 6 deletions python/doc/source/jemalloc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,14 @@ operations.
.. code:: python

import pyarrow as pa
import pyarrow.jemalloc
import pyarrow.memory

jemalloc_pool = pyarrow.jemalloc.default_pool()
jemalloc_pool = pyarrow.jemalloc_memory_pool()

# Explicitly use jemalloc for allocating memory for an Arrow Array object
array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool)

# Set the global pool
pyarrow.memory.set_default_pool(jemalloc_pool)
pyarrow.set_memory_pool(jemalloc_pool)
# This operation has no explicit MemoryPool specified and will thus
# also use jemalloc for its allocations.
array = pa.Array.from_pylist([1, 2, 3])


13 changes: 12 additions & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
memory_map, create_memory_map,
get_record_batch_size, get_tensor_size)

from pyarrow._memory import MemoryPool, total_allocated_bytes
from pyarrow._memory import (MemoryPool, total_allocated_bytes,
set_memory_pool, default_memory_pool)
from pyarrow._table import Column, RecordBatch, Table, concat_tables
from pyarrow._error import (ArrowException,
ArrowKeyError,
Expand All @@ -72,6 +73,16 @@
ArrowNotImplementedError,
ArrowTypeError)


def jemalloc_memory_pool():
    """
    Return a jemalloc-based memory pool.

    The returned object can be passed to pyarrow.set_memory_pool, or used
    as an explicit ``memory_pool`` argument.

    The jemalloc extension module is imported inside this function rather
    than at module level, so an ImportError from a missing
    ``pyarrow._jemalloc`` surfaces only when this function is called.
    """
    from pyarrow import _jemalloc
    return _jemalloc.default_pool()


from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem

from pyarrow.ipc import FileReader, FileWriter, StreamReader, StreamWriter
Expand Down
12 changes: 9 additions & 3 deletions python/pyarrow/_memory.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,37 @@
from pyarrow.includes.libarrow cimport CMemoryPool, CLoggingMemoryPool
from pyarrow.includes.pyarrow cimport set_default_memory_pool, get_memory_pool


cdef class MemoryPool:
    """
    Python-visible wrapper around a raw ``CMemoryPool`` pointer.

    The pointer is stored in ``self.pool`` by ``init``; the attribute
    declaration itself is not visible here (presumably in the matching
    .pxd file -- TODO confirm).
    """

    cdef init(self, CMemoryPool* pool):
        # Remember the C-level pool this wrapper delegates to.
        self.pool = pool

    def bytes_allocated(self):
        """
        Return the value of ``bytes_allocated()`` from the wrapped C pool.
        """
        # NOTE(review): nothing here guards against ``init`` never having
        # been called -- confirm every construction path sets ``self.pool``.
        return self.pool.bytes_allocated()


# Resolve an optional MemoryPool wrapper to a raw CMemoryPool pointer:
# None maps to whatever get_memory_pool() returns, anything else maps to
# the pointer held by the wrapper.
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
    if memory_pool is None:
        # No explicit pool given: fall back to get_memory_pool().
        return get_memory_pool()
    else:
        return memory_pool.pool


# Subclass of MemoryPool that adds no members of its own in this view.
# NOTE(review): presumably intended to wrap the cimported
# CLoggingMemoryPool -- confirm against the .pxd / callers.
cdef class LoggingMemoryPool(MemoryPool):
    pass

def default_pool():
cdef:

def default_memory_pool():
cdef:
MemoryPool pool = MemoryPool()
pool.init(get_memory_pool())
return pool

def set_default_pool(MemoryPool pool):

def set_memory_pool(MemoryPool pool):
set_default_memory_pool(pool.pool)


def total_allocated_bytes():
    """
    Return ``bytes_allocated()`` of the pool returned by get_memory_pool().
    """
    # Looked up on every call, so the result reflects whichever pool
    # get_memory_pool() hands back at call time.
    cdef CMemoryPool* pool = get_memory_pool()
    return pool.bytes_allocated()
34 changes: 19 additions & 15 deletions python/pyarrow/tests/test_jemalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,50 @@
import gc
import pytest

import pyarrow as pa


try:
import pyarrow.jemalloc
pa.jemalloc_memory_pool()
HAVE_JEMALLOC = True
except ImportError:
HAVE_JEMALLOC = False


jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC,
reason='jemalloc support not built')


@jemalloc
def test_different_memory_pool():
gc.collect()
bytes_before_default = pyarrow.total_allocated_bytes()
bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
bytes_before_default = pa.total_allocated_bytes()
bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()

# it works
array = pyarrow.from_pylist([1, None, 3, None], # noqa
memory_pool=pyarrow.jemalloc.default_pool())
array = pa.from_pylist([1, None, 3, None], # noqa
memory_pool=pa.jemalloc_memory_pool())
gc.collect()
assert pyarrow.total_allocated_bytes() == bytes_before_default
assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
assert pa.total_allocated_bytes() == bytes_before_default
assert (pa.jemalloc_memory_pool().bytes_allocated() >
bytes_before_jemalloc)


@jemalloc
def test_default_memory_pool():
gc.collect()
bytes_before_default = pyarrow.total_allocated_bytes()
bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
bytes_before_default = pa.total_allocated_bytes()
bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()

old_memory_pool = pyarrow.memory.default_pool()
pyarrow.memory.set_default_pool(pyarrow.jemalloc.default_pool())
old_memory_pool = pa.default_memory_pool()
pa.set_memory_pool(pa.jemalloc_memory_pool())

array = pyarrow.from_pylist([1, None, 3, None]) # noqa
array = pa.from_pylist([1, None, 3, None]) # noqa

pyarrow.memory.set_default_pool(old_memory_pool)
pa.set_memory_pool(old_memory_pool)
gc.collect()

assert pyarrow.total_allocated_bytes() == bytes_before_default
assert pa.total_allocated_bytes() == bytes_before_default

assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
assert (pa.jemalloc_memory_pool().bytes_allocated() >
bytes_before_jemalloc)