Skip to content

Commit

Permalink
ARROW-6510: [Python][Filesystem] Expose nanosecond resolution mtime
Browse files Browse the repository at this point in the history
Also change FileInfo.mtime to return an aware UTC datetime.

Closes #6882 from pitrou/ARROW-6510-py-fs-mtime-ns

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
pitrou authored and wesm committed Apr 9, 2020
1 parent 40e2c21 commit edd88d7
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 28 deletions.
7 changes: 0 additions & 7 deletions cpp/src/arrow/python/datetime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,6 @@ Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** ou
return Status::OK();
}

// Build a Python datetime object from a TimePoint.
//
// The time since the epoch is converted to whole microseconds with
// std::chrono::duration_cast and then handed to PyDateTime_from_int using
// TimeUnit::MICRO. Any non-OK status from PyDateTime_from_int is returned
// to the caller; on success `*out` is whatever PyDateTime_from_int stored.
Status PyDateTime_from_TimePoint(TimePoint val, PyObject** out) {
  const auto since_epoch_us =
      std::chrono::duration_cast<std::chrono::microseconds>(val.time_since_epoch());
  RETURN_NOT_OK(PyDateTime_from_int(since_epoch_us.count(), TimeUnit::MICRO, out));
  return Status::OK();
}

int64_t PyDate_to_days(PyDateTime_Date* pydate) {
return get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
PyDateTime_GET_DAY(pydate));
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/arrow/python/datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,14 @@ Status PyTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out);
ARROW_PYTHON_EXPORT
Status PyDate_from_int(int64_t val, const DateUnit unit, PyObject** out);

// WARNING: This function returns a naive datetime.
ARROW_PYTHON_EXPORT
Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out);

// This declaration must be the same as in filesystem/filesystem.h
using TimePoint =
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

ARROW_PYTHON_EXPORT
Status PyDateTime_from_TimePoint(TimePoint val, PyObject** out);

ARROW_PYTHON_EXPORT
int64_t PyDate_to_days(PyDateTime_Date* pydate);

Expand Down Expand Up @@ -117,6 +116,9 @@ inline TimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime) {
return TimePoint(TimePoint::duration(PyDateTime_to_ns(pydatetime)));
}

// Return the raw tick count of `val`'s time since the epoch. With TimePoint
// declared on a std::chrono::nanoseconds duration, this is a nanosecond count.
ARROW_PYTHON_EXPORT
inline int64_t TimePoint_to_ns(TimePoint val) {
  const auto since_epoch = val.time_since_epoch();
  return since_epoch.count();
}

ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_s(PyDateTime_Delta* pytimedelta) {
int64_t total_seconds = 0;
Expand Down
1 change: 0 additions & 1 deletion python/pyarrow/_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from pyarrow.compat import frombytes, tobytes
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport PyDateTime_from_TimePoint
from pyarrow.includes.libarrow_fs cimport *
from pyarrow.lib import _detect_compression
from pyarrow.lib cimport *
Expand Down
23 changes: 17 additions & 6 deletions python/pyarrow/_fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ from cpython.datetime cimport datetime, PyDateTime_DateTime

from pyarrow.compat import frombytes, tobytes
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (
PyDateTime_from_TimePoint, PyDateTime_to_TimePoint
)
from pyarrow.includes.libarrow cimport PyDateTime_to_TimePoint
from pyarrow.lib import _detect_compression
from pyarrow.lib cimport *
from pyarrow.util import _stringify_path

from datetime import timezone
import pathlib


Expand Down Expand Up @@ -150,9 +149,21 @@ cdef class FileInfo:
-------
mtime : datetime.datetime
"""
cdef PyObject *out
check_status(PyDateTime_from_TimePoint(self.info.mtime(), &out))
return PyObject_to_object(out)
cdef int64_t nanoseconds
nanoseconds = TimePoint_to_ns(self.info.mtime())
return datetime.fromtimestamp(nanoseconds / 1.0e9, timezone.utc)

@property
def mtime_ns(self):
    """
    Last-modification time, if available, as an integer number of
    nanoseconds since the Unix epoch.

    Returns
    -------
    mtime_ns : int
    """
    nanoseconds = TimePoint_to_ns(self.info.mtime())
    return nanoseconds


cdef class FileSelector:
Expand Down
4 changes: 1 addition & 3 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1724,10 +1724,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
pass

CStatus PyDateTime_from_int(int64_t val, const TimeUnit unit,
PyObject** out)
CStatus PyDateTime_from_TimePoint(CTimePoint val, PyObject** out)
CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
int64_t TimePoint_to_ns(CTimePoint val)


cdef extern from 'arrow/python/init.h':
Expand Down
3 changes: 0 additions & 3 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
CFileType_File "arrow::fs::FileType::File"
CFileType_Directory "arrow::fs::FileType::Directory"

cdef cppclass CTimePoint "arrow::fs::TimePoint":
pass

cdef cppclass CFileInfo "arrow::fs::FileInfo":
CFileInfo()
CFileInfo(CFileInfo&&)
Expand Down
47 changes: 42 additions & 5 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from datetime import datetime, timezone, timedelta
import gzip
import pathlib
import pickle
Expand Down Expand Up @@ -255,6 +255,18 @@ class Path:
fs.create_dir(path)


# Shared assertions on the modification-time fields of a file info object.
#
# Checks that ``mtime`` is a ``datetime`` and ``mtime_ns`` is an ``int``, that
# for a non-negative ``mtime_ns`` the two values agree (compared through
# ``pytest.approx`` because ``timestamp() * 1e9`` is a float), and that
# ``mtime`` carries a tzinfo whose UTC offset is zero.
#
# NOTE(review): leading indentation is not visible in this rendering, so it is
# unclear whether the tzinfo assertions sit inside the ``if`` or at function
# level -- confirm against the real file before restructuring.
def check_mtime(file_info):
assert isinstance(file_info.mtime, datetime)
assert isinstance(file_info.mtime_ns, int)
# Only cross-check the two representations for non-negative nanosecond values.
if file_info.mtime_ns >= 0:
assert file_info.mtime_ns == pytest.approx(
file_info.mtime.timestamp() * 1e9)
# It's an aware UTC datetime
tzinfo = file_info.mtime.tzinfo
assert tzinfo is not None
assert tzinfo.utcoffset(None) == timedelta(0)


def test_get_file_info(fs, pathfn):
aaa = pathfn('a/aa/aaa/')
bb = pathfn('a/bb')
Expand All @@ -273,30 +285,30 @@ def test_get_file_info(fs, pathfn):
assert 'aaa' in repr(aaa_info)
assert aaa_info.extension == ''
assert 'FileType.Directory' in repr(aaa_info)
assert isinstance(aaa_info.mtime, datetime)
check_mtime(aaa_info)

assert bb_info.path == str(bb)
assert bb_info.base_name == 'bb'
assert bb_info.extension == ''
assert bb_info.type == FileType.File
assert 'FileType.File' in repr(bb_info)
assert bb_info.size == 0
assert isinstance(bb_info.mtime, datetime)
check_mtime(bb_info)

assert c_info.path == str(c)
assert c_info.base_name == 'c.txt'
assert c_info.extension == 'txt'
assert c_info.type == FileType.File
assert 'FileType.File' in repr(c_info)
assert c_info.size == 4
assert isinstance(c_info.mtime, datetime)
check_mtime(c_info)

assert zzz_info.path == str(zzz)
assert zzz_info.base_name == 'zzz'
assert zzz_info.extension == ''
assert zzz_info.type == FileType.NotFound
assert 'FileType.NotFound' in repr(zzz_info)
assert isinstance(c_info.mtime, datetime)
check_mtime(zzz_info)


def test_get_file_info_with_selector(fs, pathfn):
Expand Down Expand Up @@ -329,6 +341,7 @@ def test_get_file_info_with_selector(fs, pathfn):
assert info.type == FileType.Directory
else:
raise ValueError('unexpected path {}'.format(info.path))
check_mtime(info)
finally:
fs.delete_file(file_a)
fs.delete_file(file_b)
Expand Down Expand Up @@ -548,6 +561,30 @@ def test_localfs_errors(localfs):
fs.copy_file('/non/existent', '/xxx')


def test_localfs_file_info(localfs):
    """Compare the filesystem's file info for this test module and its
    parent directory against what ``pathlib.Path.stat()`` reports."""
    fs = localfs['fs']

    this_file = pathlib.Path(__file__)
    parent_dir = this_file.parent

    # Query both paths in a single call; results come back in request order.
    requested = [this_file.as_posix(), parent_dir.as_posix()]
    file_info, dir_info = fs.get_file_info(requested)

    # Regular file: size and nanosecond mtime must match the OS view exactly.
    assert file_info.size == this_file.stat().st_size
    assert file_info.mtime_ns == this_file.stat().st_mtime_ns
    check_mtime(file_info)

    # Directory: only the mtime is compared.
    assert dir_info.mtime_ns == parent_dir.stat().st_mtime_ns
    check_mtime(dir_info)


def test_mockfs_mtime_roundtrip(mockfs):
    """An aware UTC datetime given to ``_MockFileSystem`` must come back
    unchanged as the ``mtime`` of a file created on it."""
    expected = datetime.fromtimestamp(1568799826, timezone.utc)
    # presumably the mock filesystem uses this value as its clock -- verify
    # against the _MockFileSystem constructor
    fs = _MockFileSystem(expected)

    # Create an empty file: open the output stream and close it right away.
    with fs.open_output_stream('foo'):
        pass

    infos = fs.get_file_info(['foo'])
    [info] = infos
    assert info.mtime == expected


@pytest.mark.s3
def test_s3_options():
from pyarrow.fs import S3FileSystem
Expand Down

0 comments on commit edd88d7

Please sign in to comment.