Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,12 @@ class Date64Converter
public:
Status AppendItem(PyObject* obj) {
int64_t t;
if (PyDate_Check(obj)) {
if (PyDateTime_Check(obj)) {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might not be necessary if we never expect a datetime object here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact I think this is wrong (we always want truncation, but I'll let the reviewer confirm).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's probably OK, but we should check whether the milliseconds are 0. I'll pull it down and take a quick look.

auto pydate = reinterpret_cast<PyDateTime_DateTime*>(obj);
t = PyDateTime_to_ms(pydate);
// Truncate any intraday milliseconds
t -= t % 86400000LL;
} else if (PyDate_Check(obj)) {
auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
t = PyDate_to_ms(pydate);
} else {
Expand Down
15 changes: 9 additions & 6 deletions cpp/src/arrow/python/util/datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,6 @@ static inline int64_t PyDate_to_days(PyDateTime_Date* pydate) {

static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
int64_t total_seconds = 0;
total_seconds += PyDateTime_DATE_GET_SECOND(pydate);
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative approach would be to do `if (PyDateTime_Check(pydate))` here; I'm not sure whether that is preferred.

total_seconds += PyDateTime_DATE_GET_MINUTE(pydate) * 60;
total_seconds += PyDateTime_DATE_GET_HOUR(pydate) * 3600;
int64_t days =
get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
PyDateTime_GET_DAY(pydate));
Expand All @@ -283,17 +280,23 @@ static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
}

static inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) {
return PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL;
int64_t total_seconds = 0;
total_seconds += PyDateTime_DATE_GET_SECOND(pydatetime);
total_seconds += PyDateTime_DATE_GET_MINUTE(pydatetime) * 60;
total_seconds += PyDateTime_DATE_GET_HOUR(pydatetime) * 3600;

return total_seconds +
(PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL);
}

static inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) {
int64_t date_ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
int64_t date_ms = PyDateTime_to_s(pydatetime) * 1000;
int ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000;
return date_ms + ms;
}

static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
int64_t ms = PyDateTime_to_s(pydatetime) * 1000;
int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
return ms * 1000 + us;
}
Expand Down
9 changes: 5 additions & 4 deletions python/pyarrow/scalar.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ cdef class Date32Value(ArrayValue):
cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()

# Shift to seconds since epoch
return datetime.datetime.utcfromtimestamp(
int(ap.Value(self.index)) * 86400).date()
return (datetime.date(1970, 1, 1) +
datetime.timedelta(days=ap.Value(self.index)))


cdef class Date64Value(ArrayValue):
Expand All @@ -237,8 +237,9 @@ cdef class Date64Value(ArrayValue):
Return this value as a Python datetime.date instance.
"""
cdef CDate64Array* ap = <CDate64Array*> self.sp_array.get()
return datetime.datetime.utcfromtimestamp(
ap.Value(self.index) / 1000).date()
return (datetime.date(1970, 1, 1) +
datetime.timedelta(
days=ap.Value(self.index) / 86400000))


cdef class Time32Value(ArrayValue):
Expand Down
12 changes: 12 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,18 @@ def test_cast_date64_to_int():
assert result.equals(expected)


def test_date64_from_builtin_datetime():
val1 = datetime.datetime(2000, 1, 1, 12, 34, 56, 123456)
val2 = datetime.datetime(2000, 1, 1)
result = pa.array([val1, val2], type='date64')
result2 = pa.array([val1.date(), val2.date()], type='date64')

assert result.equals(result2)

as_i8 = result.view('int64')
assert as_i8[0].as_py() == as_i8[1].as_py()


@pytest.mark.parametrize(('ty', 'values'), [
('bool', [True, False, True]),
('uint8', range(0, 255)),
Expand Down
11 changes: 11 additions & 0 deletions python/pyarrow/tests/test_scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# specific language governing permissions and limitations
# under the License.

import datetime
import pytest

import numpy as np
Expand Down Expand Up @@ -218,6 +219,16 @@ def test_large_list(self):
v = arr[3]
assert len(v) == 0

def test_date(self):
# ARROW-5125
d1, d2 = datetime.date(3200, 1, 1), datetime.date(1960, 1, 1),
extremes = pa.array([d1, d2], type=pa.date32())
assert extremes[0] == d1
assert extremes[1] == d2
extremes = pa.array([d1, d2], type=pa.date64())
assert extremes[0] == d1
assert extremes[1] == d2

@pytest.mark.pandas
def test_timestamp(self):
import pandas as pd
Expand Down