Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3060,11 +3060,6 @@ impl ScalarValue {
cast_options: &CastOptions<'static>,
) -> Result<Self> {
let scalar_array = match (self, target_type) {
(
ScalarValue::Float64(Some(float_ts)),
DataType::Timestamp(TimeUnit::Nanosecond, None),
) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
.to_array()?,
(
ScalarValue::Decimal128(Some(decimal_value), _, scale),
DataType::Timestamp(time_unit, None),
Expand Down
17 changes: 16 additions & 1 deletion datafusion/functions/src/datetime/to_timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ use std::any::Any;
use std::sync::Arc;

use crate::datetime::common::*;
use arrow::array::Float64Array;
use arrow::datatypes::DataType::*;
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{
ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
};
use datafusion_common::format::DEFAULT_CAST_OPTIONS;
use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
Expand Down Expand Up @@ -319,9 +321,22 @@ impl ScalarUDFImpl for ToTimestampFunc {
Int32 | Int64 => args[0]
.cast_to(&Timestamp(Second, None), None)?
.cast_to(&Timestamp(Nanosecond, None), None),
Null | Float64 | Timestamp(_, None) => {
Null | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Nanosecond, None), None)
}
Float64 => {
let rescaled = arrow::compute::kernels::numeric::mul(
&args[0].to_array(1)?,
&arrow::array::Scalar::new(Float64Array::from(vec![
1_000_000_000f64,
])),
)?;
Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
&rescaled,
&Timestamp(Nanosecond, None),
&DEFAULT_CAST_OPTIONS,
)?))
}
Timestamp(_, Some(tz)) => {
args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
}
Expand Down
117 changes: 113 additions & 4 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,115 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01'
2000-01-01T01:01:01Z


##########
## cast tests
##########

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1 AS float) AS timestamp(0))) AS t1,
(SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t2,
(SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t3,
(SELECT CAST(CAST(1 AS double) AS timestamp(0))) AS t4,
(SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t5,
(SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t6
)
----
true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1 AS float) AS timestamp(3))) AS t1,
(SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t2,
(SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t3,
(SELECT CAST(CAST(1 AS double) AS timestamp(3))) AS t4,
(SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t5,
(SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t6
)
----
true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1 AS float) AS timestamp(6))) AS t1,
(SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t2,
(SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t3,
(SELECT CAST(CAST(1 AS double) AS timestamp(6))) AS t4,
(SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t5,
(SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t6
)
----
true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1 AS float) AS timestamp(9))) AS t1,
(SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t2,
(SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t3,
(SELECT CAST(CAST(1 AS double) AS timestamp(9))) AS t4,
(SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t5,
(SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t6
)
----
true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1.125 AS float) AS timestamp(0))) AS t1,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
(SELECT CAST(CAST(1.125 AS double) AS timestamp(0))) AS t4,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
)
----
true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1.125 AS float) AS timestamp(3))) AS t1,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
(SELECT CAST(CAST(1.125 AS double) AS timestamp(3))) AS t4,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
)
----
true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1.125 AS float) AS timestamp(6))) AS t1,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
(SELECT CAST(CAST(1.125 AS double) AS timestamp(6))) AS t4,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
)
----
true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001

query BPPPPPP
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
FROM (SELECT
(SELECT CAST(CAST(1.125 AS float) AS timestamp(9))) AS t1,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
(SELECT CAST(CAST(1.125 AS double) AS timestamp(9))) AS t4,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
)
----
true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001


##########
## to_timestamp tests
##########
Expand Down Expand Up @@ -394,12 +503,12 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08 12
query PPP
SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3;
----
1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100
1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I read this difference right: cast(float_col AS timestamp) is now treated as though the value is 1.1 ns, whereas before it was treated as 1.1 sec?

What do we think about simply not supporting explicit conversion from float --> timestamp to follow the duckdb/postgres model? That feels far more defensible to me than this behavior, which is both different than it was previously AND not consistent with other engines.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both different than it was previously

it's NOT!

it's different ONLY in the constant-folding context.

BTW, to_timestamp(1.1) also works the way it appears to only in the constant-folding context; I filed #16678 for this, but this PR fixes that problem as well.

What do we think about simply not supporting explicit conversion from float --> timestamp

I am supportive of that. Same for decimals and perhaps ints.

But it's definitely more work and a more controversial change.
So we should first fix #16636, #16531 and #16678 which are code bugs bringing embarrassment to the project and data corruption to the users.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thank you, I double checked and reminded myself of what was going on:

DataFusion CLI v48.0.0
> select cast(1.1 as timestamp);
+-------------------------+
| Float64(1.1)            |
+-------------------------+
| 1970-01-01T00:00:01.100 |
+-------------------------+
1 row(s) fetched.
Elapsed 0.005 seconds.

> select cast(column1 as timestamp) from values (1.1);
+-------------------------------+
| column1                       |
+-------------------------------+
| 1970-01-01T00:00:00.000000001 |
+-------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. I tried to capture this in the linked issue #16636
Thanks for bringing this example here.


query PPP
SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3;
----
1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900
1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999

query PPP
SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3;
Expand All @@ -409,12 +518,12 @@ SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as
query PPP
SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3;
----
1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890
1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001

query PPP
SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3;
----
1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784
1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789

# to_timestamp Decimal128 inputs

Expand Down