From 054bef94ca7e84ff8e2e27af65e00e183f7be6da Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sat, 25 Apr 2020 09:02:16 +0900 Subject: [PATCH] [SPARK-31491][SQL][DOCS] Re-arrange Data Types page to document Floating Point Special Values ### What changes were proposed in this pull request? Re-arrange Data Types page to document Floating Point Special Values ### Why are the changes needed? To complete SQL Reference ### Does this PR introduce any user-facing change? Yes - add Floating Point Special Values in Data Types page - move NaN Semantics to Data Types page Screen Shot 2020-04-24 at 9 14 57 AM Screen Shot 2020-04-24 at 9 15 22 AM Screen Shot 2020-04-24 at 9 15 44 AM ### How was this patch tested? Manually build and check Closes #28264 from huaxingao/datatypes. Authored-by: Huaxin Gao Signed-off-by: Takeshi Yamamuro --- docs/_data/menu-sql.yaml | 2 - docs/sql-ref-datatypes.md | 119 ++++++++++++++++++++++++++++++++++ docs/sql-ref-nan-semantics.md | 29 --------- 3 files changed, 119 insertions(+), 31 deletions(-) delete mode 100644 docs/sql-ref-nan-semantics.md diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 26cca61a9d6ac..1097079e4597d 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -84,8 +84,6 @@ url: sql-ref-literals.html - text: Null Semantics url: sql-ref-null-semantics.html - - text: NaN Semantics - url: sql-ref-nan-semantics.html - text: ANSI Compliance url: sql-ref-ansi-compliance.html subitems: diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index 150e194ae6cde..0d49f6f882228 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -19,6 +19,8 @@ license: | limitations under the License. 
--- +### Supported Data Types + Spark SQL and DataFrames support the following data types: * Numeric types @@ -706,3 +708,120 @@ The following table shows the type names as well as aliases used in Spark SQL pa + +### Floating Point Special Values + +Spark SQL supports several special floating point values in a case-insensitive manner: + + * Inf/+Inf/Infinity/+Infinity: positive infinity + * ```FloatType```: equivalent to Scala Float.PositiveInfinity. + * ```DoubleType```: equivalent to Scala Double.PositiveInfinity. + * -Inf/-Infinity: negative infinity + * ```FloatType```: equivalent to Scala Float.NegativeInfinity. + * ```DoubleType```: equivalent to Scala Double.NegativeInfinity. + * NaN: not a number + * ```FloatType```: equivalent to Scala Float.NaN. + * ```DoubleType```: equivalent to Scala Double.NaN. + +#### Positive/Negative Infinity Semantics + +There is special handling for positive and negative infinity. They have the following semantics: + + * Positive infinity multiplied by any positive value returns positive infinity. + * Negative infinity multiplied by any positive value returns negative infinity. + * Positive infinity multiplied by any negative value returns negative infinity. + * Negative infinity multiplied by any negative value returns positive infinity. + * Positive/negative infinity multiplied by 0 returns NaN. + * Positive/negative infinity is equal to itself. + * In aggregations, all positive infinity values are grouped together. Similarly, all negative infinity values are grouped together. + * Positive infinity and negative infinity are treated as normal values in join keys. + * Positive infinity sorts lower than NaN and higher than any other value. + * Negative infinity sorts lower than any other value. + +#### NaN Semantics + +There is special handling for not-a-number (NaN) when dealing with `float` or `double` types that +does not exactly match standard floating point semantics. +Specifically: + + * NaN = NaN returns true. 
+ * In aggregations, all NaN values are grouped together. + * NaN is treated as a normal value in join keys. + * NaN values go last when in ascending order, larger than any other numeric value. + +#### Examples + +{% highlight sql %} +SELECT double('infinity') AS col; ++--------+ +| col| ++--------+ +|Infinity| ++--------+ + +SELECT float('-inf') AS col; ++---------+ +| col| ++---------+ +|-Infinity| ++---------+ + +SELECT float('NaN') AS col; ++---+ +|col| ++---+ +|NaN| ++---+ + +SELECT double('infinity') * 0 AS col; ++---+ +|col| ++---+ +|NaN| ++---+ + +SELECT double('-infinity') * (-1234567) AS col; ++--------+ +| col| ++--------+ +|Infinity| ++--------+ + +SELECT double('infinity') < double('NaN') AS col; ++----+ +| col| ++----+ +|true| ++----+ + +SELECT double('NaN') = double('NaN') AS col; ++----+ +| col| ++----+ +|true| ++----+ + +SELECT double('inf') = double('infinity') AS col; ++----+ +| col| ++----+ +|true| ++----+ + +CREATE TABLE test (c1 int, c2 double); +INSERT INTO test VALUES (1, double('infinity')); +INSERT INTO test VALUES (2, double('infinity')); +INSERT INTO test VALUES (3, double('inf')); +INSERT INTO test VALUES (4, double('-inf')); +INSERT INTO test VALUES (5, double('NaN')); +INSERT INTO test VALUES (6, double('NaN')); +INSERT INTO test VALUES (7, double('-infinity')); +SELECT COUNT(*), c2 FROM test GROUP BY c2; ++---------+---------+ +| count(1)| c2| ++---------+---------+ +| 2| NaN| +| 2|-Infinity| +| 3| Infinity| ++---------+---------+ +{% endhighlight %} \ No newline at end of file diff --git a/docs/sql-ref-nan-semantics.md b/docs/sql-ref-nan-semantics.md deleted file mode 100644 index f6a85728263a7..0000000000000 --- a/docs/sql-ref-nan-semantics.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -layout: global -title: Nan Semantics -displayTitle: NaN Semantics -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - -There is specially handling for not-a-number (NaN) when dealing with `float` or `double` types that -does not exactly match standard floating point semantics. -Specifically: - - - NaN = NaN returns true. - - In aggregations, all NaN values are grouped together. - - NaN is treated as a normal value in join keys. - - NaN values go last when in ascending order, larger than any other numeric value.