From b55ff8f1acacdcd109587068924cb8daf473f2bf Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 13 Aug 2022 10:38:05 -0600 Subject: [PATCH 1/3] Documentation for JOIN syntax --- datafusion-cli/Cargo.lock | 19 ++++-- docs/source/user-guide/sql/select.md | 87 ++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 5 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 7e0a4d743770..6ab6ad3105c6 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -57,9 +57,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "19.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b7e88e4739c3616cae75adce6660c9c1a80f2660545eb77afbe0e4a0f048a0" +checksum = "c72a69495f06c8abb65b76a87be192a26fa724380d1f292d4e558a32afed9989" dependencies = [ "ahash", "bitflags", @@ -75,8 +75,8 @@ dependencies = [ "lexical-core", "multiversion", "num", - "rand", "regex", + "regex-syntax", "serde", "serde_derive", "serde_json", @@ -1277,10 +1277,11 @@ dependencies = [ [[package]] name = "parquet" -version = "19.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cfcf237362047888b342e4f0e213a9b303133b085853e447f2c58e65e00099d" +checksum = "d0f0af698fcf8d1d9f2971766ebef25821ffe8c39c91837c276dcd97e075d950" dependencies = [ + "ahash", "arrow", "base64", "brotli", @@ -1289,11 +1290,13 @@ dependencies = [ "chrono", "flate2", "futures", + "hashbrown", "lz4", "num", "num-bigint", "parquet-format", "rand", + "seq-macro", "snap", "thrift", "tokio", @@ -1538,6 +1541,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "seq-macro" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0772c5c30e1a0d91f6834f8e545c69281c099dfa9a3ac58d96a9fd629c8d4898" + [[package]] name = "serde" version = "1.0.142" diff --git a/docs/source/user-guide/sql/select.md b/docs/source/user-guide/sql/select.md index 49399c93c60d..b705013118fb 100644 --- a/docs/source/user-guide/sql/select.md +++ b/docs/source/user-guide/sql/select.md @@ -28,6 +28,7 @@ DataFusion supports the following syntax for queries: [ [WITH](#with-clause) with_query [, ...] ]
[SELECT](#select-clause) [ ALL | DISTINCT ] select_expr [, ...]
[ [FROM](#from-clause) from_item [, ...] ]
+[ [JOIN](#join-clause) join_item [, ...] ]
[ [WHERE](#where-clause) condition ]
[ [GROUP BY](#group-by-clause) grouping_element [, ...] ]
[ [HAVING](#having-clause) condition]
@@ -77,6 +78,92 @@ Example: SELECT a FROM table WHERE a > 10 ``` +## JOIN clause + +DataFusion supports `INNER JOIN`, `LEFT OUTER JOIN`, `RIGHT OUTER JOIN`, `FULL OUTER JOIN`, and `CROSS JOIN`. + +The following examples are based on this table: + +```sql +select * from x; ++----------+----------+ +| column_1 | column_2 | ++----------+----------+ +| 1 | 2 | ++----------+----------+ +``` + +### INNER JOIN + +The keywords `JOIN` or `INNER JOIN` define a join that only shows rows where there is a match in both tables. + +```sql +❯ select * from x inner join x y ON x.column_1 = y.column_1; ++----------+----------+----------+----------+ +| column_1 | column_2 | column_1 | column_2 | ++----------+----------+----------+----------+ +| 1 | 2 | 1 | 2 | ++----------+----------+----------+----------+ +``` + +### LEFT OUTER JOIN + +The keywords `LEFT JOIN` or `LEFT OUTER JOIN` define a join that includes all rows from the left table even if there +is not a match in the right table. When there is no match, null values are produced for the right side of the join. + +```sql +❯ select * from x left join x y ON x.column_1 = y.column_2; ++----------+----------+----------+----------+ +| column_1 | column_2 | column_1 | column_2 | ++----------+----------+----------+----------+ +| 1 | 2 | | | ++----------+----------+----------+----------+ +``` + +### RIGHT OUTER JOIN + +The keywords `RIGHT JOIN` or `RIGHT OUTER JOIN` define a join that includes all rows from the right table even if there +is not a match in the left table. When there is no match, null values are produced for the left side of the join. + +```sql +❯ select * from x right join x y ON x.column_1 = y.column_2; ++----------+----------+----------+----------+ +| column_1 | column_2 | column_1 | column_2 | ++----------+----------+----------+----------+ +| | | 1 | 2 | ++----------+----------+----------+----------+ +``` + +### FULL OUTER JOIN + +The keywords `FULL JOIN` or `FULL OUTER JOIN` define a join that is effectively a union of a `LEFT OUTER JOIN` and +`RIGHT OUTER JOIN`. It will show all rows from the left and right side of the join and will produce null values on +either side of the join where there is not a match. + +```sql +❯ select * from x full outer join x y ON x.column_1 = y.column_2; ++----------+----------+----------+----------+ +| column_1 | column_2 | column_1 | column_2 | ++----------+----------+----------+----------+ +| 1 | 2 | | | +| | | 1 | 2 | ++----------+----------+----------+----------+ +``` + +### CROSS JOIN + +A cross join produces a cartesian product that matches every row in the left side of the join with every row in the +right side of the join. + +```sql +❯ select * from x cross join x y; ++----------+----------+----------+----------+ +| column_1 | column_2 | column_1 | column_2 | ++----------+----------+----------+----------+ +| 1 | 2 | 1 | 2 | ++----------+----------+----------+----------+ +``` + ## GROUP BY clause Example: From 13d4a7d46e1bb9c8d4c457f887e2260b61112f22 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 13 Aug 2022 10:40:37 -0600 Subject: [PATCH 2/3] revert Cargo.lock --- datafusion-cli/Cargo.lock | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 6ab6ad3105c6..7e0a4d743770 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -57,9 +57,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "20.0.0" +version = "19.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72a69495f06c8abb65b76a87be192a26fa724380d1f292d4e558a32afed9989" +checksum = "89b7e88e4739c3616cae75adce6660c9c1a80f2660545eb77afbe0e4a0f048a0" dependencies = [ "ahash", "bitflags", @@ -75,8 +75,8 @@ dependencies = [ "lexical-core", "multiversion", "num", + "rand", "regex", - "regex-syntax", "serde", "serde_derive", "serde_json", @@ -1277,11 +1277,10 @@ dependencies = [ [[package]] name = "parquet" -version = "20.0.0" +version = "19.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f0af698fcf8d1d9f2971766ebef25821ffe8c39c91837c276dcd97e075d950" +checksum = "2cfcf237362047888b342e4f0e213a9b303133b085853e447f2c58e65e00099d" dependencies = [ - "ahash", "arrow", "base64", "brotli", @@ -1290,13 +1289,11 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown", "lz4", "num", "num-bigint", "parquet-format", "rand", - "seq-macro", "snap", "thrift", "tokio", @@ -1541,12 +1538,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "seq-macro" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0772c5c30e1a0d91f6834f8e545c69281c099dfa9a3ac58d96a9fd629c8d4898" - [[package]] name = "serde" version = "1.0.142" From b7ac35387c8127fa3dcef378102c61add716abac Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 13 Aug 2022 10:41:43 -0600 Subject: [PATCH 3/3] prettier --- docs/source/user-guide/sql/select.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/user-guide/sql/select.md b/docs/source/user-guide/sql/select.md index b705013118fb..008981fde0d3 100644 --- a/docs/source/user-guide/sql/select.md +++ b/docs/source/user-guide/sql/select.md @@ -108,7 +108,7 @@ The keywords `JOIN` or `INNER JOIN` define a join that only shows rows where the ### LEFT OUTER JOIN -The keywords `LEFT JOIN` or `LEFT OUTER JOIN` define a join that includes all rows from the left table even if there +The keywords `LEFT JOIN` or `LEFT OUTER JOIN` define a join that includes all rows from the left table even if there is not a match in the right table. When there is no match, null values are produced for the right side of the join. ```sql @@ -136,8 +136,8 @@ is not a match in the left table. When there is no match, null values are produc ### FULL OUTER JOIN -The keywords `FULL JOIN` or `FULL OUTER JOIN` define a join that is effectively a union of a `LEFT OUTER JOIN` and -`RIGHT OUTER JOIN`. It will show all rows from the left and right side of the join and will produce null values on +The keywords `FULL JOIN` or `FULL OUTER JOIN` define a join that is effectively a union of a `LEFT OUTER JOIN` and +`RIGHT OUTER JOIN`. It will show all rows from the left and right side of the join and will produce null values on either side of the join where there is not a match. ```sql @@ -152,7 +152,7 @@ either side of the join where there is not a match. ### CROSS JOIN -A cross join produces a cartesian product that matches every row in the left side of the join with every row in the +A cross join produces a cartesian product that matches every row in the left side of the join with every row in the right side of the join. ```sql