Skip to content
1 change: 1 addition & 0 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ Below is a list of all the keywords in Spark SQL.
|CODEGEN|non-reserved|non-reserved|non-reserved|
|COLLATE|reserved|non-reserved|reserved|
|COLLATION|reserved|non-reserved|reserved|
|COLLATIONS|reserved|non-reserved|non-reserved|
|COLLECTION|non-reserved|non-reserved|non-reserved|
|COLUMN|reserved|non-reserved|reserved|
|COLUMNS|non-reserved|non-reserved|non-reserved|
Expand Down
111 changes: 111 additions & 0 deletions docs/sql-ref-syntax-aux-show-collations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
---
layout: global
title: SHOW COLLATIONS
displayTitle: SHOW COLLATIONS
license: |
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---

### Description

Returns the list of collations supported by Spark. An optional pattern may be used to filter
the results; the `LIKE` keyword itself is optional.

### Syntax

```sql
SHOW COLLATIONS [ LIKE regex_pattern ]
```

### Parameters

* **regex_pattern**

Specifies the regular expression pattern that is used to filter the results of the statement.

* Except for the `*` and `|` characters, the pattern works like a regular expression.
* `*` alone matches 0 or more characters and `|` is used to separate multiple different regular expressions,
any of which can match.
* The leading and trailing blanks are trimmed in the input pattern before processing. The pattern match is case-insensitive.

### Output

The output has the following columns:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| NAME | STRING | No | The name of the collation. |
| LANGUAGE | STRING | Yes | The display language of the locale, or `null` for locale-independent collations. |
| COUNTRY | STRING | Yes | The display country of the locale, or `null` for locale-independent collations. |
| ACCENT_SENSITIVITY | STRING | No | Whether the collation is accent-sensitive (`ACCENT_SENSITIVE`) or accent-insensitive (`ACCENT_INSENSITIVE`). |
| CASE_SENSITIVITY | STRING | No | Whether the collation is case-sensitive (`CASE_SENSITIVE`) or case-insensitive (`CASE_INSENSITIVE`). |
| PAD_ATTRIBUTE | STRING | No | The pad attribute of the collation: `NO_PAD` or `RTRIM`. |
| ICU_VERSION | STRING | Yes | The ICU library version used for the collation, or `null` for non-ICU collations such as `UTF8_BINARY` and `UTF8_LCASE`. |

### Examples

```sql
-- List all supported collations (results truncated)
SHOW COLLATIONS;
+-----------------+--------+-------------+------------------+----------------+-------------+-----------+
|             NAME|LANGUAGE|      COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION|
+-----------------+--------+-------------+------------------+----------------+-------------+-----------+
|      UTF8_BINARY|    null|         null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       null|
|       UTF8_LCASE|    null|         null|  ACCENT_SENSITIVE|CASE_INSENSITIVE|       NO_PAD|       null|
|          UNICODE|    null|         null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       78.2|
|       UNICODE_CI|    null|         null|  ACCENT_SENSITIVE|CASE_INSENSITIVE|       NO_PAD|       78.2|
|           en_USA| English|United States|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       78.2|
|        en_USA_CI| English|United States|  ACCENT_SENSITIVE|CASE_INSENSITIVE|       NO_PAD|       78.2|
|              ...|     ...|          ...|               ...|             ...|          ...|        ...|
+-----------------+--------+-------------+------------------+----------------+-------------+-----------+

-- List all collations matching `UTF8_BINARY*`
SHOW COLLATIONS LIKE 'UTF8_BINARY*';
+-----------------+--------+-------+------------------+----------------+-------------+-----------+
|             NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION|
+-----------------+--------+-------+------------------+----------------+-------------+-----------+
|      UTF8_BINARY|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       null|
|UTF8_BINARY_RTRIM|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|        RTRIM|       null|
+-----------------+--------+-------+------------------+----------------+-------------+-----------+

-- List all collations matching `UNICODE*`
SHOW COLLATIONS LIKE 'UNICODE*';
+-------------------+--------+-------+------------------+----------------+-------------+-----------+
|               NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION|
+-------------------+--------+-------+------------------+----------------+-------------+-----------+
|            UNICODE|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       78.2|
|         UNICODE_AI|    null|   null|ACCENT_INSENSITIVE|  CASE_SENSITIVE|       NO_PAD|       78.2|
|   UNICODE_AI_RTRIM|    null|   null|ACCENT_INSENSITIVE|  CASE_SENSITIVE|        RTRIM|       78.2|
|         UNICODE_CI|    null|   null|  ACCENT_SENSITIVE|CASE_INSENSITIVE|       NO_PAD|       78.2|
|      UNICODE_CI_AI|    null|   null|ACCENT_INSENSITIVE|CASE_INSENSITIVE|       NO_PAD|       78.2|
|UNICODE_CI_AI_RTRIM|    null|   null|ACCENT_INSENSITIVE|CASE_INSENSITIVE|        RTRIM|       78.2|
|   UNICODE_CI_RTRIM|    null|   null|  ACCENT_SENSITIVE|CASE_INSENSITIVE|        RTRIM|       78.2|
|      UNICODE_RTRIM|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|        RTRIM|       78.2|
+-------------------+--------+-------+------------------+----------------+-------------+-----------+

-- List all collations matching `UNICODE` or `UTF8_BINARY`
SHOW COLLATIONS LIKE 'UNICODE|UTF8_BINARY';
+-----------+--------+-------+------------------+----------------+-------------+-----------+
|       NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION|
+-----------+--------+-------+------------------+----------------+-------------+-----------+
|UTF8_BINARY|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       null|
|    UNICODE|    null|   null|  ACCENT_SENSITIVE|  CASE_SENSITIVE|       NO_PAD|       78.2|
+-----------+--------+-------+------------------+----------------+-------------+-----------+
```

### Related Statements

* [STRING TYPE](sql-ref-datatypes.html)
1 change: 1 addition & 0 deletions docs/sql-ref-syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ You use SQL scripting to execute procedural logic in SQL.
* [SET](sql-ref-syntax-aux-conf-mgmt-set.html)
* [SET VAR](sql-ref-syntax-aux-set-var.html)
* [SHOW CACHED TABLES](sql-ref-syntax-aux-show-cached-tables.html)
* [SHOW COLLATIONS](sql-ref-syntax-aux-show-collations.html)
* [SHOW COLUMNS](sql-ref-syntax-aux-show-columns.html)
* [SHOW CREATE TABLE](sql-ref-syntax-aux-show-create-table.html)
* [SHOW DATABASES](sql-ref-syntax-aux-show-databases.html)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ CLUSTERED: 'CLUSTERED';
CODEGEN: 'CODEGEN';
COLLATE: 'COLLATE';
COLLATION: 'COLLATION';
COLLATIONS: 'COLLATIONS';
COLLECTION: 'COLLECTION';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ statement
| SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable
| SHOW CURRENT namespace #showCurrentNamespace
| SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs
| SHOW COLLATIONS (LIKE? pattern=stringLit)? #showCollations
| (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
| (DESC | DESCRIBE) PROCEDURE identifierReference #describeProcedure
| (DESC | DESCRIBE) namespace EXTENDED?
Expand Down Expand Up @@ -2321,6 +2322,7 @@ nonReserved
| CODEGEN
| COLLATE
| COLLATION
| COLLATIONS
| COLLECTION
| COLUMN
| COLUMNS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
withConnection { conn =>
val metadata = conn.getMetaData
// scalastyle:off line.size.limit
assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE")
assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE")
// scalastyle:on line.size.limit
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,13 @@ class SparkSqlAstBuilder extends AstBuilder {
ShowCatalogsCommand(Option(ctx.pattern).map(x => string(visitStringLit(x))))
}

/**
 * Builds the [[ShowCollationsCommand]] logical command for a
 * `SHOW COLLATIONS [LIKE pattern]` statement; the pattern is absent when
 * the LIKE clause was omitted.
 */
override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan = withOrigin(ctx) {
  val patternOpt = Option(ctx.pattern).map(lit => string(visitStringLit(lit)))
  ShowCollationsCommand(patternOpt)
}

/**
* Converts a multi-part identifier to a TableIdentifier.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.command

import scala.jdk.CollectionConverters._

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.util.{CollationFactory, StringUtils}
import org.apache.spark.sql.types.StringType

/**
* The command for `SHOW COLLATIONS`.
*/
/**
 * Runnable command backing `SHOW COLLATIONS [LIKE pattern]`.
 *
 * Lists every collation exposed by `CollationFactory`, optionally keeping only
 * the collations whose name matches `pattern` (Spark's SHOW-style pattern, where
 * `*` matches any sequence and `|` separates alternatives).
 */
case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCommand {
  // Fixed seven-column schema; LANGUAGE, COUNTRY and ICU_VERSION are nullable
  // because locale-independent / non-ICU collations have no value for them.
  override val output: Seq[Attribute] = Seq(
    AttributeReference("NAME", StringType, nullable = false)(),
    AttributeReference("LANGUAGE", StringType, nullable = true)(),
    AttributeReference("COUNTRY", StringType, nullable = true)(),
    AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(),
    AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(),
    AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(),
    AttributeReference("ICU_VERSION", StringType, nullable = true)())

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Load the metadata for every known collation.
    val allMeta = CollationFactory.listCollations().asScala
      .map(CollationFactory.loadCollationMeta)
    // Keep only names matching the optional SHOW-style pattern.
    val selected = pattern match {
      case Some(p) =>
        allMeta.filter(meta => StringUtils.filterPattern(Seq(meta.collationName), p).nonEmpty)
      case None =>
        allMeta
    }
    selected.map { meta =>
      // Boolean sensitivity flags are rendered as the documented string labels.
      val accentLabel = if (meta.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE"
      val caseLabel = if (meta.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE"
      Row(
        meta.collationName,
        meta.language,
        meta.country,
        accentLabel,
        caseLabel,
        meta.padAttribute,
        meta.icuVersion)
    }.toSeq
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ CLUSTERED false
CODEGEN false
COLLATE true
COLLATION true
COLLATIONS true
COLLECTION false
COLUMN true
COLUMNS false
Expand Down Expand Up @@ -432,6 +433,7 @@ CAST
CHECK
COLLATE
COLLATION
COLLATIONS
COLUMN
CONSTRAINT
CREATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ CLUSTERED false
CODEGEN false
COLLATE false
COLLATION false
COLLATIONS false
COLLECTION false
COLUMN false
COLUMNS false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ CLUSTERED false
CODEGEN false
COLLATE false
COLLATION false
COLLATIONS false
COLLECTION false
COLUMN false
COLUMNS false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3223,6 +3223,37 @@ class DataSourceV2SQLSuiteV1Filter
Row("testcat"), Row("testcat2")))
}

test("SPARK-49543: ShowCollations") {
  // SHOW COLLATIONS must expose exactly the documented seven-column schema.
  val expectedSchema = new StructType()
    .add("NAME", StringType, nullable = false)
    .add("LANGUAGE", StringType, nullable = true)
    .add("COUNTRY", StringType, nullable = true)
    .add("ACCENT_SENSITIVITY", StringType, nullable = false)
    .add("CASE_SENSITIVITY", StringType, nullable = false)
    .add("PAD_ATTRIBUTE", StringType, nullable = false)
    .add("ICU_VERSION", StringType, nullable = true)

  val result = sql("SHOW COLLATIONS")
  assert(result.schema === expectedSchema)

  // The unfiltered listing must include the well-known built-in collations.
  val rows = result.collect()
  val names = rows.map(_.getString(0))
  assert(names.contains("UTF8_BINARY"))
  assert(names.contains("UNICODE"))
  assert(names.contains("UNICODE_CI"))

  // UTF8_BINARY is both accent- and case-sensitive.
  val utf8Binary = rows.find(_.getString(0) == "UTF8_BINARY").get
  assert(utf8Binary.getString(3) == "ACCENT_SENSITIVE")
  assert(utf8Binary.getString(4) == "CASE_SENSITIVE")

  // A prefix pattern keeps only matching names.
  val unicodeRows = sql("SHOW COLLATIONS LIKE 'UNICODE*'").collect()
  assert(unicodeRows.nonEmpty)
  assert(unicodeRows.forall(_.getString(0).startsWith("UNICODE")))

  // An exact pattern yields exactly one row.
  val exactRows = sql("SHOW COLLATIONS LIKE 'UTF8_BINARY'").collect()
  assert(exactRows.length == 1)
  assert(exactRows.head.getString(0) == "UTF8_BINARY")
}

test("CREATE INDEX should fail") {
val t = "testcat.tbl"
withTable(t) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,4 +820,16 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
parser.parsePlan("SHOW CATALOGS LIKE 'defau*'"),
ShowCatalogsCommand(Some("defau*")))
}

test("SHOW COLLATIONS") {
  // Each SQL text must parse to the corresponding ShowCollationsCommand plan.
  Seq(
    "SHOW COLLATIONS" -> ShowCollationsCommand(None),
    "SHOW COLLATIONS LIKE 'UNICODE*'" -> ShowCollationsCommand(Some("UNICODE*")),
    "SHOW COLLATIONS LIKE 'UTF8_BINARY'" -> ShowCollationsCommand(Some("UTF8_BINARY"))
  ).foreach { case (sqlText, expectedPlan) =>
    comparePlans(parser.parsePlan(sqlText), expectedPlan)
  }
}
}
Loading