org.apache.spark.sql.catalyst.expressions.Abs |
abs |
SELECT abs(-1) |
struct<abs(-1):int> |
org.apache.spark.sql.catalyst.expressions.Acos |
acos |
SELECT acos(1) |
struct<ACOS(1):double> |
org.apache.spark.sql.catalyst.expressions.Acosh |
acosh |
SELECT acosh(1) |
struct<ACOSH(1):double> |
org.apache.spark.sql.catalyst.expressions.Add |
+ |
SELECT 1 + 2 |
struct<(1 + 2):int> |
org.apache.spark.sql.catalyst.expressions.AddMonths |
add_months |
SELECT add_months('2016-08-31', 1) |
struct<add_months(2016-08-31, 1):date> |
org.apache.spark.sql.catalyst.expressions.And |
and |
SELECT true and true |
struct<(true AND true):boolean> |
org.apache.spark.sql.catalyst.expressions.ArrayAggregate |
aggregate |
SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) |
struct<aggregate(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
org.apache.spark.sql.catalyst.expressions.ArrayContains |
array_contains |
SELECT array_contains(array(1, 2, 3), 2) |
struct<array_contains(array(1, 2, 3), 2):boolean> |
org.apache.spark.sql.catalyst.expressions.ArrayDistinct |
array_distinct |
SELECT array_distinct(array(1, 2, 3, null, 3)) |
struct<array_distinct(array(1, 2, 3, NULL, 3)):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayExcept |
array_except |
SELECT array_except(array(1, 2, 3), array(1, 3, 5)) |
struct<array_except(array(1, 2, 3), array(1, 3, 5)):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayExists |
exists |
SELECT exists(array(1, 2, 3), x -> x % 2 == 0) |
struct<exists(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
org.apache.spark.sql.catalyst.expressions.ArrayFilter |
filter |
SELECT filter(array(1, 2, 3), x -> x % 2 == 1) |
struct<filter(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 1), namedlambdavariable())):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayForAll |
forall |
SELECT forall(array(1, 2, 3), x -> x % 2 == 0) |
struct<forall(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
org.apache.spark.sql.catalyst.expressions.ArrayIntersect |
array_intersect |
SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) |
struct<array_intersect(array(1, 2, 3), array(1, 3, 5)):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayJoin |
array_join |
SELECT array_join(array('hello', 'world'), ' ') |
struct<array_join(array(hello, world), ):string> |
org.apache.spark.sql.catalyst.expressions.ArrayMax |
array_max |
SELECT array_max(array(1, 20, null, 3)) |
struct<array_max(array(1, 20, NULL, 3)):int> |
org.apache.spark.sql.catalyst.expressions.ArrayMin |
array_min |
SELECT array_min(array(1, 20, null, 3)) |
struct<array_min(array(1, 20, NULL, 3)):int> |
org.apache.spark.sql.catalyst.expressions.ArrayPosition |
array_position |
SELECT array_position(array(3, 2, 1), 1) |
struct<array_position(array(3, 2, 1), 1):bigint> |
org.apache.spark.sql.catalyst.expressions.ArrayRemove |
array_remove |
SELECT array_remove(array(1, 2, 3, null, 3), 3) |
struct<array_remove(array(1, 2, 3, NULL, 3), 3):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayRepeat |
array_repeat |
SELECT array_repeat('123', 2) |
struct<array_repeat(123, 2):array<string>> |
org.apache.spark.sql.catalyst.expressions.ArraySort |
array_sort |
SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) |
struct<array_sort(array(5, 6, 1), lambdafunction(CASE WHEN (namedlambdavariable() < namedlambdavariable()) THEN -1 WHEN (namedlambdavariable() > namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayTransform |
transform |
SELECT transform(array(1, 2, 3), x -> x + 1) |
struct<transform(array(1, 2, 3), lambdafunction((namedlambdavariable() + 1), namedlambdavariable())):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArrayUnion |
array_union |
SELECT array_union(array(1, 2, 3), array(1, 3, 5)) |
struct<array_union(array(1, 2, 3), array(1, 3, 5)):array<int>> |
org.apache.spark.sql.catalyst.expressions.ArraysOverlap |
arrays_overlap |
SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) |
struct<arrays_overlap(array(1, 2, 3), array(3, 4, 5)):boolean> |
org.apache.spark.sql.catalyst.expressions.ArraysZip |
arrays_zip |
SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) |
struct<arrays_zip(array(1, 2, 3), array(2, 3, 4)):array<struct<0:int,1:int>>> |
org.apache.spark.sql.catalyst.expressions.Ascii |
ascii |
SELECT ascii('222') |
struct<ascii(222):int> |
org.apache.spark.sql.catalyst.expressions.Asin |
asin |
SELECT asin(0) |
struct<ASIN(0):double> |
org.apache.spark.sql.catalyst.expressions.Asinh |
asinh |
SELECT asinh(0) |
struct<ASINH(0):double> |
org.apache.spark.sql.catalyst.expressions.AssertTrue |
assert_true |
SELECT assert_true(0 < 1) |
struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
org.apache.spark.sql.catalyst.expressions.Atan |
atan |
SELECT atan(0) |
struct<ATAN(0):double> |
org.apache.spark.sql.catalyst.expressions.Atan2 |
atan2 |
SELECT atan2(0, 0) |
struct<ATAN2(0, 0):double> |
org.apache.spark.sql.catalyst.expressions.Atanh |
atanh |
SELECT atanh(0) |
struct<ATANH(0):double> |
org.apache.spark.sql.catalyst.expressions.BRound |
bround |
SELECT bround(2.5, 0) |
struct<bround(2.5, 0):decimal(2,0)> |
org.apache.spark.sql.catalyst.expressions.Base64 |
base64 |
SELECT base64('Spark SQL') |
struct<base64(Spark SQL):string> |
org.apache.spark.sql.catalyst.expressions.Bin |
bin |
SELECT bin(13) |
struct<bin(13):string> |
org.apache.spark.sql.catalyst.expressions.BitLength |
bit_length |
SELECT bit_length('Spark SQL') |
struct<bit_length(Spark SQL):int> |
org.apache.spark.sql.catalyst.expressions.BitwiseAnd |
& |
SELECT 3 & 5 |
struct<(3 & 5):int> |
org.apache.spark.sql.catalyst.expressions.BitwiseCount |
bit_count |
SELECT bit_count(0) |
struct<bit_count(0):int> |
org.apache.spark.sql.catalyst.expressions.BitwiseGet |
bit_get |
SELECT bit_get(11, 0) |
struct<bit_get(11, 0):tinyint> |
org.apache.spark.sql.catalyst.expressions.BitwiseGet |
getbit |
SELECT getbit(11, 0) |
struct<getbit(11, 0):tinyint> |
org.apache.spark.sql.catalyst.expressions.BitwiseNot |
~ |
SELECT ~ 0 |
struct<~0:int> |
org.apache.spark.sql.catalyst.expressions.BitwiseOr |
| |
SELECT 3 | 5 |
struct<(3 | 5):int> |
org.apache.spark.sql.catalyst.expressions.BitwiseXor |
^ |
SELECT 3 ^ 5 |
struct<(3 ^ 5):int> |
org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection |
java_method |
SELECT java_method('java.util.UUID', 'randomUUID') |
struct<java_method(java.util.UUID, randomUUID):string> |
org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection |
reflect |
SELECT reflect('java.util.UUID', 'randomUUID') |
struct<reflect(java.util.UUID, randomUUID):string> |
org.apache.spark.sql.catalyst.expressions.CaseWhen |
when |
SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END |
struct<CASE WHEN (1 > 0) THEN 1 WHEN (2 > 0) THEN 2.0 ELSE 1.2 END:decimal(11,1)> |
org.apache.spark.sql.catalyst.expressions.Cast |
bigint |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
binary |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
boolean |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
cast |
SELECT cast('10' as int) |
struct<CAST(10 AS INT):int> |
org.apache.spark.sql.catalyst.expressions.Cast |
date |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
decimal |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
double |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
float |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
int |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
smallint |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
string |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
timestamp |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cast |
tinyint |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.Cbrt |
cbrt |
SELECT cbrt(27.0) |
struct<CBRT(27.0):double> |
org.apache.spark.sql.catalyst.expressions.Ceil |
ceil |
SELECT ceil(-0.1) |
struct<CEIL(-0.1):decimal(1,0)> |
org.apache.spark.sql.catalyst.expressions.Ceil |
ceiling |
SELECT ceiling(-0.1) |
struct<ceiling(-0.1):decimal(1,0)> |
org.apache.spark.sql.catalyst.expressions.Chr |
char |
SELECT char(65) |
struct<char(65):string> |
org.apache.spark.sql.catalyst.expressions.Chr |
chr |
SELECT chr(65) |
struct<chr(65):string> |
org.apache.spark.sql.catalyst.expressions.Coalesce |
coalesce |
SELECT coalesce(NULL, 1, NULL) |
struct<coalesce(NULL, 1, NULL):int> |
org.apache.spark.sql.catalyst.expressions.Concat |
concat |
SELECT concat('Spark', 'SQL') |
struct<concat(Spark, SQL):string> |
org.apache.spark.sql.catalyst.expressions.ConcatWs |
concat_ws |
SELECT concat_ws(' ', 'Spark', 'SQL') |
struct<concat_ws( , Spark, SQL):string> |
org.apache.spark.sql.catalyst.expressions.Conv |
conv |
SELECT conv('100', 2, 10) |
struct<conv(100, 2, 10):string> |
org.apache.spark.sql.catalyst.expressions.Cos |
cos |
SELECT cos(0) |
struct<COS(0):double> |
org.apache.spark.sql.catalyst.expressions.Cosh |
cosh |
SELECT cosh(0) |
struct<COSH(0):double> |
org.apache.spark.sql.catalyst.expressions.Cot |
cot |
SELECT cot(1) |
struct<COT(1):double> |
org.apache.spark.sql.catalyst.expressions.Crc32 |
crc32 |
SELECT crc32('Spark') |
struct<crc32(Spark):bigint> |
org.apache.spark.sql.catalyst.expressions.CreateArray |
array |
SELECT array(1, 2, 3) |
struct<array(1, 2, 3):array<int>> |
org.apache.spark.sql.catalyst.expressions.CreateMap |
map |
SELECT map(1.0, '2', 3.0, '4') |
struct<map(1.0, 2, 3.0, 4):map<decimal(2,1),string>> |
org.apache.spark.sql.catalyst.expressions.CreateNamedStruct |
named_struct |
SELECT named_struct("a", 1, "b", 2, "c", 3) |
struct<named_struct(a, 1, b, 2, c, 3):struct<a:int,b:int,c:int>> |
org.apache.spark.sql.catalyst.expressions.CreateNamedStruct |
struct |
SELECT struct(1, 2, 3) |
struct<struct(1, 2, 3):struct<col1:int,col2:int,col3:int>> |
org.apache.spark.sql.catalyst.expressions.CsvToStructs |
from_csv |
SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') |
struct<from_csv(1, 0.8):struct<a:int,b:double>> |
org.apache.spark.sql.catalyst.expressions.CumeDist |
cume_dist |
SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,cume_dist() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
org.apache.spark.sql.catalyst.expressions.CurrentCatalog |
current_catalog |
SELECT current_catalog() |
struct<current_catalog():string> |
org.apache.spark.sql.catalyst.expressions.CurrentDatabase |
current_database |
SELECT current_database() |
struct<current_database():string> |
org.apache.spark.sql.catalyst.expressions.CurrentDate |
current_date |
SELECT current_date() |
struct<current_date():date> |
org.apache.spark.sql.catalyst.expressions.CurrentTimeZone |
current_timezone |
SELECT current_timezone() |
struct<current_timezone():string> |
org.apache.spark.sql.catalyst.expressions.CurrentTimestamp |
current_timestamp |
SELECT current_timestamp() |
struct<current_timestamp():timestamp> |
org.apache.spark.sql.catalyst.expressions.CurrentUser |
current_user |
SELECT current_user() |
struct<current_user():string> |
org.apache.spark.sql.catalyst.expressions.DateAdd |
date_add |
SELECT date_add('2016-07-30', 1) |
struct<date_add(2016-07-30, 1):date> |
org.apache.spark.sql.catalyst.expressions.DateDiff |
datediff |
SELECT datediff('2009-07-31', '2009-07-30') |
struct<datediff(2009-07-31, 2009-07-30):int> |
org.apache.spark.sql.catalyst.expressions.DateFormatClass |
date_format |
SELECT date_format('2016-04-08', 'y') |
struct<date_format(2016-04-08, y):string> |
org.apache.spark.sql.catalyst.expressions.DateFromUnixDate |
date_from_unix_date |
SELECT date_from_unix_date(1) |
struct<date_from_unix_date(1):date> |
org.apache.spark.sql.catalyst.expressions.DatePart |
date_part |
SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') |
struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
org.apache.spark.sql.catalyst.expressions.DateSub |
date_sub |
SELECT date_sub('2016-07-30', 1) |
struct<date_sub(2016-07-30, 1):date> |
org.apache.spark.sql.catalyst.expressions.DayOfMonth |
day |
SELECT day('2009-07-30') |
struct<day(2009-07-30):int> |
org.apache.spark.sql.catalyst.expressions.DayOfMonth |
dayofmonth |
SELECT dayofmonth('2009-07-30') |
struct<dayofmonth(2009-07-30):int> |
org.apache.spark.sql.catalyst.expressions.DayOfWeek |
dayofweek |
SELECT dayofweek('2009-07-30') |
struct<dayofweek(2009-07-30):int> |
org.apache.spark.sql.catalyst.expressions.DayOfYear |
dayofyear |
SELECT dayofyear('2016-04-09') |
struct<dayofyear(2016-04-09):int> |
org.apache.spark.sql.catalyst.expressions.Decode |
decode |
SELECT decode(encode('abc', 'utf-8'), 'utf-8') |
struct<decode(encode(abc, utf-8), utf-8):string> |
org.apache.spark.sql.catalyst.expressions.DenseRank |
dense_rank |
SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,DENSE_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
org.apache.spark.sql.catalyst.expressions.Divide |
/ |
SELECT 3 / 2 |
struct<(3 / 2):double> |
org.apache.spark.sql.catalyst.expressions.ElementAt |
element_at |
SELECT element_at(array(1, 2, 3), 2) |
struct<element_at(array(1, 2, 3), 2):int> |
org.apache.spark.sql.catalyst.expressions.Elt |
elt |
SELECT elt(1, 'scala', 'java') |
struct<elt(1, scala, java):string> |
org.apache.spark.sql.catalyst.expressions.Encode |
encode |
SELECT encode('abc', 'utf-8') |
struct<encode(abc, utf-8):binary> |
org.apache.spark.sql.catalyst.expressions.EqualNullSafe |
<=> |
SELECT 2 <=> 2 |
struct<(2 <=> 2):boolean> |
org.apache.spark.sql.catalyst.expressions.EqualTo |
= |
SELECT 2 = 2 |
struct<(2 = 2):boolean> |
org.apache.spark.sql.catalyst.expressions.EqualTo |
== |
SELECT 2 == 2 |
struct<(2 = 2):boolean> |
org.apache.spark.sql.catalyst.expressions.EulerNumber |
e |
SELECT e() |
struct<E():double> |
org.apache.spark.sql.catalyst.expressions.Exp |
exp |
SELECT exp(0) |
struct<EXP(0):double> |
org.apache.spark.sql.catalyst.expressions.Explode |
explode |
SELECT explode(array(10, 20)) |
struct<col:int> |
org.apache.spark.sql.catalyst.expressions.Explode |
explode_outer |
SELECT explode_outer(array(10, 20)) |
struct<col:int> |
org.apache.spark.sql.catalyst.expressions.Expm1 |
expm1 |
SELECT expm1(0) |
struct<EXPM1(0):double> |
org.apache.spark.sql.catalyst.expressions.Extract |
extract |
SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') |
struct<extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
org.apache.spark.sql.catalyst.expressions.Factorial |
factorial |
SELECT factorial(5) |
struct<factorial(5):bigint> |
org.apache.spark.sql.catalyst.expressions.FindInSet |
find_in_set |
SELECT find_in_set('ab','abc,b,ab,c,def') |
struct<find_in_set(ab, abc,b,ab,c,def):int> |
org.apache.spark.sql.catalyst.expressions.Flatten |
flatten |
SELECT flatten(array(array(1, 2), array(3, 4))) |
struct<flatten(array(array(1, 2), array(3, 4))):array<int>> |
org.apache.spark.sql.catalyst.expressions.Floor |
floor |
SELECT floor(-0.1) |
struct<FLOOR(-0.1):decimal(1,0)> |
org.apache.spark.sql.catalyst.expressions.FormatNumber |
format_number |
SELECT format_number(12332.123456, 4) |
struct<format_number(12332.123456, 4):string> |
org.apache.spark.sql.catalyst.expressions.FormatString |
format_string |
SELECT format_string("Hello World %d %s", 100, "days") |
struct<format_string(Hello World %d %s, 100, days):string> |
org.apache.spark.sql.catalyst.expressions.FormatString |
printf |
SELECT printf("Hello World %d %s", 100, "days") |
struct<printf(Hello World %d %s, 100, days):string> |
org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp |
from_utc_timestamp |
SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') |
struct<from_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
org.apache.spark.sql.catalyst.expressions.FromUnixTime |
from_unixtime |
SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') |
struct<from_unixtime(0, yyyy-MM-dd HH:mm:ss):string> |
org.apache.spark.sql.catalyst.expressions.GetJsonObject |
get_json_object |
SELECT get_json_object('{"a":"b"}', '$.a') |
struct<get_json_object({"a":"b"}, $.a):string> |
org.apache.spark.sql.catalyst.expressions.GreaterThan |
> |
SELECT 2 > 1 |
struct<(2 > 1):boolean> |
org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual |
>= |
SELECT 2 >= 1 |
struct<(2 >= 1):boolean> |
org.apache.spark.sql.catalyst.expressions.Greatest |
greatest |
SELECT greatest(10, 9, 2, 4, 3) |
struct<greatest(10, 9, 2, 4, 3):int> |
org.apache.spark.sql.catalyst.expressions.Grouping |
grouping |
SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) |
struct<name:string,grouping(name):tinyint,sum(age):bigint> |
org.apache.spark.sql.catalyst.expressions.GroupingID |
grouping_id |
SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) |
struct<name:string,grouping_id():bigint,sum(age):bigint,avg(height):double> |
org.apache.spark.sql.catalyst.expressions.Hex |
hex |
SELECT hex(17) |
struct<hex(17):string> |
org.apache.spark.sql.catalyst.expressions.Hour |
hour |
SELECT hour('2009-07-30 12:58:59') |
struct<hour(2009-07-30 12:58:59):int> |
org.apache.spark.sql.catalyst.expressions.Hypot |
hypot |
SELECT hypot(3, 4) |
struct<HYPOT(3, 4):double> |
org.apache.spark.sql.catalyst.expressions.If |
if |
SELECT if(1 < 2, 'a', 'b') |
struct<(IF((1 < 2), a, b)):string> |
org.apache.spark.sql.catalyst.expressions.IfNull |
ifnull |
SELECT ifnull(NULL, array('2')) |
struct<ifnull(NULL, array(2)):array<string>> |
org.apache.spark.sql.catalyst.expressions.In |
in |
SELECT 1 in(1, 2, 3) |
struct<(1 IN (1, 2, 3)):boolean> |
org.apache.spark.sql.catalyst.expressions.InitCap |
initcap |
SELECT initcap('sPark sql') |
struct<initcap(sPark sql):string> |
org.apache.spark.sql.catalyst.expressions.Inline |
inline |
SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) |
struct<col1:int,col2:string> |
org.apache.spark.sql.catalyst.expressions.Inline |
inline_outer |
SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) |
struct<col1:int,col2:string> |
org.apache.spark.sql.catalyst.expressions.InputFileBlockLength |
input_file_block_length |
SELECT input_file_block_length() |
struct<input_file_block_length():bigint> |
org.apache.spark.sql.catalyst.expressions.InputFileBlockStart |
input_file_block_start |
SELECT input_file_block_start() |
struct<input_file_block_start():bigint> |
org.apache.spark.sql.catalyst.expressions.InputFileName |
input_file_name |
SELECT input_file_name() |
struct<input_file_name():string> |
org.apache.spark.sql.catalyst.expressions.IntegralDivide |
div |
SELECT 3 div 2 |
struct<(3 div 2):bigint> |
org.apache.spark.sql.catalyst.expressions.IsNaN |
isnan |
SELECT isnan(cast('NaN' as double)) |
struct<isnan(CAST(NaN AS DOUBLE)):boolean> |
org.apache.spark.sql.catalyst.expressions.IsNotNull |
isnotnull |
SELECT isnotnull(1) |
struct<(1 IS NOT NULL):boolean> |
org.apache.spark.sql.catalyst.expressions.IsNull |
isnull |
SELECT isnull(1) |
struct<(1 IS NULL):boolean> |
org.apache.spark.sql.catalyst.expressions.JsonObjectKeys |
json_object_keys |
SELECT json_object_keys('{}') |
struct<json_object_keys({}):array<string>> |
org.apache.spark.sql.catalyst.expressions.JsonToStructs |
from_json |
SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') |
struct<from_json({"a":1, "b":0.8}):struct<a:int,b:double>> |
org.apache.spark.sql.catalyst.expressions.JsonTuple |
json_tuple |
SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') |
struct<c0:string,c1:string> |
org.apache.spark.sql.catalyst.expressions.Lag |
lag |
SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,lag(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN -1 FOLLOWING AND -1 FOLLOWING):int> |
org.apache.spark.sql.catalyst.expressions.LastDay |
last_day |
SELECT last_day('2009-01-12') |
struct<last_day(2009-01-12):date> |
org.apache.spark.sql.catalyst.expressions.Lead |
lead |
SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,lead(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int> |
org.apache.spark.sql.catalyst.expressions.Least |
least |
SELECT least(10, 9, 2, 4, 3) |
struct<least(10, 9, 2, 4, 3):int> |
org.apache.spark.sql.catalyst.expressions.Left |
left |
SELECT left('Spark SQL', 3) |
struct<left(Spark SQL, 3):string> |
org.apache.spark.sql.catalyst.expressions.Length |
char_length |
SELECT char_length('Spark SQL ') |
struct<char_length(Spark SQL ):int> |
org.apache.spark.sql.catalyst.expressions.Length |
character_length |
SELECT character_length('Spark SQL ') |
struct<character_length(Spark SQL ):int> |
org.apache.spark.sql.catalyst.expressions.Length |
length |
SELECT length('Spark SQL ') |
struct<length(Spark SQL ):int> |
org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray |
json_array_length |
SELECT json_array_length('[1,2,3,4]') |
struct<json_array_length([1,2,3,4]):int> |
org.apache.spark.sql.catalyst.expressions.LessThan |
< |
SELECT 1 < 2 |
struct<(1 < 2):boolean> |
org.apache.spark.sql.catalyst.expressions.LessThanOrEqual |
<= |
SELECT 2 <= 2 |
struct<(2 <= 2):boolean> |
org.apache.spark.sql.catalyst.expressions.Levenshtein |
levenshtein |
SELECT levenshtein('kitten', 'sitting') |
struct<levenshtein(kitten, sitting):int> |
org.apache.spark.sql.catalyst.expressions.Like |
like |
SELECT like('Spark', '_park') |
struct<Spark LIKE _park:boolean> |
org.apache.spark.sql.catalyst.expressions.Log |
ln |
SELECT ln(1) |
struct<ln(1):double> |
org.apache.spark.sql.catalyst.expressions.Log10 |
log10 |
SELECT log10(10) |
struct<LOG10(10):double> |
org.apache.spark.sql.catalyst.expressions.Log1p |
log1p |
SELECT log1p(0) |
struct<LOG1P(0):double> |
org.apache.spark.sql.catalyst.expressions.Log2 |
log2 |
SELECT log2(2) |
struct<LOG2(2):double> |
org.apache.spark.sql.catalyst.expressions.Logarithm |
log |
SELECT log(10, 100) |
struct<LOG(10, 100):double> |
org.apache.spark.sql.catalyst.expressions.Lower |
lcase |
SELECT lcase('SparkSql') |
struct<lcase(SparkSql):string> |
org.apache.spark.sql.catalyst.expressions.Lower |
lower |
SELECT lower('SparkSql') |
struct<lower(SparkSql):string> |
org.apache.spark.sql.catalyst.expressions.MakeDTInterval |
make_dt_interval |
SELECT make_dt_interval(1, 12, 30, 01.001001) |
struct<make_dt_interval(1, 12, 30, 1.001001):interval day to second> |
org.apache.spark.sql.catalyst.expressions.MakeDate |
make_date |
SELECT make_date(2013, 7, 15) |
struct<make_date(2013, 7, 15):date> |
org.apache.spark.sql.catalyst.expressions.MakeInterval |
make_interval |
SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) |
struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
org.apache.spark.sql.catalyst.expressions.MakeTimestamp |
make_timestamp |
SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) |
struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
org.apache.spark.sql.catalyst.expressions.MakeYMInterval |
make_ym_interval |
SELECT make_ym_interval(1, 2) |
struct<make_ym_interval(1, 2):interval year to month> |
org.apache.spark.sql.catalyst.expressions.MapConcat |
map_concat |
SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) |
struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
org.apache.spark.sql.catalyst.expressions.MapEntries |
map_entries |
SELECT map_entries(map(1, 'a', 2, 'b')) |
struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
org.apache.spark.sql.catalyst.expressions.MapFilter |
map_filter |
SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) |
struct<map_filter(map(1, 0, 2, 2, 3, -1), lambdafunction((namedlambdavariable() > namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
org.apache.spark.sql.catalyst.expressions.MapFromArrays |
map_from_arrays |
SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) |
struct<map_from_arrays(array(1.0, 3.0), array(2, 4)):map<decimal(2,1),string>> |
org.apache.spark.sql.catalyst.expressions.MapFromEntries |
map_from_entries |
SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) |
struct<map_from_entries(array(struct(1, a), struct(2, b))):map<int,string>> |
org.apache.spark.sql.catalyst.expressions.MapKeys |
map_keys |
SELECT map_keys(map(1, 'a', 2, 'b')) |
struct<map_keys(map(1, a, 2, b)):array<int>> |
org.apache.spark.sql.catalyst.expressions.MapValues |
map_values |
SELECT map_values(map(1, 'a', 2, 'b')) |
struct<map_values(map(1, a, 2, b)):array<string>> |
org.apache.spark.sql.catalyst.expressions.MapZipWith |
map_zip_with |
SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) |
struct<map_zip_with(map(1, a, 2, b), map(1, x, 2, y), lambdafunction(concat(namedlambdavariable(), namedlambdavariable()), namedlambdavariable(), namedlambdavariable(), namedlambdavariable())):map<int,string>> |
org.apache.spark.sql.catalyst.expressions.Md5 |
md5 |
SELECT md5('Spark') |
struct<md5(Spark):string> |
org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp |
timestamp_micros |
SELECT timestamp_micros(1230219000123123) |
struct<timestamp_micros(1230219000123123):timestamp> |
org.apache.spark.sql.catalyst.expressions.MillisToTimestamp |
timestamp_millis |
SELECT timestamp_millis(1230219000123) |
struct<timestamp_millis(1230219000123):timestamp> |
org.apache.spark.sql.catalyst.expressions.Minute |
minute |
SELECT minute('2009-07-30 12:58:59') |
struct<minute(2009-07-30 12:58:59):int> |
org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID |
monotonically_increasing_id |
SELECT monotonically_increasing_id() |
struct<monotonically_increasing_id():bigint> |
org.apache.spark.sql.catalyst.expressions.Month |
month |
SELECT month('2016-07-30') |
struct<month(2016-07-30):int> |
org.apache.spark.sql.catalyst.expressions.MonthsBetween |
months_between |
SELECT months_between('1997-02-28 10:30:00', '1996-10-30') |
struct<months_between(1997-02-28 10:30:00, 1996-10-30, true):double> |
org.apache.spark.sql.catalyst.expressions.Multiply |
* |
SELECT 2 * 3 |
struct<(2 * 3):int> |
org.apache.spark.sql.catalyst.expressions.Murmur3Hash |
hash |
SELECT hash('Spark', array(123), 2) |
struct<hash(Spark, array(123), 2):int> |
org.apache.spark.sql.catalyst.expressions.NTile |
ntile |
SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,ntile(2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
org.apache.spark.sql.catalyst.expressions.NaNvl |
nanvl |
SELECT nanvl(cast('NaN' as double), 123) |
struct<nanvl(CAST(NaN AS DOUBLE), 123):double> |
org.apache.spark.sql.catalyst.expressions.NextDay |
next_day |
SELECT next_day('2015-01-14', 'TU') |
struct<next_day(2015-01-14, TU):date> |
org.apache.spark.sql.catalyst.expressions.Not |
! |
SELECT ! true |
struct<(NOT true):boolean> |
org.apache.spark.sql.catalyst.expressions.Not |
not |
SELECT not true |
struct<(NOT true):boolean> |
org.apache.spark.sql.catalyst.expressions.Now |
now |
SELECT now() |
struct<now():timestamp> |
org.apache.spark.sql.catalyst.expressions.NthValue |
nth_value |
SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,nth_value(b, 2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
org.apache.spark.sql.catalyst.expressions.NullIf |
nullif |
SELECT nullif(2, 2) |
struct<nullif(2, 2):int> |
org.apache.spark.sql.catalyst.expressions.Nvl |
nvl |
SELECT nvl(NULL, array('2')) |
struct<nvl(NULL, array(2)):array<string>> |
org.apache.spark.sql.catalyst.expressions.Nvl2 |
nvl2 |
SELECT nvl2(NULL, 2, 1) |
struct<nvl2(NULL, 2, 1):int> |
org.apache.spark.sql.catalyst.expressions.OctetLength |
octet_length |
SELECT octet_length('Spark SQL') |
struct<octet_length(Spark SQL):int> |
org.apache.spark.sql.catalyst.expressions.Or |
or |
SELECT true or false |
struct<(true OR false):boolean> |
org.apache.spark.sql.catalyst.expressions.Overlay |
overlay |
SELECT overlay('Spark SQL' PLACING '_' FROM 6) |
struct<overlay(Spark SQL, _, 6, -1):string> |
org.apache.spark.sql.catalyst.expressions.ParseToDate |
to_date |
SELECT to_date('2009-07-30 04:17:52') |
struct<to_date(2009-07-30 04:17:52):date> |
org.apache.spark.sql.catalyst.expressions.ParseToTimestamp |
to_timestamp |
SELECT to_timestamp('2016-12-31 00:12:00') |
struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
org.apache.spark.sql.catalyst.expressions.ParseToTimestampWithoutTZ |
to_timestamp_ntz |
SELECT to_timestamp_ntz('2016-12-31 00:12:00') |
struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp without time zone> |
org.apache.spark.sql.catalyst.expressions.ParseUrl |
parse_url |
SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') |
struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
org.apache.spark.sql.catalyst.expressions.PercentRank |
percent_rank |
SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
org.apache.spark.sql.catalyst.expressions.Pi |
pi |
SELECT pi() |
struct<PI():double> |
org.apache.spark.sql.catalyst.expressions.Pmod |
pmod |
SELECT pmod(10, 3) |
struct<pmod(10, 3):int> |
org.apache.spark.sql.catalyst.expressions.PosExplode |
posexplode |
SELECT posexplode(array(10,20)) |
struct<pos:int,col:int> |
org.apache.spark.sql.catalyst.expressions.PosExplode |
posexplode_outer |
SELECT posexplode_outer(array(10,20)) |
struct<pos:int,col:int> |
org.apache.spark.sql.catalyst.expressions.Pow |
pow |
SELECT pow(2, 3) |
struct<pow(2, 3):double> |
org.apache.spark.sql.catalyst.expressions.Pow |
power |
SELECT power(2, 3) |
struct<POWER(2, 3):double> |
org.apache.spark.sql.catalyst.expressions.Quarter |
quarter |
SELECT quarter('2016-08-31') |
struct<quarter(2016-08-31):int> |
org.apache.spark.sql.catalyst.expressions.RLike |
regexp |
SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') |
struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
org.apache.spark.sql.catalyst.expressions.RLike |
regexp_like |
SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') |
struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
org.apache.spark.sql.catalyst.expressions.RLike |
rlike |
SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') |
struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
org.apache.spark.sql.catalyst.expressions.RaiseError |
raise_error |
SELECT raise_error('custom error message') |
struct<raise_error(custom error message):null> |
org.apache.spark.sql.catalyst.expressions.Rand |
rand |
SELECT rand() |
struct<rand():double> |
org.apache.spark.sql.catalyst.expressions.Rand |
random |
SELECT random() |
struct<rand():double> |
org.apache.spark.sql.catalyst.expressions.Randn |
randn |
SELECT randn() |
struct<randn():double> |
org.apache.spark.sql.catalyst.expressions.Rank |
rank |
SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
org.apache.spark.sql.catalyst.expressions.RegExpExtract |
regexp_extract |
SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1) |
struct<regexp_extract(100-200, (\d+)-(\d+), 1):string> |
org.apache.spark.sql.catalyst.expressions.RegExpExtractAll |
regexp_extract_all |
SELECT regexp_extract_all('100-200, 300-400', '(\d+)-(\d+)', 1) |
struct<regexp_extract_all(100-200, 300-400, (\d+)-(\d+), 1):array<string>> |
org.apache.spark.sql.catalyst.expressions.RegExpReplace |
regexp_replace |
SELECT regexp_replace('100-200', '(\d+)', 'num') |
struct<regexp_replace(100-200, (\d+), num, 1):string> |
org.apache.spark.sql.catalyst.expressions.Remainder |
% |
SELECT 2 % 1.8 |
struct<(2 % 1.8):decimal(2,1)> |
org.apache.spark.sql.catalyst.expressions.Remainder |
mod |
SELECT 2 % 1.8 |
struct<(2 % 1.8):decimal(2,1)> |
org.apache.spark.sql.catalyst.expressions.Reverse |
reverse |
SELECT reverse('Spark SQL') |
struct<reverse(Spark SQL):string> |
org.apache.spark.sql.catalyst.expressions.Right |
right |
SELECT right('Spark SQL', 3) |
struct<right(Spark SQL, 3):string> |
org.apache.spark.sql.catalyst.expressions.Rint |
rint |
SELECT rint(12.3456) |
struct<rint(12.3456):double> |
org.apache.spark.sql.catalyst.expressions.Round |
round |
SELECT round(2.5, 0) |
struct<round(2.5, 0):decimal(2,0)> |
org.apache.spark.sql.catalyst.expressions.RowNumber |
row_number |
SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) |
struct<a:string,b:int,row_number() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
org.apache.spark.sql.catalyst.expressions.SchemaOfCsv |
schema_of_csv |
SELECT schema_of_csv('1,abc') |
struct<schema_of_csv(1,abc):string> |
org.apache.spark.sql.catalyst.expressions.SchemaOfJson |
schema_of_json |
SELECT schema_of_json('[{"col":0}]') |
struct<schema_of_json([{"col":0}]):string> |
org.apache.spark.sql.catalyst.expressions.Second |
second |
SELECT second('2009-07-30 12:58:59') |
struct<second(2009-07-30 12:58:59):int> |
org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp |
timestamp_seconds |
SELECT timestamp_seconds(1230219000) |
struct<timestamp_seconds(1230219000):timestamp> |
org.apache.spark.sql.catalyst.expressions.Sentences |
sentences |
SELECT sentences('Hi there! Good morning.') |
struct<sentences(Hi there! Good morning., , ):array<array<string>>> |
org.apache.spark.sql.catalyst.expressions.Sequence |
sequence |
SELECT sequence(1, 5) |
struct<sequence(1, 5):array<int>> |
org.apache.spark.sql.catalyst.expressions.Sha1 |
sha |
SELECT sha('Spark') |
struct<sha(Spark):string> |
org.apache.spark.sql.catalyst.expressions.Sha1 |
sha1 |
SELECT sha1('Spark') |
struct<sha1(Spark):string> |
org.apache.spark.sql.catalyst.expressions.Sha2 |
sha2 |
SELECT sha2('Spark', 256) |
struct<sha2(Spark, 256):string> |
org.apache.spark.sql.catalyst.expressions.ShiftLeft |
shiftleft |
SELECT shiftleft(2, 1) |
struct<shiftleft(2, 1):int> |
org.apache.spark.sql.catalyst.expressions.ShiftRight |
shiftright |
SELECT shiftright(4, 1) |
struct<shiftright(4, 1):int> |
org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned |
shiftrightunsigned |
SELECT shiftrightunsigned(4, 1) |
struct<shiftrightunsigned(4, 1):int> |
org.apache.spark.sql.catalyst.expressions.Shuffle |
shuffle |
SELECT shuffle(array(1, 20, 3, 5)) |
struct<shuffle(array(1, 20, 3, 5)):array<int>> |
org.apache.spark.sql.catalyst.expressions.Signum |
sign |
SELECT sign(40) |
struct<sign(40):double> |
org.apache.spark.sql.catalyst.expressions.Signum |
signum |
SELECT signum(40) |
struct<SIGNUM(40):double> |
org.apache.spark.sql.catalyst.expressions.Sin |
sin |
SELECT sin(0) |
struct<SIN(0):double> |
org.apache.spark.sql.catalyst.expressions.Sinh |
sinh |
SELECT sinh(0) |
struct<SINH(0):double> |
org.apache.spark.sql.catalyst.expressions.Size |
cardinality |
SELECT cardinality(array('b', 'd', 'c', 'a')) |
struct<cardinality(array(b, d, c, a)):int> |
org.apache.spark.sql.catalyst.expressions.Size |
size |
SELECT size(array('b', 'd', 'c', 'a')) |
struct<size(array(b, d, c, a)):int> |
org.apache.spark.sql.catalyst.expressions.Slice |
slice |
SELECT slice(array(1, 2, 3, 4), 2, 2) |
struct<slice(array(1, 2, 3, 4), 2, 2):array<int>> |
org.apache.spark.sql.catalyst.expressions.SortArray |
sort_array |
SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) |
struct<sort_array(array(b, d, NULL, c, a), true):array<string>> |
org.apache.spark.sql.catalyst.expressions.SoundEx |
soundex |
SELECT soundex('Miller') |
struct<soundex(Miller):string> |
org.apache.spark.sql.catalyst.expressions.SparkPartitionID |
spark_partition_id |
SELECT spark_partition_id() |
struct<SPARK_PARTITION_ID():int> |
org.apache.spark.sql.catalyst.expressions.SparkVersion |
version |
SELECT version() |
struct<version():string> |
org.apache.spark.sql.catalyst.expressions.Sqrt |
sqrt |
SELECT sqrt(4) |
struct<SQRT(4):double> |
org.apache.spark.sql.catalyst.expressions.Stack |
stack |
SELECT stack(2, 1, 2, 3) |
struct<col0:int,col1:int> |
org.apache.spark.sql.catalyst.expressions.StringInstr |
instr |
SELECT instr('SparkSQL', 'SQL') |
struct<instr(SparkSQL, SQL):int> |
org.apache.spark.sql.catalyst.expressions.StringLPad |
lpad |
SELECT lpad('hi', 5, '??') |
struct<lpad(hi, 5, ??):string> |
org.apache.spark.sql.catalyst.expressions.StringLocate |
locate |
SELECT locate('bar', 'foobarbar') |
struct<locate(bar, foobarbar, 1):int> |
org.apache.spark.sql.catalyst.expressions.StringLocate |
position |
SELECT position('bar', 'foobarbar') |
struct<position(bar, foobarbar, 1):int> |
org.apache.spark.sql.catalyst.expressions.StringRPad |
rpad |
SELECT rpad('hi', 5, '??') |
struct<rpad(hi, 5, ??):string> |
org.apache.spark.sql.catalyst.expressions.StringRepeat |
repeat |
SELECT repeat('123', 2) |
struct<repeat(123, 2):string> |
org.apache.spark.sql.catalyst.expressions.StringReplace |
replace |
SELECT replace('ABCabc', 'abc', 'DEF') |
struct<replace(ABCabc, abc, DEF):string> |
org.apache.spark.sql.catalyst.expressions.StringSpace |
space |
SELECT concat(space(2), '1') |
struct<concat(space(2), 1):string> |
org.apache.spark.sql.catalyst.expressions.StringSplit |
split |
SELECT split('oneAtwoBthreeC', '[ABC]') |
struct<split(oneAtwoBthreeC, [ABC], -1):array<string>> |
org.apache.spark.sql.catalyst.expressions.StringToMap |
str_to_map |
SELECT str_to_map('a:1,b:2,c:3', ',', ':') |
struct<str_to_map(a:1,b:2,c:3, ,, :):map<string,string>> |
org.apache.spark.sql.catalyst.expressions.StringTranslate |
translate |
SELECT translate('AaBbCc', 'abc', '123') |
struct<translate(AaBbCc, abc, 123):string> |
org.apache.spark.sql.catalyst.expressions.StringTrim |
trim |
SELECT trim(' SparkSQL ') |
struct<trim( SparkSQL ):string> |
org.apache.spark.sql.catalyst.expressions.StringTrimBoth |
btrim |
SELECT btrim(' SparkSQL ') |
struct<btrim( SparkSQL ):string> |
org.apache.spark.sql.catalyst.expressions.StringTrimLeft |
ltrim |
SELECT ltrim(' SparkSQL ') |
struct<ltrim( SparkSQL ):string> |
org.apache.spark.sql.catalyst.expressions.StringTrimRight |
rtrim |
SELECT rtrim(' SparkSQL ') |
struct<rtrim( SparkSQL ):string> |
org.apache.spark.sql.catalyst.expressions.StructsToCsv |
to_csv |
SELECT to_csv(named_struct('a', 1, 'b', 2)) |
struct<to_csv(named_struct(a, 1, b, 2)):string> |
org.apache.spark.sql.catalyst.expressions.StructsToJson |
to_json |
SELECT to_json(named_struct('a', 1, 'b', 2)) |
struct<to_json(named_struct(a, 1, b, 2)):string> |
org.apache.spark.sql.catalyst.expressions.Substring |
substr |
SELECT substr('Spark SQL', 5) |
struct<substr(Spark SQL, 5, 2147483647):string> |
org.apache.spark.sql.catalyst.expressions.Substring |
substring |
SELECT substring('Spark SQL', 5) |
struct<substring(Spark SQL, 5, 2147483647):string> |
org.apache.spark.sql.catalyst.expressions.SubstringIndex |
substring_index |
SELECT substring_index('www.apache.org', '.', 2) |
struct<substring_index(www.apache.org, ., 2):string> |
org.apache.spark.sql.catalyst.expressions.Subtract |
- |
SELECT 2 - 1 |
struct<(2 - 1):int> |
org.apache.spark.sql.catalyst.expressions.Tan |
tan |
SELECT tan(0) |
struct<TAN(0):double> |
org.apache.spark.sql.catalyst.expressions.Tanh |
tanh |
SELECT tanh(0) |
struct<TANH(0):double> |
org.apache.spark.sql.catalyst.expressions.TimeWindow |
window |
N/A |
N/A |
org.apache.spark.sql.catalyst.expressions.ToDegrees |
degrees |
SELECT degrees(3.141592653589793) |
struct<DEGREES(3.141592653589793):double> |
org.apache.spark.sql.catalyst.expressions.ToRadians |
radians |
SELECT radians(180) |
struct<RADIANS(180):double> |
org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp |
to_utc_timestamp |
SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') |
struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp |
to_unix_timestamp |
SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') |
struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> |
org.apache.spark.sql.catalyst.expressions.TransformKeys |
transform_keys |
SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) |
struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
org.apache.spark.sql.catalyst.expressions.TransformValues |
transform_values |
SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) |
struct<transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
org.apache.spark.sql.catalyst.expressions.TruncDate |
trunc |
SELECT trunc('2019-08-04', 'week') |
struct<trunc(2019-08-04, week):date> |
org.apache.spark.sql.catalyst.expressions.TruncTimestamp |
date_trunc |
SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') |
struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
org.apache.spark.sql.catalyst.expressions.TryAdd |
try_add |
SELECT try_add(1, 2) |
struct<try_add(1, 2):int> |
org.apache.spark.sql.catalyst.expressions.TryDivide |
try_divide |
SELECT try_divide(3, 2) |
struct<try_divide(3, 2):double> |
org.apache.spark.sql.catalyst.expressions.TypeOf |
typeof |
SELECT typeof(1) |
struct<typeof(1):string> |
org.apache.spark.sql.catalyst.expressions.UnBase64 |
unbase64 |
SELECT unbase64('U3BhcmsgU1FM') |
struct<unbase64(U3BhcmsgU1FM):binary> |
org.apache.spark.sql.catalyst.expressions.UnaryMinus |
negative |
SELECT negative(1) |
struct<negative(1):int> |
org.apache.spark.sql.catalyst.expressions.UnaryPositive |
positive |
SELECT positive(1) |
struct<(+ 1):int> |
org.apache.spark.sql.catalyst.expressions.Unhex |
unhex |
SELECT decode(unhex('537061726B2053514C'), 'UTF-8') |
struct<decode(unhex(537061726B2053514C), UTF-8):string> |
org.apache.spark.sql.catalyst.expressions.UnixDate |
unix_date |
SELECT unix_date(DATE("1970-01-02")) |
struct<unix_date(1970-01-02):int> |
org.apache.spark.sql.catalyst.expressions.UnixMicros |
unix_micros |
SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) |
struct<unix_micros(1970-01-01 00:00:01Z):bigint> |
org.apache.spark.sql.catalyst.expressions.UnixMillis |
unix_millis |
SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) |
struct<unix_millis(1970-01-01 00:00:01Z):bigint> |
org.apache.spark.sql.catalyst.expressions.UnixSeconds |
unix_seconds |
SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) |
struct<unix_seconds(1970-01-01 00:00:01Z):bigint> |
org.apache.spark.sql.catalyst.expressions.UnixTimestamp |
unix_timestamp |
SELECT unix_timestamp() |
struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
org.apache.spark.sql.catalyst.expressions.Upper |
ucase |
SELECT ucase('SparkSql') |
struct<ucase(SparkSql):string> |
org.apache.spark.sql.catalyst.expressions.Upper |
upper |
SELECT upper('SparkSql') |
struct<upper(SparkSql):string> |
org.apache.spark.sql.catalyst.expressions.Uuid |
uuid |
SELECT uuid() |
struct<uuid():string> |
org.apache.spark.sql.catalyst.expressions.WeekDay |
weekday |
SELECT weekday('2009-07-30') |
struct<weekday(2009-07-30):int> |
org.apache.spark.sql.catalyst.expressions.WeekOfYear |
weekofyear |
SELECT weekofyear('2008-02-20') |
struct<weekofyear(2008-02-20):int> |
org.apache.spark.sql.catalyst.expressions.WidthBucket |
width_bucket |
SELECT width_bucket(5.3, 0.2, 10.6, 5) |
struct<width_bucket(5.3, 0.2, 10.6, 5):bigint> |
org.apache.spark.sql.catalyst.expressions.XxHash64 |
xxhash64 |
SELECT xxhash64('Spark', array(123), 2) |
struct<xxhash64(Spark, array(123), 2):bigint> |
org.apache.spark.sql.catalyst.expressions.Year |
year |
SELECT year('2016-07-30') |
struct<year(2016-07-30):int> |
org.apache.spark.sql.catalyst.expressions.ZipWith |
zip_with |
SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) |
struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>> |
org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile |
approx_percentile |
SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) |
struct<approx_percentile(col, array(0.5, 0.4, 0.1), 100):array<int>> |
org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile |
percentile_approx |
SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) |
struct<percentile_approx(col, array(0.5, 0.4, 0.1), 100):array<int>> |
org.apache.spark.sql.catalyst.expressions.aggregate.Average |
avg |
SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<avg(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.Average |
mean |
SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<mean(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg |
bit_and |
SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) |
struct<bit_and(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg |
bit_or |
SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) |
struct<bit_or(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg |
bit_xor |
SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) |
struct<bit_xor(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd |
bool_and |
SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) |
struct<bool_and(col):boolean> |
org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd |
every |
SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) |
struct<every(col):boolean> |
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr |
any |
SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) |
struct<any(col):boolean> |
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr |
bool_or |
SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) |
struct<bool_or(col):boolean> |
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr |
some |
SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) |
struct<some(col):boolean> |
org.apache.spark.sql.catalyst.expressions.aggregate.CollectList |
collect_list |
SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) |
struct<collect_list(col):array<int>> |
org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet |
collect_set |
SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) |
struct<collect_set(col):array<int>> |
org.apache.spark.sql.catalyst.expressions.aggregate.Corr |
corr |
SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) |
struct<corr(c1, c2):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.Count |
count |
SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) |
struct<count(1):bigint> |
org.apache.spark.sql.catalyst.expressions.aggregate.CountIf |
count_if |
SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) |
struct<count_if(((col % 2) = 0)):bigint> |
org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg |
count_min_sketch |
SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) |
struct<hex(count_min_sketch(col, 0.5, 0.5, 1)):string> |
org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation |
covar_pop |
SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) |
struct<covar_pop(c1, c2):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.CovSample |
covar_samp |
SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) |
struct<covar_samp(c1, c2):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.First |
first |
SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) |
struct<first(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.First |
first_value |
SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) |
struct<first_value(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus |
approx_count_distinct |
SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) |
struct<approx_count_distinct(col1):bigint> |
org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis |
kurtosis |
SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) |
struct<kurtosis(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.Last |
last |
SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) |
struct<last(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.Last |
last_value |
SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) |
struct<last_value(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.Max |
max |
SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) |
struct<max(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy |
max_by |
SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) |
struct<max_by(x, y):string> |
org.apache.spark.sql.catalyst.expressions.aggregate.Min |
min |
SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) |
struct<min(col):int> |
org.apache.spark.sql.catalyst.expressions.aggregate.MinBy |
min_by |
SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) |
struct<min_by(x, y):string> |
org.apache.spark.sql.catalyst.expressions.aggregate.Percentile |
percentile |
SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) |
struct<percentile(col, 0.3, 1):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.Skewness |
skewness |
SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) |
struct<skewness(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop |
stddev_pop |
SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<stddev_pop(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp |
std |
SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<std(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp |
stddev |
SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<stddev(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp |
stddev_samp |
SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<stddev_samp(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.Sum |
sum |
SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) |
struct<sum(col):bigint> |
org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop |
var_pop |
SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<var_pop(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp |
var_samp |
SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<var_samp(col):double> |
org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp |
variance |
SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) |
struct<variance(col):double> |
org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean |
xpath_boolean |
SELECT xpath_boolean('<a><b>1</b></a>','a/b') |
struct<xpath_boolean(<a><b>1</b></a>, a/b):boolean> |
org.apache.spark.sql.catalyst.expressions.xml.XPathDouble |
xpath_double |
SELECT xpath_double('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_double(<a><b>1</b><b>2</b></a>, sum(a/b)):double> |
org.apache.spark.sql.catalyst.expressions.xml.XPathDouble |
xpath_number |
SELECT xpath_number('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_number(<a><b>1</b><b>2</b></a>, sum(a/b)):double> |
org.apache.spark.sql.catalyst.expressions.xml.XPathFloat |
xpath_float |
SELECT xpath_float('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_float(<a><b>1</b><b>2</b></a>, sum(a/b)):float> |
org.apache.spark.sql.catalyst.expressions.xml.XPathInt |
xpath_int |
SELECT xpath_int('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_int(<a><b>1</b><b>2</b></a>, sum(a/b)):int> |
org.apache.spark.sql.catalyst.expressions.xml.XPathList |
xpath |
SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') |
struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
org.apache.spark.sql.catalyst.expressions.xml.XPathLong |
xpath_long |
SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
org.apache.spark.sql.catalyst.expressions.xml.XPathShort |
xpath_short |
SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') |
struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
org.apache.spark.sql.catalyst.expressions.xml.XPathString |
xpath_string |
SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') |
struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |