## Summary

  - Number of queries: 356
  - Number of expressions missing an example: 13
  - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window

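Most of the expressions listed as missing an example are type-name aliases of `Cast` (the `N/A` rows under `org.apache.spark.sql.catalyst.expressions.Cast` below), plus the `window` time-window function. A minimal sketch of how the cast aliases can be invoked, assuming a Spark SQL session (these queries are illustrations, not rows from the generated table):

```sql
-- Type-name functions are shorthand casts: bigint(x) behaves like CAST(x AS BIGINT).
SELECT bigint('10');        -- same as CAST('10' AS BIGINT)
SELECT date('2016-07-30');  -- same as CAST('2016-07-30' AS DATE)
SELECT string(1.5);         -- same as CAST(1.5 AS STRING)
```
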
## Schema of Built-in Functions

| Class name | Function name or alias | Query example | Output schema |
| ---------- | ---------------------- | ------------- | ------------- |
| org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct<abs(-1):int> |
| org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct<ACOS(1):double> |
| org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct<ACOSH(1):double> |
| org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> |
| org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct<add_months(2016-08-31, 1):date> |
| org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<aggregate(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct<array_contains(array(1, 2, 3), 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct<array_distinct(array(1, 2, 3, NULL, 3)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayExcept | array_except | SELECT array_except(array(1, 2, 3), array(1, 3, 5)) | struct<array_except(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayExists | exists | SELECT exists(array(1, 2, 3), x -> x % 2 == 0) | struct<exists(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayFilter | filter | SELECT filter(array(1, 2, 3), x -> x % 2 == 1) | struct<filter(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 1), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayForAll | forall | SELECT forall(array(1, 2, 3), x -> x % 2 == 0) | struct<forall(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayIntersect | array_intersect | SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) | struct<array_intersect(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayJoin | array_join | SELECT array_join(array('hello', 'world'), ' ') | struct<array_join(array(hello, world), ):string> |
| org.apache.spark.sql.catalyst.expressions.ArrayMax | array_max | SELECT array_max(array(1, 20, null, 3)) | struct<array_max(array(1, 20, NULL, 3)):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayMin | array_min | SELECT array_min(array(1, 20, null, 3)) | struct<array_min(array(1, 20, NULL, 3)):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(3, 2, 1), 1) | struct<array_position(array(3, 2, 1), 1):bigint> |
| org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct<array_remove(array(1, 2, 3, NULL, 3), 3):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct<array_repeat(123, 2):array<string>> |
| org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct<array_sort(array(5, 6, 1), lambdafunction(CASE WHEN (namedlambdavariable() < namedlambdavariable()) THEN -1 WHEN (namedlambdavariable() > namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct<transform(array(1, 2, 3), lambdafunction((namedlambdavariable() + 1), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct<array_union(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArraysOverlap | arrays_overlap | SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) | struct<arrays_overlap(array(1, 2, 3), array(3, 4, 5)):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArraysZip | arrays_zip | SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) | struct<arrays_zip(array(1, 2, 3), array(2, 3, 4)):array<struct<0:int,1:int>>> |
| org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct<ascii(222):int> |
| org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct<ASIN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct<ASINH(0):double> |
| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
| org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct<ATAN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct<ATAN2(0, 0):double> |
| org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct<bround(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct<base64(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct<bin(13):string> |
| org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct<bit_length(Spark SQL):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseAnd | & | SELECT 3 & 5 | struct<(3 & 5):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseCount | bit_count | SELECT bit_count(0) | struct<bit_count(0):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseGet | bit_get | SELECT bit_get(11, 0) | struct<bit_get(11, 0):tinyint> |
| org.apache.spark.sql.catalyst.expressions.BitwiseGet | getbit | SELECT getbit(11, 0) | struct<getbit(11, 0):tinyint> |
| org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseOr | \| | SELECT 3 \| 5 | struct<(3 \| 5):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> |
| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | java_method | SELECT java_method('java.util.UUID', 'randomUUID') | struct<java_method(java.util.UUID, randomUUID):string> |
| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | reflect | SELECT reflect('java.util.UUID', 'randomUUID') | struct<reflect(java.util.UUID, randomUUID):string> |
| org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct<CASE WHEN (1 > 0) THEN 1 WHEN (2 > 0) THEN 2.0 ELSE 1.2 END:decimal(11,1)> |
| org.apache.spark.sql.catalyst.expressions.Cast | bigint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | binary | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | boolean | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct<CAST(10 AS INT):int> |
| org.apache.spark.sql.catalyst.expressions.Cast | date | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | decimal | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | double | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | float | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | int | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | smallint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | string | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | timestamp | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | tinyint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct<CBRT(27.0):double> |
| org.apache.spark.sql.catalyst.expressions.Ceil | ceil | SELECT ceil(-0.1) | struct<CEIL(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.Ceil | ceiling | SELECT ceiling(-0.1) | struct<ceiling(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct<char(65):string> |
| org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct<chr(65):string> |
| org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct<coalesce(NULL, 1, NULL):int> |
| org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct<concat(Spark, SQL):string> |
| org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct<concat_ws( , Spark, SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct<conv(100, 2, 10):string> |
| org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct<COS(0):double> |
| org.apache.spark.sql.catalyst.expressions.Cosh | cosh | SELECT cosh(0) | struct<COSH(0):double> |
| org.apache.spark.sql.catalyst.expressions.Cot | cot | SELECT cot(1) | struct<COT(1):double> |
| org.apache.spark.sql.catalyst.expressions.Crc32 | crc32 | SELECT crc32('Spark') | struct<crc32(Spark):bigint> |
| org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct<array(1, 2, 3):array<int>> |
| org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct<map(1.0, 2, 3.0, 4):map<decimal(2,1),string>> |
| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct<named_struct(a, 1, b, 2, c, 3):struct<a:int,b:int,c:int>> |
| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | struct | SELECT struct(1, 2, 3) | struct<struct(1, 2, 3):struct<col1:int,col2:int,col3:int>> |
| org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct<from_csv(1, 0.8):struct<a:int,b:double>> |
| org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,cume_dist() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
| org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct<current_catalog():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct<current_database():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct<current_date():date> |
| org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct<current_timezone():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct<current_timestamp():timestamp> |
| org.apache.spark.sql.catalyst.expressions.CurrentUser | current_user | SELECT current_user() | struct<current_user():string> |
| org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct<datediff(2009-07-31, 2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct<date_format(2016-04-08, y):string> |
| org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct<date_from_unix_date(1):date> |
| org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct<dayofyear(2016-04-09):int> |
| org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct<decode(encode(abc, utf-8), utf-8):string> |
| org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,DENSE_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(3 / 2):double> |
| org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct<element_at(array(1, 2, 3), 2):int> |
| org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct<elt(1, scala, java):string> |
| org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct<encode(abc, utf-8):binary> |
| org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct<E():double> |
| org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct<EXP(0):double> |
| org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct<col:int> |
| org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct<col:int> |
| org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct<EXPM1(0):double> |
| org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct<extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct<factorial(5):bigint> |
| org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct<find_in_set(ab, abc,b,ab,c,def):int> |
| org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct<flatten(array(array(1, 2), array(3, 4))):array<int>> |
| org.apache.spark.sql.catalyst.expressions.Floor | floor | SELECT floor(-0.1) | struct<FLOOR(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct<format_number(12332.123456, 4):string> |
| org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct<format_string(Hello World %d %s, 100, days):string> |
| org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct<printf(Hello World %d %s, 100, days):string> |
| org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp | from_utc_timestamp | SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<from_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
| org.apache.spark.sql.catalyst.expressions.FromUnixTime | from_unixtime | SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') | struct<from_unixtime(0, yyyy-MM-dd HH:mm:ss):string> |
| org.apache.spark.sql.catalyst.expressions.GetJsonObject | get_json_object | SELECT get_json_object('{"a":"b"}', '$.a') | struct<get_json_object({"a":"b"}, $.a):string> |
| org.apache.spark.sql.catalyst.expressions.GreaterThan | > | SELECT 2 > 1 | struct<(2 > 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct<greatest(10, 9, 2, 4, 3):int> |
| org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct<name:string,grouping(name):tinyint,sum(age):bigint> |
| org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct<name:string,grouping_id():bigint,sum(age):bigint,avg(height):double> |
| org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(17):string> |
| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct<hour(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(3, 4):double> |
| org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> |
| org.apache.spark.sql.catalyst.expressions.IfNull | ifnull | SELECT ifnull(NULL, array('2')) | struct<ifnull(NULL, array(2)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> |
| org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct<initcap(sPark sql):string> |
| org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> |
| org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> |
| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct<input_file_block_length():bigint> |
| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct<input_file_block_start():bigint> |
| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct<input_file_name():string> |
| org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(3 div 2):bigint> |
| org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct<isnan(CAST(NaN AS DOUBLE)):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct<json_object_keys({}):array<string>> |
| org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct<from_json({"a":1, "b":0.8}):struct<a:int,b:double>> |
| org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct<c0:string,c1:string> |
| org.apache.spark.sql.catalyst.expressions.Lag | lag | SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,lag(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN -1 FOLLOWING AND -1 FOLLOWING):int> |
| org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct<last_day(2009-01-12):date> |
| org.apache.spark.sql.catalyst.expressions.Lead | lead | SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,lead(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int> |
| org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct<least(10, 9, 2, 4, 3):int> |
| org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct<left(Spark SQL, 3):string> |
| org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct<char_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct<character_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct<length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct<json_array_length([1,2,3,4]):int> |
| org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct<levenshtein(kitten, sitting):int> |
| org.apache.spark.sql.catalyst.expressions.Like | like | SELECT like('Spark', '_park') | struct<Spark LIKE _park:boolean> |
| org.apache.spark.sql.catalyst.expressions.Log | ln | SELECT ln(1) | struct<ln(1):double> |
| org.apache.spark.sql.catalyst.expressions.Log10 | log10 | SELECT log10(10) | struct<LOG10(10):double> |
| org.apache.spark.sql.catalyst.expressions.Log1p | log1p | SELECT log1p(0) | struct<LOG1P(0):double> |
| org.apache.spark.sql.catalyst.expressions.Log2 | log2 | SELECT log2(2) | struct<LOG2(2):double> |
| org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct<LOG(10, 100):double> |
| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.MakeDTInterval | make_dt_interval | SELECT make_dt_interval(1, 12, 30, 01.001001) | struct<make_dt_interval(1, 12, 30, 1.001001):interval day to second> |
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct<make_ym_interval(1, 2):interval year to month> |
| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
| org.apache.spark.sql.catalyst.expressions.MapFilter | map_filter | SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) | struct<map_filter(map(1, 0, 2, 2, 3, -1), lambdafunction((namedlambdavariable() > namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.MapFromArrays | map_from_arrays | SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) | struct<map_from_arrays(array(1.0, 3.0), array(2, 4)):map<decimal(2,1),string>> |
| org.apache.spark.sql.catalyst.expressions.MapFromEntries | map_from_entries | SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) | struct<map_from_entries(array(struct(1, a), struct(2, b))):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct<map_keys(map(1, a, 2, b)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct<map_values(map(1, a, 2, b)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct<map_zip_with(map(1, a, 2, b), map(1, x, 2, y), lambdafunction(concat(namedlambdavariable(), namedlambdavariable()), namedlambdavariable(), namedlambdavariable(), namedlambdavariable())):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct<md5(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp | timestamp_micros | SELECT timestamp_micros(1230219000123123) | struct<timestamp_micros(1230219000123123):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MillisToTimestamp | timestamp_millis | SELECT timestamp_millis(1230219000123) | struct<timestamp_millis(1230219000123):timestamp> |
| org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct<minute(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | SELECT monotonically_increasing_id() | struct<monotonically_increasing_id():bigint> |
| org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct<month(2016-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct<months_between(1997-02-28 10:30:00, 1996-10-30, true):double> |
| org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> |
| org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct<hash(Spark, array(123), 2):int> |
| org.apache.spark.sql.catalyst.expressions.NTile | ntile | SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,ntile(2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct<nanvl(CAST(NaN AS DOUBLE), 123):double> |
| org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct<next_day(2015-01-14, TU):date> |
| org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct<now():timestamp> |
| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,nth_value(b, 2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct<nullif(2, 2):int> |
| org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct<nvl(NULL, array(2)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct<nvl2(NULL, 2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct<octet_length(Spark SQL):int> |
| org.apache.spark.sql.catalyst.expressions.Or | or | SELECT true or false | struct<(true OR false):boolean> |
| org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct<overlay(Spark SQL, _, 6, -1):string> |
| org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct<to_date(2009-07-30 04:17:52):date> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampWithoutTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp without time zone> |
| org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
| org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct<PI():double> |
| org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct<pmod(10, 3):int> |
| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct<pos:int,col:int> |
| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct<pos:int,col:int> |
| org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct<pow(2, 3):double> |
| org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct<POWER(2, 3):double> |
| org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct<quarter(2016-08-31):int> |
| org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') | struct<REGEXP(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') | struct<REGEXP_LIKE(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\Users.*') | struct<RLIKE(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):null> |
| org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
| org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1) | struct<regexp_extract(100-200, (\d+)-(\d+), 1):string> |
| org.apache.spark.sql.catalyst.expressions.RegExpExtractAll | regexp_extract_all | SELECT regexp_extract_all('100-200, 300-400', '(\d+)-(\d+)', 1) | struct<regexp_extract_all(100-200, 300-400, (\d+)-(\d+), 1):array<string>> |
| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\d+)', 'num') | struct<regexp_replace(100-200, (\d+), num, 1):string> |
| org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(2 % 1.8):decimal(2,1)> |
| org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(2 % 1.8):decimal(2,1)> |
| org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct<reverse(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Right | right | SELECT right('Spark SQL', 3) | struct<right(Spark SQL, 3):string> |
| org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct<rint(12.3456):double> |
| org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct<round(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,row_number() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct<schema_of_csv(1,abc):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct<schema_of_json([{"col":0}]):string> |
| org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct<second(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp | timestamp_seconds | SELECT timestamp_seconds(1230219000) | struct<timestamp_seconds(1230219000):timestamp> |
| org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct<sentences(Hi there! Good morning., , ):array<array<string>>> |
| org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct<sequence(1, 5):array<int>> |
| org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct<sha(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct<sha1(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct<sha2(Spark, 256):string> |
| org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct<shiftleft(2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct<shiftright(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct<shiftrightunsigned(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.Shuffle | shuffle | SELECT shuffle(array(1, 20, 3, 5)) | struct<shuffle(array(1, 20, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct<sign(40):double> |
| org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct<SIGNUM(40):double> |
| org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct<SIN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Sinh | sinh | SELECT sinh(0) | struct<SINH(0):double> |
| org.apache.spark.sql.catalyst.expressions.Size | cardinality | SELECT cardinality(array('b', 'd', 'c', 'a')) | struct<cardinality(array(b, d, c, a)):int> |
| org.apache.spark.sql.catalyst.expressions.Size | size | SELECT size(array('b', 'd', 'c', 'a')) | struct<size(array(b, d, c, a)):int> |
| org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct<slice(array(1, 2, 3, 4), 2, 2):array<int>> |
| org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct<sort_array(array(b, d, NULL, c, a), true):array<string>> |
| org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct<soundex(Miller):string> |
| org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | SELECT spark_partition_id() | struct<SPARK_PARTITION_ID():int> |
| org.apache.spark.sql.catalyst.expressions.SparkVersion | version | SELECT version() | struct<version():string> |
| org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct<SQRT(4):double> |
| org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct<col0:int,col1:int> |
| org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct<instr(SparkSQL, SQL):int> |
| org.apache.spark.sql.catalyst.expressions.StringLPad | lpad | SELECT lpad('hi', 5, '??') | struct<lpad(hi, 5, ??):string> |
| org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct<locate(bar, foobarbar, 1):int> |
| org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct<position(bar, foobarbar, 1):int> |
| org.apache.spark.sql.catalyst.expressions.StringRPad | rpad | SELECT rpad('hi', 5, '??') | struct<rpad(hi, 5, ??):string> |
| org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct<repeat(123, 2):string> |
| org.apache.spark.sql.catalyst.expressions.StringReplace | replace | SELECT replace('ABCabc', 'abc', 'DEF') | struct<replace(ABCabc, abc, DEF):string> |
| org.apache.spark.sql.catalyst.expressions.StringSpace | space | SELECT concat(space(2), '1') | struct<concat(space(2), 1):string> |
| org.apache.spark.sql.catalyst.expressions.StringSplit | split | SELECT split('oneAtwoBthreeC', '[ABC]') | struct<split(oneAtwoBthreeC, [ABC], -1):array<string>> |
| org.apache.spark.sql.catalyst.expressions.StringToMap | str_to_map | SELECT str_to_map('a:1,b:2,c:3', ',', ':') | struct<str_to_map(a:1,b:2,c:3, ,, :):map<string,string>> |
| org.apache.spark.sql.catalyst.expressions.StringTranslate | translate | SELECT translate('AaBbCc', 'abc', '123') | struct<translate(AaBbCc, abc, 123):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrim | trim | SELECT trim(' SparkSQL ') | struct<trim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimBoth | btrim | SELECT btrim(' SparkSQL ') | struct<btrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimLeft | ltrim | SELECT ltrim(' SparkSQL ') | struct<ltrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimRight | rtrim | SELECT rtrim(' SparkSQL ') | struct<rtrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StructsToCsv | to_csv | SELECT to_csv(named_struct('a', 1, 'b', 2)) | struct<to_csv(named_struct(a, 1, b, 2)):string> |
| org.apache.spark.sql.catalyst.expressions.StructsToJson | to_json | SELECT to_json(named_struct('a', 1, 'b', 2)) | struct<to_json(named_struct(a, 1, b, 2)):string> |
| org.apache.spark.sql.catalyst.expressions.Substring | substr | SELECT substr('Spark SQL', 5) | struct<substr(Spark SQL, 5, 2147483647):string> |
| org.apache.spark.sql.catalyst.expressions.Substring | substring | SELECT substring('Spark SQL', 5) | struct<substring(Spark SQL, 5, 2147483647):string> |
| org.apache.spark.sql.catalyst.expressions.SubstringIndex | substring_index | SELECT substring_index('www.apache.org', '.', 2) | struct<substring_index(www.apache.org, ., 2):string> |
| org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> |
| org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct<TAN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct<TANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.TimeWindow | window | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> |
| org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct<RADIANS(180):double> |
| org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> |
| org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.TransformValues | transform_values | SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) | struct<transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> |
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> |
| org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> |
| org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> |
| org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
| org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct<unix_date(1970-01-02):int> |
| org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_micros(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_millis(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_seconds(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
| org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct<ucase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct<upper(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct<uuid():string> |
| org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct<weekday(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct<weekofyear(2008-02-20):int> |
| org.apache.spark.sql.catalyst.expressions.WidthBucket | width_bucket | SELECT width_bucket(5.3, 0.2, 10.6, 5) | struct<width_bucket(5.3, 0.2, 10.6, 5):bigint> |
| org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct<xxhash64(Spark, array(123), 2):bigint> |
| org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct<year(2016-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>> |
| org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct<approx_percentile(col, array(0.5, 0.4, 0.1), 100):array<int>> |
| org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct<percentile_approx(col, array(0.5, 0.4, 0.1), 100):array<int>> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<avg(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Average | mean | SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<mean(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg | bit_and | SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) | struct<bit_and(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg | bit_or | SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) | struct<bit_or(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg | bit_xor | SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) | struct<bit_xor(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | bool_and | SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) | struct<bool_and(col):boolean> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | every | SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) | struct<every(col):boolean> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct<any(col):boolean> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct<bool_or(col):boolean> |
| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct<some(col):boolean> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct<collect_list(col):array<int>> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct<collect_set(col):array<int>> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct<corr(c1, c2):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct<count(1):bigint> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct<count_if(((col % 2) = 0)):bigint> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) | struct<hex(count_min_sketch(col, 0.5, 0.5, 1)):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct<covar_pop(c1, c2):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct<covar_samp(c1, c2):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<first(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.First | first_value | SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<first_value(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus | approx_count_distinct | SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) | struct<approx_count_distinct(col1):bigint> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<kurtosis(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, 0.3, 1):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | std | SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<std(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_samp(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct<sum(col):bigint> |
| org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<var_pop(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<var_samp(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<variance(col):double> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean | xpath_boolean | SELECT xpath_boolean('<a><b>1</b></a>','a/b') | struct<xpath_boolean(<a><b>1</b></a>, a/b):boolean> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_double | SELECT xpath_double('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_double(<a><b>1</b><b>2</b></a>, sum(a/b)):double> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_number | SELECT xpath_number('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_number(<a><b>1</b><b>2</b></a>, sum(a/b)):double> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathFloat | xpath_float | SELECT xpath_float('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_float(<a><b>1</b><b>2</b></a>, sum(a/b)):float> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathInt | xpath_int | SELECT xpath_int('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_int(<a><b>1</b><b>2</b></a>, sum(a/b)):int> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
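
Each "Output schema" cell is the single-row struct that the query example produces. As a quick cross-check (a sketch, assuming a running `spark-sql` session), `DESCRIBE QUERY` reports the same column name and type that the schema column records, so any row of the table can be verified like this (the expected output shown is an assumption, not taken from the generated file):

```sql
-- Verifying the Abs row: the table above records struct<abs(-1):int>.
DESCRIBE QUERY SELECT abs(-1);
-- expected: col_name = abs(-1), data_type = int
```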