diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 0db25ad8739f3..5106a83bd0ec4 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1105,6 +1105,20 @@ setMethod("monthname",
             column(jc)
           })
 
+#' @details
+#' \code{dayname}: Extracts the three-letter abbreviated day name from a
+#' given date/timestamp/string.
+#'
+#' @rdname column_datetime_functions
+#' @aliases dayname dayname,Column-method
+#' @note dayname since 4.0.0
+setMethod("dayname",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "dayname", x@jc)
+            column(jc)
+          })
+
 #' @details
 #' \code{decode}: Computes the first argument into a string from a binary using the provided
 #' character set.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 92febc02710da..26e81733055a6 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1024,6 +1024,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
 #' @name NULL
 setGeneric("monthname", function(x) { standardGeneric("monthname") })
 
+#' @rdname column_datetime_functions
+#' @name NULL
+setGeneric("dayname", function(x) { standardGeneric("dayname") })
+
 #' @rdname column_string_functions
 #' @name NULL
 setGeneric("decode", function(x, charset) { standardGeneric("decode") })
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 118c853a00dfc..630781a57e444 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2063,6 +2063,7 @@ test_that("date functions on a DataFrame", {
   expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
   expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
   expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec", "Dec"))
+  expect_equal(collect(select(df, dayname(df$b)))[, 1], c("Thu", "Sat", "Mon"))
   expect_equal(collect(select(df, last_day(df$b)))[, 1],
                c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31")))
   expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 2a48958d42222..133b7e036cd7c 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5960,6 +5960,15 @@ object functions {
   def monthname(timeExp: Column): Column =
     Column.fn("monthname", timeExp)
 
+  /**
+   * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
+   *
+   * @group datetime_funcs
+   * @since 4.0.0
+   */
+  def dayname(timeExp: Column): Column =
+    Column.fn("dayname", timeExp)
+
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Collection functions
   //////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index d5b384e9bc64b..b52f75a2914d0 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -2129,6 +2129,10 @@ class PlanGenerationTestSuite
   temporalFunctionTest("monthname") {
     fn.monthname(fn.col("d"))
   }
 
+  temporalFunctionTest("dayname") {
+    fn.dayname(fn.col("d"))
+  }
+
   temporalFunctionTest("next_day") {
     fn.next_day(fn.col("d"), "Mon")
   }
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain
new file mode 100644
index 0000000000000..49cbb48c88147
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain
@@ -0,0 +1,2 @@
+Project [dayname(d#0) AS dayname(d)#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json
new file mode 100644
index 0000000000000..7898aa53deb89
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "dayname",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "d"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin
new file mode 100644
index 0000000000000..2809f3b9b7a11
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index cbdadd9cd2bda..ca20ccfb73c56 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -252,6 +252,7 @@ Date and Timestamp Functions
     datediff
     datepart
     day
+    dayname
     dayofmonth
     dayofweek
     dayofyear
diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index 1e22a42c6241e..6390e65eb1cf1 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -2985,6 +2985,13 @@ def monthname(col: "ColumnOrName") -> Column:
 monthname.__doc__ = pysparkfuncs.monthname.__doc__
 
 
+def dayname(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("dayname", col)
+
+
+dayname.__doc__ = pysparkfuncs.dayname.__doc__
+
+
 def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("extract", field, source)
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index f1422d17b071a..ea22b266db9ae 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -7303,6 +7303,39 @@ def monthname(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("monthname", col)
 
 
+@_try_remote_functions
+def dayname(col: "ColumnOrName") -> Column:
+    """
+    Date and Timestamp Function: Returns the three-letter abbreviated day name from the given date.
+
+    .. versionadded:: 4.0.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target date/timestamp column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the three-letter abbreviated day name for the date/timestamp (Mon, Tue, Wed...).
+
+    Examples
+    --------
+    Example 1: Basic usage of dayname function.
+
+    >>> import pyspark.sql.functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(sf.dayname('dt').alias('dayname')).show()
+    +-------+
+    |dayname|
+    +-------+
+    |    Wed|
+    +-------+
+    """
+    return _invoke_function_over_columns("dayname", col)
+
+
 @_try_remote_functions
 def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
     """
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index c67ddcde44ea1..9ea70d25bd1f1 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -421,6 +421,12 @@ def test_monthname(self):
         row = df.select(F.monthname(df.date)).first()
         self.assertEqual(row[0], "Nov")
 
+    def test_dayname(self):
+        dt = datetime.datetime(2017, 11, 6)
+        df = self.spark.createDataFrame([Row(date=dt)])
+        row = df.select(F.dayname(df.date)).first()
+        self.assertEqual(row[0], "Mon")
+
     # Test added for SPARK-37738; change Python API to accept both col & int as input
     def test_date_add_function(self):
         dt = datetime.date(2021, 12, 27)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index a9b1178a8dbaf..e8c4122ac408f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -642,6 +642,7 @@ object FunctionRegistry {
     expression[UnixTimestamp]("unix_timestamp"),
     expression[DayOfWeek]("dayofweek"),
     expression[WeekDay]("weekday"),
+    expression[DayName]("dayname"),
     expression[WeekOfYear]("weekofyear"),
     expression[Year]("year"),
     expression[TimeWindow]("window"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 22d4e175b9a30..997d2d7420a39 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -909,6 +909,25 @@ case class MonthName(child: Expression) extends GetDateField {
     copy(child = newChild)
 }
 
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the three-letter abbreviated day name from the given date.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(DATE('2008-02-20'));
+       Wed
+  """,
+  group = "datetime_funcs",
+  since = "4.0.0")
+case class DayName(child: Expression) extends GetDateField {
+  override val func = DateTimeUtils.getDayName
+  override val funcName = "getDayName"
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+  override def dataType: DataType = StringType
+  override protected def withNewChildInternal(newChild: Expression): DayName =
+    copy(child = newChild)
+}
+
 // scalastyle:off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string in the format specified by the date format `fmt`.",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 0dd83c4b499ee..01503324048b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -208,6 +208,17 @@ object DateTimeUtils extends SparkDateTimeUtils {
     UTF8String.fromString(monthName)
   }
 
+  /**
+   * Returns the three-letter abbreviated day name for the given number of days since 1970-01-01.
+   */
+  def getDayName(days: Int): UTF8String = {
+    val dayName = DayOfWeek
+      .of(getWeekDay(days) + 1)
+      .getDisplayName(TextStyle.SHORT, DateFormatter.defaultLocale)
+
+    UTF8String.fromString(dayName)
+  }
+
   /**
    * Adds months to a timestamp at the given time zone. It converts the input timestamp to a local
    * timestamp at the given time zone, adds months, and converts the resulted local timestamp
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 88bb05cbf9176..d159d911763b3 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -269,6 +269,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkConsistencyBetweenInterpretedAndCodegen(MonthName, DateType)
   }
 
+  test("DayName") {
+    checkEvaluation(DayName(Literal.create(null, DateType)), null)
+    checkEvaluation(DayName(Literal(d)), "Wed")
+    checkEvaluation(DayName(Cast(Literal(date), DateType, UTC_OPT)), "Wed")
+    checkEvaluation(DayName(Cast(Literal(ts), DateType, UTC_OPT)), "Fri")
+    checkEvaluation(DayName(Cast(Literal("2011-05-06"), DateType, UTC_OPT)), "Fri")
+    checkEvaluation(DayName(Cast(Literal(LocalDate.parse("2017-05-27")), DateType, UTC_OPT)), "Sat")
+    checkEvaluation(DayName(Cast(Literal(LocalDate.parse("1582-10-15")), DateType, UTC_OPT)), "Fri")
+    checkConsistencyBetweenInterpretedAndCodegen(DayName, DateType)
+  }
+
   test("DateFormat") {
     Seq("legacy", "corrected").foreach { legacyParserPolicy =>
       withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 97963c4ed924f..933d0b3f89a7e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5747,6 +5747,15 @@ object functions {
   def monthname(timeExp: Column): Column =
     Column.fn("monthname", timeExp)
 
+  /**
+   * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
+   *
+   * @group datetime_funcs
+   * @since 4.0.0
+   */
+  def dayname(timeExp: Column): Column =
+    Column.fn("dayname", timeExp)
+
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Collection functions
   //////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index cff7921db0e53..f5bd0c8425d26 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -116,6 +116,7 @@
 | org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
 | org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | datepart | SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
 | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
+| org.apache.spark.sql.catalyst.expressions.DayName | dayname | SELECT dayname(DATE('2008-02-20')) | struct<dayname(2008-02-20):string> |
 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(2009-07-30):int> |
 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
 | org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 7d608f4a08c4b..dbd0bbec0396a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -279,6 +279,18 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
       Row("Apr", "Apr", "Apr"))
   }
 
+  test("dayname") {
+    val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(dayname($"a"), dayname($"b"), dayname($"c")),
+      Row("Wed", "Wed", "Mon"))
+
+    checkAnswer(
+      df.selectExpr("dayname(a)", "dayname(b)", "dayname(c)"),
+      Row("Wed", "Wed", "Mon"))
+  }
+
   test("extract") {
     val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
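
For reviewers, a minimal end-to-end usage sketch of the API this patch adds (not part of the diff). It assumes a Spark build that already contains this change (4.0.0); the object name, master URL, and app name below are illustrative. The expected values follow the tests above: 2015-04-08 is a Wednesday and 2017-05-27 a Saturday.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, dayname, to_date}

object DayNameDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("dayname-demo").getOrCreate()
    import spark.implicits._

    val df = Seq("2015-04-08", "2017-05-27").toDF("dt")

    // DataFrame API: DayName declares a DateType input, and strings are implicitly
    // cast to date, so dayname(col("dt")) also works (see DateFunctionsSuite above).
    df.select(dayname(to_date(col("dt")))).show()   // Wed, Sat

    // SQL surface: the expression is registered as "dayname" in FunctionRegistry.
    spark.sql("SELECT dayname(DATE'2015-04-08')").show()   // Wed

    spark.stop()
  }
}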