doc: link to spark functions (PR #138)
* doc: Added some links to spark functions

Some functions have scaladoc issues --> #135

Co-authored-by: Eduardo Ruiz <eduardo.ruiz@hablapps.com>
eruizalo committed Jan 24, 2022
1 parent 75f91dd commit b5efcd4
Showing 7 changed files with 115 additions and 38 deletions.
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/BinaryColumns.scala
@@ -35,6 +35,7 @@ private[syntax] trait BinaryColumns {
* as a 32 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.md5]]
*/
def md5: StringColumn = column.elem.map(f.md5).toDC

@@ -43,6 +44,7 @@ private[syntax] trait BinaryColumns {
* as a 40 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha1]]
*/
def sha1: StringColumn = column.elem.map(f.sha1).toDC

@@ -52,6 +54,7 @@ private[syntax] trait BinaryColumns {
*
* @throws java.lang.IllegalArgumentException if numBits is not in the permitted values
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha2]]
*/
def sha2(numBits: Int): StringColumn =
column.elem.map(x => f.sha2(x, numBits)).toDC
@@ -61,6 +64,7 @@ private[syntax] trait BinaryColumns {
* returns the value as a long column.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.crc32]]
*/
def crc32: LongColumn = column.elem.map(f.crc32).toDC

@@ -69,6 +73,7 @@ private[syntax] trait BinaryColumns {
* This is the reverse of unbase64.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.base64]]
*/
def base64: StringColumn = column.elem.map(f.base64).toDC

@@ -78,6 +83,7 @@ private[syntax] trait BinaryColumns {
* If either argument is null, the result will also be null.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.decode]]
*/
def decode(charset: StringColumn): StringColumn =
(column.elem, charset.elem)
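The methods above delegate to the Spark functions their new @see tags link to. As a minimal usage sketch (not from this commit: it assumes the standard doric import, a DataFrame df with a binary column "bytes", and doric's colBinary getter):

import doric._

// Assumed data: df has a column "bytes" of type Array[Byte].
val withHashes = df
  .withColumn("md5", colBinary("bytes").md5)          // f.md5: 32-character hex string
  .withColumn("sha256", colBinary("bytes").sha2(256)) // f.sha2: numBits must be a permitted value such as 256
  .withColumn("b64", colBinary("bytes").base64)       // f.base64: base64-encoded string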
2 changes: 2 additions & 0 deletions core/src/main/scala/doric/syntax/BooleanColumns.scala
@@ -67,6 +67,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue: NullColumn = column.elem.map(f.assert_true).toDC

@@ -75,6 +76,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column,e:* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue(msg: StringColumn): NullColumn =
(column.elem, msg.elem).mapN(f.assert_true).toDC
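Both assertTrue overloads gain links; a hedged sketch of their use (assumes a DataFrame df with a boolean column "isValid", and doric's .lit literal syntax):

import doric._

// Assumed data: df has a boolean column "isValid".
// assert_true evaluates to null when the condition holds and raises a
// java.lang.RuntimeException at evaluation time when it does not.
val checked = df
  .withColumn("check", colBoolean("isValid").assertTrue)
  .withColumn("checkWithMsg", colBoolean("isValid").assertTrue("isValid must hold".lit))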
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/CommonColumns.scala
@@ -21,6 +21,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* the DoricColumns to coalesce
* @return
* the first column that is not null, or null if all inputs are null.
* @see [[org.apache.spark.sql.functions.coalesce]]
*/
def coalesce[T](cols: DoricColumn[T]*): DoricColumn[T] =
cols.map(_.elem).toList.sequence.map(f.coalesce(_: _*)).toDC
@@ -29,6 +30,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* Calculates the hash code of given columns, and returns the result as an integer column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.hash]]
*/
def hash(cols: DoricColumn[_]*): IntegerColumn =
cols.map(_.elem).toList.sequence.map(f.hash(_: _*)).toDC
@@ -38,6 +40,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* variant of the xxHash algorithm, and returns the result as a long column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.xxhash64]]
*/
def xxhash64(cols: DoricColumn[_]*): LongColumn =
cols.map(_.elem).toList.sequence.map(f.xxhash64(_: _*)).toDC
@@ -185,6 +188,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* literals to compare to
* @return
* Boolean DoricColumn with the comparison logic.
* @see [[org.apache.spark.sql.Column.isin]]
*/
def isIn(elems: T*): BooleanColumn = column.elem.map(_.isin(elems: _*)).toDC

@@ -193,6 +197,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNull]]
*/
def isNull: BooleanColumn = column.elem.map(_.isNull).toDC

@@ -201,6 +206,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNotNull]]
*/
def isNotNull: BooleanColumn = column.elem.map(_.isNotNull).toDC

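A hedged sketch of the helpers linked above (assumes a DataFrame df with nullable string columns "a" and "b"):

import doric._

// Assumed data: df has two nullable string columns "a" and "b".
val cleaned = df
  .withColumn("firstNonNull", coalesce(colString("a"), colString("b"))) // f.coalesce
  .withColumn("isKnown", colString("a").isIn("foo", "bar"))             // Column.isin
  .withColumn("aMissing", colString("a").isNull)                        // Column.isNull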
50 changes: 36 additions & 14 deletions core/src/main/scala/doric/syntax/DateColumns.scala
@@ -15,6 +15,7 @@ private[syntax] trait DateColumns {
* All calls of current_date within the same query return the same value.
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.current_date]]
*/
def currentDate(): DateColumn = f.current_date().asDoric[Date]

@@ -32,6 +33,7 @@ private[syntax] trait DateColumns {
* Date column after adding months
* @note
* Timestamp columns will be truncated to a Date column
* @see [[org.apache.spark.sql.functions.add_months(startDate:org\.apache\.spark\.sql\.Column,numMonths:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.add_months]]
*/
def addMonths(nMonths: IntegerColumn): DateColumn =
(column.elem, nMonths.elem).mapN(f.add_months).toDC
@@ -44,6 +46,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to a Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_add(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_add]]
*/
def addDays(days: IntegerColumn): DateColumn =
(column.elem, days.elem).mapN(f.date_add).toDC
@@ -59,6 +62,7 @@ private[syntax] trait DateColumns {
* Use specialized functions like 'year' whenever possible as they benefit from a
* specialized implementation.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_format]]
*/
def format(format: StringColumn): StringColumn =
(column.elem, format.elem)
@@ -75,6 +79,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to a Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_sub(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_sub]]
*/
def subDays(days: IntegerColumn): DateColumn =
(column.elem, days.elem).mapN(f.date_sub).toDC
@@ -85,6 +90,7 @@ private[syntax] trait DateColumns {
* @param dateCol
* A Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.datediff]]
*/
def diff(dateCol: DoricColumn[T]): IntegerColumn =
(column.elem, dateCol.elem)
@@ -95,6 +101,7 @@ private[syntax] trait DateColumns {
* Extracts the day of the month as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofmonth]]
*/
def dayOfMonth: IntegerColumn = column.elem.map(f.dayofmonth).toDC

@@ -103,20 +110,23 @@ private[syntax] trait DateColumns {
* Ranges from 1 for a Sunday through to 7 for a Saturday
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofweek]]
*/
def dayOfWeek: IntegerColumn = column.elem.map(f.dayofweek).toDC

/**
* Extracts the day of the year as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofyear]]
*/
def dayOfYear: IntegerColumn = column.elem.map(f.dayofyear).toDC

/**
* Sets the moment to the last day of the same month.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.last_day]]
*/
def endOfMonth: DateColumn = lastDayOfMonth

@@ -126,13 +136,15 @@ private[syntax] trait DateColumns {
* month in July 2015.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.last_day]]
*/
def lastDayOfMonth: DateColumn = column.elem.map(f.last_day).toDC

/**
* Extracts the month as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.month]]
*/
def month: IntegerColumn = column.elem.map(f.month).toDC

@@ -143,7 +155,7 @@ private[syntax] trait DateColumns {
* of their respective months. Otherwise, the difference is calculated assuming 31 days per month.
*
* For example:
* {{{
* @example {{{
* Date("2017-11-14").monthsBetween(Date("2017-07-14")) // returns 4.0
* Date("2017-01-01").monthsBetween(Date("2017-01-10")) // returns 0.29032258
* Timestamp("2017-06-01 00:00:00").monthsBetween(Timestamp("2017-06-16 12:00:00")) // returns -0.5
@@ -152,6 +164,7 @@
* @param dateCol
* Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(dateCol: DoricColumn[T]): DoubleColumn =
(column.elem, dateCol.elem).mapN(f.months_between).toDC
@@ -165,6 +178,7 @@
* If `roundOff` is set to true, the result is rounded off to 8 digits;
* it is not rounded otherwise.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column,roundOff:* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(
dateCol: DoricColumn[T],
@@ -180,14 +194,15 @@
* Returns the first date which is later than the value of the `date` column that is on the
* specified day of the week.
*
* For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") because
* that is the first Sunday after 2015-07-27.
* @example For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02")
* because that is the first Sunday after 2015-07-27.
*
* @param dayOfWeek
* Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
* @note
* Timestamp columns will be truncated to a Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.next_day]]
*/
def nextDay(dayOfWeek: StringColumn): DateColumn =
(column.elem, dayOfWeek.elem)
@@ -200,28 +215,30 @@
* Extracts the quarter as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.quarter]]
*/
def quarter: IntegerColumn = column.elem.map(f.quarter).toDC

/**
* Returns date truncated to the unit specified by the format.
*
* For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
* @example For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
*
* @param format
* if date:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month
* Other options are: 'week', 'quarter'
* if timestamp:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month,
* * 'day', 'dd' to truncate by day,
* Other options are:
* * 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* - if <b>date</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month
* - __Other options are__: 'week', 'quarter'
* - if <b>timestamp</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month,
* - 'day', 'dd' to truncate by day,
* - __Other options are__: 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* @note
* Timestamp columns will be truncated to a Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.trunc]]
* @see [[org.apache.spark.sql.functions.date_trunc]]
*/
def truncate(format: StringColumn): DoricColumn[T] =
(column.elem, format.elem)
@@ -243,6 +260,7 @@
* A long
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.unix_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.unix_timestamp]]
*/
def unixTimestamp: LongColumn = column.elem.map(f.unix_timestamp).toDC

@@ -253,27 +271,31 @@
* as defined by ISO 8601
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.weekofyear]]
*/
def weekOfYear: IntegerColumn = column.elem.map(f.weekofyear).toDC

/**
* Extracts the year as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.year]]
*/
def year: IntegerColumn = column.elem.map(f.year).toDC

/**
* Transform date to timestamp
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
*/
def toTimestamp: TimestampColumn = column.elem.map(f.to_timestamp).toDC

/**
* Transform date to Instant
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
*/
def toInstant: InstantColumn = column.elem.map(f.to_timestamp).toDC
}
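A hedged sketch combining several of the linked date functions (assumes a DataFrame df with a date column "start", and doric's .lit literal syntax):

import doric._

// Assumed data: df has a date column "start".
val dates = df
  .withColumn("plus2Months", colDate("start").addMonths(2.lit))   // f.add_months
  .withColumn("monthEnd", colDate("start").endOfMonth)            // f.last_day
  .withColumn("yearStart", colDate("start").truncate("year".lit)) // f.trunc, truncating to the year
  .withColumn("week", colDate("start").weekOfYear)                // f.weekofyear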
