In [0]:
from pyspark.sql import SparkSession

# Create SparkSession
spark = SparkSession.builder \
          .appName('SparkByExamples.com') \
          .getOrCreate()

In [0]:
data = [("1","2019-07-01"),("2","2019-06-24"),("3","2019-08-24")]

In [0]:
df=spark.createDataFrame(data=data,schema=["id","date"])

In [0]:
from pyspark.sql.functions import *


In [0]:
df.select(
      col("date"),
      current_date().alias("current_date"),
      datediff(current_date(),col("date")).alias("datediff")
    ).show()

+----------+------------+--------+
|      date|current_date|datediff|
+----------+------------+--------+
|2019-07-01|  2023-06-14|    1444|
|2019-06-24|  2023-06-14|    1451|
|2019-08-24|  2023-06-14|    1390|
+----------+------------+--------+



In [0]:
df.withColumn("datesDiff", datediff(current_date(),col("date"))) \
  .withColumn("montsDiff", months_between(current_date(),col("date"))) \
  .withColumn("montsDiff_round",round(months_between(current_date(),col("date")),2)) \
  .withColumn("yearsDiff",months_between(current_date(),col("date"))/lit(12)) \
  .withColumn("yearsDiff_round",round(months_between(current_date(),col("date"))/lit(12),2)) \
  .show()


+---+----------+---------+-----------+---------------+------------------+---------------+
| id|      date|datesDiff|  montsDiff|montsDiff_round|         yearsDiff|yearsDiff_round|
+---+----------+---------+-----------+---------------+------------------+---------------+
|  1|2019-07-01|     1444|47.41935484|          47.42| 3.951612903333333|           3.95|
|  2|2019-06-24|     1451|47.67741935|          47.68|3.9731182791666666|           3.97|
|  3|2019-08-24|     1390|45.67741935|          45.68|      3.8064516125|           3.81|
+---+----------+---------+-----------+---------------+------------------+---------------+



In [0]:
data2 = [("1","07-01-2019"),("2","06-24-2019"),("3","08-24-2019")]  
df2=spark.createDataFrame(data=data2,schema=["id","date"])
df2.select(
    to_date(col("date"),"MM-dd-yyyy").alias("date"),
    current_date().alias("endDate")
    )

Out[7]: DataFrame[date: date, endDate: date]

In [0]:
#SQL

spark.sql("select round(months_between('2019-07-01',current_date())/12,2) as years_diff").show()

+----------+
|years_diff|
+----------+
|     -3.95|
+----------+

