In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, date_sub, year, date_format

# Create a Spark session
spark = SparkSession.builder.appName("DateConversion").getOrCreate()

# Sample data
data = [("2024-09-02",)]
df = spark.createDataFrame(data, ["input_date"])

# Define the date conversion function
def process_date(df):
    # Convert input_date to DateType
    df = df.withColumn("input_date", col("input_date").cast("date"))
    
    # Go one year back using date_sub with 365 days
    df = df.withColumn("date_one_year_back", date_sub(col("input_date"), 365))
    
    # Extract the year component
    df = df.withColumn("year_component", year(col("date_one_year_back")))
    
    # Extract the date component (in 'yyyy-MM-dd' format)
    df = df.withColumn("date_component", date_format(col("date_one_year_back"), "MM-dd"))
    
    return df

# Apply the function
result_df = process_date(df)
result_df.show()


+----------+------------------+--------------+--------------+
|input_date|date_one_year_back|year_component|date_component|
+----------+------------------+--------------+--------------+
|2024-09-02|        2023-09-03|          2023|         09-03|
+----------+------------------+--------------+--------------+

