**DATE_FORMAT()**

- This function allows you to **convert date and timestamp** columns into a specified **string** format.

- To use the DATE_FORMAT() function, the column should be of **'date' type or 'timestamp' type**.

- It formats a date column into whatever pattern you need, for example changing **2024-09-05** into **05-Sep-2024** (pattern `dd-MMM-yyyy`) or any other format you prefer.

In [0]:
# List the files stored under the DBFS FileStore tables folder.
display(dbutils.fs.ls("dbfs:/FileStore/tables/"))

path,name,size,modificationTime
dbfs:/FileStore/tables/Flatten Nested Array.json,Flatten Nested Array.json,3756,1718618620000
dbfs:/FileStore/tables/MarketPrice-1.csv,MarketPrice-1.csv,19528,1719656512000
dbfs:/FileStore/tables/MarketPrice.csv,MarketPrice.csv,19528,1719656208000
dbfs:/FileStore/tables/Question7.csv,Question7.csv,154,1725816645000
dbfs:/FileStore/tables/RunningData_Rev02.csv,RunningData_Rev02.csv,1222,1719810609000
dbfs:/FileStore/tables/RunningData_Rev03.csv,RunningData_Rev03.csv,1216,1719810946000
dbfs:/FileStore/tables/SalesData_Rev02.csv,SalesData_Rev02.csv,472,1719810784000
dbfs:/FileStore/tables/SalesData_Rev03.csv,SalesData_Rev03.csv,460,1719810973000
dbfs:/FileStore/tables/Sales_Collect.csv,Sales_Collect.csv,182972,1720293547000
dbfs:/FileStore/tables/Sales_Collect_Rev02.csv,Sales_Collect_Rev02.csv,166107,1719810826000


In [0]:
# Load the sample CSV: the first row supplies column names and Spark infers the column types.
df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("dbfs:/FileStore/tables/date_format-2.csv")
)

# Preview the first 10 rows.
display(df.limit(10))

input_timestamp,Sensex_Category,Label_Type,Last_transaction_date,Effective_Date,last_timestamp,pymt_timestamp
25/04/2023 2:00,Top,average,2024-02-26,6-Feb-23,25/04/2023 24:56:18,25/04/2023 2
26/04/2023 6:01,Top,average,2023-12-21,6-Feb-23,25/04/2002 21:12:00,26/04/2023 6
20/01/2020 4:01,Top,average,2025-03-27,8-Jan-24,25/04/2021 12:34:01,20/01/2020 4
26/04/2023 2:02,Top,average,2023-12-27,8-Jan-24,25/04/1957 20:12:01,26/04/2023 2
25/04/2023 5:02,Top,average,2024-04-29,6-Mar-23,25/04/2023 23:45:22,25/04/2023 5
25/04/2023 9:03,Forward,medium,2024-12-27,6-Mar-23,25/04/2024 14:12:02,25/04/2023 9
25/04/2023 7:03,Forward,medium,2024-03-26,6-Jan-25,25/04/2023 20:00:03,25/04/2023 7
26/03/2023 8:04,Forward,medium,2024-11-28,6-Jan-25,25/04/2024 14:12:03,26/03/2023 8
25/01/2022 4:04,Forward,medium,2023-12-27,6-Apr-23,25/05/2021 23:45:04,25/01/2022 4
26/03/2023 8:05,Forward,medium,2023-05-15,6-Apr-23,25/04/2024 14:12:04,26/03/2023 8


In [0]:
import pyspark.sql.functions as f
from pyspark.sql.functions import col, to_timestamp, date_format, current_date, current_timestamp, to_date

#### **1) Convert date to string**
**Last_transaction_date**

In [0]:
# Replace Last_transaction_date with its 'yyyy-MM-dd' string rendering.
df_ltd = df.withColumn(
    "Last_transaction_date",
    date_format(col("Last_transaction_date"), "yyyy-MM-dd"),
)
display(df_ltd.limit(10))

input_timestamp,Sensex_Category,Label_Type,Last_transaction_date,Effective_Date,last_timestamp,pymt_timestamp
25/04/2023 2:00,Top,average,2024-02-26,6-Feb-23,25/04/2023 24:56:18,25/04/2023 2
26/04/2023 6:01,Top,average,2023-12-21,6-Feb-23,25/04/2002 21:12:00,26/04/2023 6
20/01/2020 4:01,Top,average,2025-03-27,8-Jan-24,25/04/2021 12:34:01,20/01/2020 4
26/04/2023 2:02,Top,average,2023-12-27,8-Jan-24,25/04/1957 20:12:01,26/04/2023 2
25/04/2023 5:02,Top,average,2024-04-29,6-Mar-23,25/04/2023 23:45:22,25/04/2023 5
25/04/2023 9:03,Forward,medium,2024-12-27,6-Mar-23,25/04/2024 14:12:02,25/04/2023 9
25/04/2023 7:03,Forward,medium,2024-03-26,6-Jan-25,25/04/2023 20:00:03,25/04/2023 7
26/03/2023 8:04,Forward,medium,2024-11-28,6-Jan-25,25/04/2024 14:12:03,26/03/2023 8
25/01/2022 4:04,Forward,medium,2023-12-27,6-Apr-23,25/05/2021 23:45:04,25/01/2022 4
26/03/2023 8:05,Forward,medium,2023-05-15,6-Apr-23,25/04/2024 14:12:04,26/03/2023 8


#### **2) Convert string to timestamp**

**input_timestamp, Last_transaction_date, Effective_Date, last_timestamp, pymt_timestamp**

In [0]:
# Parse every date-like column into a timestamp, giving each column the pattern
# that matches how its text is laid out in the source data.
# A value that does not fit its pattern shows up as null in the result
# (see the first last_timestamp row, whose hour is 24).
df_all = (
    df
    .withColumn("input_timestamp", to_timestamp(col("input_timestamp"), "dd/MM/yyyy H:mm"))
    .withColumn("Last_transaction_date", to_timestamp(col("Last_transaction_date"), "yyyy-MM-dd"))
    .withColumn("Effective_Date", to_timestamp(col("Effective_Date"), "d-MMM-yy"))
    .withColumn("last_timestamp", to_timestamp(col("last_timestamp"), "dd/MM/yyyy HH:mm:ss"))
    .withColumn("pymt_timestamp", to_timestamp(col("pymt_timestamp"), "dd/MM/yyyy H"))
)
display(df_all.limit(10))

input_timestamp,Sensex_Category,Label_Type,Last_transaction_date,Effective_Date,last_timestamp,pymt_timestamp
2023-04-25T02:00:00.000+0000,Top,average,2024-02-26T00:00:00.000+0000,2023-02-06T00:00:00.000+0000,,2023-04-25T02:00:00.000+0000
2023-04-26T06:01:00.000+0000,Top,average,2023-12-21T00:00:00.000+0000,2023-02-06T00:00:00.000+0000,2002-04-25T21:12:00.000+0000,2023-04-26T06:00:00.000+0000
2020-01-20T04:01:00.000+0000,Top,average,2025-03-27T00:00:00.000+0000,2024-01-08T00:00:00.000+0000,2021-04-25T12:34:01.000+0000,2020-01-20T04:00:00.000+0000
2023-04-26T02:02:00.000+0000,Top,average,2023-12-27T00:00:00.000+0000,2024-01-08T00:00:00.000+0000,1957-04-25T20:12:01.000+0000,2023-04-26T02:00:00.000+0000
2023-04-25T05:02:00.000+0000,Top,average,2024-04-29T00:00:00.000+0000,2023-03-06T00:00:00.000+0000,2023-04-25T23:45:22.000+0000,2023-04-25T05:00:00.000+0000
2023-04-25T09:03:00.000+0000,Forward,medium,2024-12-27T00:00:00.000+0000,2023-03-06T00:00:00.000+0000,2024-04-25T14:12:02.000+0000,2023-04-25T09:00:00.000+0000
2023-04-25T07:03:00.000+0000,Forward,medium,2024-03-26T00:00:00.000+0000,2025-01-06T00:00:00.000+0000,2023-04-25T20:00:03.000+0000,2023-04-25T07:00:00.000+0000
2023-03-26T08:04:00.000+0000,Forward,medium,2024-11-28T00:00:00.000+0000,2025-01-06T00:00:00.000+0000,2024-04-25T14:12:03.000+0000,2023-03-26T08:00:00.000+0000
2022-01-25T04:04:00.000+0000,Forward,medium,2023-12-27T00:00:00.000+0000,2023-04-06T00:00:00.000+0000,2021-05-25T23:45:04.000+0000,2022-01-25T04:00:00.000+0000
2023-03-26T08:05:00.000+0000,Forward,medium,2023-05-15T00:00:00.000+0000,2023-04-06T00:00:00.000+0000,2024-04-25T14:12:04.000+0000,2023-03-26T08:00:00.000+0000


In [0]:
%sql
-- Parse a text value whose hour field is 24 using the 'HH' (hour-of-day) pattern.
-- The recorded result for this query is null: the hour value does not fit the pattern,
-- which is the same reason the matching last_timestamp row is empty after to_timestamp().
select to_timestamp("25/04/2023 24:56:18", 'dd/MM/yyyy HH:mm:ss')

"to_timestamp(25/04/2023 24:56:18, dd/MM/yyyy HH:mm:ss)"
""


In [0]:
%sql
-- Same pattern as the hour-24 check, but with an hour of 23: the value parses and a timestamp is returned.
select to_timestamp("25/04/2023 23:56:18", 'dd/MM/yyyy HH:mm:ss')

"to_timestamp(25/04/2023 23:56:18, dd/MM/yyyy HH:mm:ss)"
2023-04-25T23:56:18.000+0000


#### **3) Convert timestamp to string**

In [0]:
# Convert the timestamp columns of df_all back into strings, all with the same
# 'yyyy/MM/dd HH:mm:ss' pattern. Each column is replaced in place by its string form.
df_tmstpm_stg = df_all
for ts_column in (
    "input_timestamp",
    "Last_transaction_date",
    "Effective_Date",
    "last_timestamp",
    "pymt_timestamp",
):
    df_tmstpm_stg = df_tmstpm_stg.withColumn(
        ts_column, date_format(ts_column, "yyyy/MM/dd HH:mm:ss")
    )
display(df_tmstpm_stg.limit(10))

input_timestamp,Sensex_Category,Label_Type,Last_transaction_date,Effective_Date,last_timestamp,pymt_timestamp
2023/04/25 02:00:00,Top,average,2024/02/26 00:00:00,2023/02/06 00:00:00,,2023/04/25 02:00:00
2023/04/26 06:01:00,Top,average,2023/12/21 00:00:00,2023/02/06 00:00:00,2002/04/25 21:12:00,2023/04/26 06:00:00
2020/01/20 04:01:00,Top,average,2025/03/27 00:00:00,2024/01/08 00:00:00,2021/04/25 12:34:01,2020/01/20 04:00:00
2023/04/26 02:02:00,Top,average,2023/12/27 00:00:00,2024/01/08 00:00:00,1957/04/25 20:12:01,2023/04/26 02:00:00
2023/04/25 05:02:00,Top,average,2024/04/29 00:00:00,2023/03/06 00:00:00,2023/04/25 23:45:22,2023/04/25 05:00:00
2023/04/25 09:03:00,Forward,medium,2024/12/27 00:00:00,2023/03/06 00:00:00,2024/04/25 14:12:02,2023/04/25 09:00:00
2023/04/25 07:03:00,Forward,medium,2024/03/26 00:00:00,2025/01/06 00:00:00,2023/04/25 20:00:03,2023/04/25 07:00:00
2023/03/26 08:04:00,Forward,medium,2024/11/28 00:00:00,2025/01/06 00:00:00,2024/04/25 14:12:03,2023/03/26 08:00:00
2022/01/25 04:04:00,Forward,medium,2023/12/27 00:00:00,2023/04/06 00:00:00,2021/05/25 23:45:04,2022/01/25 04:00:00
2023/03/26 08:05:00,Forward,medium,2023/05/15 00:00:00,2023/04/06 00:00:00,2024/04/25 14:12:04,2023/03/26 08:00:00


#### **4) Format date columns**

In [0]:
# Using date_format()
# Format the current date/time with several patterns. Each column is named after
# the pattern family that produced it, so the header describes the value beneath it.
df_format = df_tmstpm_stg.select(
    current_date().alias("current_date"),
    date_format(current_timestamp(), "yyyy MM dd").alias("yyyy MM dd"),
    # 'hh' is the 12-hour clock-hour; this pattern carries no am/pm marker.
    date_format(current_timestamp(), "MM/dd/yyyy hh:mm").alias("MM/dd/yyyy"),
    # 'MMM' yields the abbreviated month name (e.g. Sep), so the alias says MMM, not MMMM.
    date_format(current_timestamp(), "yyyy MMM dd").alias("yyyy MMM dd"),
    # 'MMMM' yields the full month name; 'E' yields the abbreviated day of week.
    date_format(current_timestamp(), "yyyy MMMM dd E").alias("yyyy MMMM dd E"),
)
display(df_format.limit(10))

current_date,yyyy MM dd,MM/dd/yyyy,yyyy MMMM dd,yyyy MMMM dd E
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun
2024-09-08,2024 09 08,09/08/2024 06:07,2024 Sep 08,2024 September 08 Sun


In [0]:
# SQL
# The same formatting expressed as a Spark SQL query. Column aliases mirror the
# pattern used: the 'yyyy MMM dd' column is named yyyy_MMM_dd (abbreviated month),
# distinct from yyyy_MMMM_dd_E which uses the full month name.
spark.sql(
    """
    select current_date() as current_date,
           date_format(current_timestamp(), 'yyyy MM dd') as yyyy_MM_dd,
           date_format(current_timestamp(), 'MM/dd/yyyy hh:mm') as MM_dd_yyyy,
           date_format(current_timestamp(), 'yyyy MMM dd') as yyyy_MMM_dd,
           date_format(current_timestamp(), 'yyyy MMMM dd E') as yyyy_MMMM_dd_E
    """
).display()

current_date,yyyy_MM_dd,MM_dd_yyyy,yyyy_MMMM_dd,yyyy_MMMM_dd_E
2024-09-08,2024 09 08,09/08/2024 06:09,2024 Sep 08,2024 September 08 Sun


#### **5) Convert date to string data type**

In [0]:
# One-row DataFrame whose two columns hold dates written as 'MM/dd/yyyy' strings.
data = spark.createDataFrame(
    [("05/22/2022", "10/21/2022")],
    schema=["Input_Timestamp", "Last_Timestamp"],
)
display(data)

Input_Timestamp,Last_Timestamp
05/22/2022,10/21/2022


In [0]:
# String -> date: to_date() parses a date string into a date value.
# The second argument describes how the date is laid out in the input string.
string_date = (
    data
    .withColumn("Input_Timestamp", to_date("Input_Timestamp", "MM/dd/yyyy"))
    .withColumn("Last_Timestamp", to_date("Last_Timestamp", "MM/dd/yyyy"))
)

display(string_date)

Input_Timestamp,Last_Timestamp
2022-05-22,2022-10-21


In [0]:
# Date -> string: date_format() renders each date column as 'dd-MM-yyyy' text.
date_string = (
    string_date
    .withColumn("Input_Timestamp", date_format("Input_Timestamp", "dd-MM-yyyy"))
    .withColumn("Last_Timestamp", date_format("Last_Timestamp", "dd-MM-yyyy"))
)

display(date_string)

Input_Timestamp,Last_Timestamp
22-05-2022,21-10-2022


In [0]:
# String -> date -> string in a single step: parse the 'MM/dd/yyyy' text with
# to_date(), then immediately re-render it as 'dd-MM-yyyy' with date_format().
date_format_corrected = (
    data
    .withColumn(
        "Input_Timestamp",
        date_format(to_date("Input_Timestamp", "MM/dd/yyyy"), "dd-MM-yyyy"),
    )
    .withColumn(
        "Last_Timestamp",
        date_format(to_date("Last_Timestamp", "MM/dd/yyyy"), "dd-MM-yyyy"),
    )
)

display(date_format_corrected)

Input_Timestamp,Last_Timestamp
22-05-2022,21-10-2022
