Użyj każdą z tych funkcji 
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
from pyspark.sql.functions import current_date, current_timestamp

kolumny = ["timestamp", "unix", "Date"]
dane = [("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
        ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
        ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021")]

dataFrame = spark.createDataFrame(dane, kolumny) \
    .withColumn("current_date", current_date()) \
    .withColumn("current_timestamp", current_timestamp())

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:27:23.096+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:27:23.096+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:27:23.096+0000


In [0]:

dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja **string** to a **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDataFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
from pyspark.sql.functions import unix_timestamp

zmianaFormatu = dataFrame.withColumn("timestamp", unix_timestamp(dataFrame.timestamp, "yyyy-MM-dd'T'HH:mm:ss"))

display(zmianaFormatu)

timestamp,unix,Date,current_date,current_timestamp
1427033614,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:27:24.373+0000
1427036598,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:27:24.373+0000
1427035119,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:27:24.373+0000


2. Zmień format zgodnie z klasą `SimpleDateFormat`**yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane żeby sprawdzicz czy wartości się zmieniły

In [0]:

zmianaFormatu.printSchema()

root
 |-- timestamp: long (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
from pyspark.sql.functions import year, month, dayofyear, col

yearDate = dataFrame.withColumn('year', year('timestamp')) \
.withColumn('month', month('timestamp')) \
.withColumn('day_of_year', dayofyear('timestamp'))

display(yearDate)

timestamp,unix,Date,current_date,current_timestamp,year,month,day_of_year
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:27:25.684+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:27:25.684+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:27:25.684+0000,2015,3,81


In [0]:
#date_format
from pyspark.sql.functions import date_format

dateFormatted = dataFrame.select('timestamp', date_format('timestamp', 'MM/dd/yyyy HH:MM:SS'))

display(dateFormatted)

timestamp,"date_format(timestamp, MM/dd/yyyy HH:MM:SS)"
2015-03-22T14:13:34,03/22/2015 14:03:00
2015-03-22T15:03:18,03/22/2015 15:03:00
2015-03-22T14:38:39,03/22/2015 14:03:00


In [0]:
#to_date()
from pyspark.sql.functions import to_date

toDate = dataFrame.select(to_date('timestamp').alias('date'))
display(toDate)

date
2015-03-22
2015-03-22
2015-03-22


In [0]:
#from_unixtime()
from pyspark.sql.functions import from_unixtime

fromUnix = dataFrame.select('unix', from_unixtime(col('unix')/1000).alias('from_unix'))
display(fromUnix)

unix,from_unix
1646641525847,2022-03-07 08:25:25
1646641557555,2022-03-07 08:25:57
1646641578622,2022-03-07 08:26:18


In [0]:
#to_timestamp()
from pyspark.sql.functions import to_timestamp

toTimestamp = dataFrame.withColumn('timestamp', to_timestamp('timestamp'))
display(toTimestamp)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:27:28.982+0000
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:27:28.982+0000
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:27:28.982+0000


In [0]:
#to_utc_timestamp()
from pyspark.sql.functions import to_utc_timestamp

toUtcTimestamp = dataFrame.select('timestamp', to_utc_timestamp('timestamp', 'America/Caracas').alias('timestamp_Caracas'))
display(toUtcTimestamp)



timestamp,timestamp_Caracas
2015-03-22T14:13:34,2015-03-22T18:43:34.000+0000
2015-03-22T15:03:18,2015-03-22T19:33:18.000+0000
2015-03-22T14:38:39,2015-03-22T19:08:39.000+0000


In [0]:
#from_utc_timestamp()
from pyspark.sql.functions import from_utc_timestamp

fromUtcTimestamp = dataFrame.select('timestamp', from_utc_timestamp('timestamp', 'America/Caracas').alias('from_utc'))
display(fromUtcTimestamp)

timestamp,from_utc
2015-03-22T14:13:34,2015-03-22T09:43:34.000+0000
2015-03-22T15:03:18,2015-03-22T10:33:18.000+0000
2015-03-22T14:38:39,2015-03-22T10:08:39.000+0000
