In [7]:
from pyspark.sql import SparkSession
from pyspark.sql.dataframe import DataFrame
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, BooleanType, TimestampType

In [8]:
# Notebook-wide Spark entry point: getOrCreate() hands back the session that is
# already active, or starts a new one if none exists yet.
spark = (
    SparkSession.builder
    .getOrCreate()
)

In [24]:
# Build the small in-memory transactions DataFrame used by this notebook.
# Dates enter as "yyyy-MM-dd" strings and are parsed into timestamps below.

# The (start, end) bucket pairs shared by the rows.
bucket_jan_01 = ("2023-01-01", "2023-01-02")
bucket_jan_02 = ("2023-01-02", "2023-01-03")
bucket_jan_06 = ("2023-01-06", "2023-01-07")

# Tuple order matches the schema below:
# ID_BIC_CLIENTE, DATA_TRANSAZIONE, IMPORTO, CA_CATEGORY_LIV0, IS_CARTA, bucket.
transaction_rows = [
    (348272371, "2023-01-01", 5.50, "shopping", True, bucket_jan_01),
    (348272371, "2023-01-01", 6.10, "salute", False, bucket_jan_01),
    (348272371, "2023-01-01", 8.20, "trasporti", False, bucket_jan_01),
    (348272371, "2023-01-01", 1.50, "trasporti", True, bucket_jan_01),
    (348272371, "2023-01-06", 20.20, "shopping", False, bucket_jan_06),
    (348272371, "2023-01-06", 43.00, "shopping", True, bucket_jan_06),
    (348272371, "2023-01-06", 72.20, "shopping", False, bucket_jan_06),
    (234984832, "2023-01-01", 15.34, "salute", True, bucket_jan_01),
    (234984832, "2023-01-01", 36.22, "salute", True, bucket_jan_01),
    (234984832, "2023-01-01", 78.35, "salute", False, bucket_jan_01),
    (234984832, "2023-01-02", 2.20, "trasporti", True, bucket_jan_02),
]

# Nested struct holding the bucket boundaries (still strings at this stage).
bucket_schema = (
    StructType()
    .add("start", StringType(), False)
    .add("end", StringType(), False)
)

# Every column is declared non-nullable.
# NOTE(review): FloatType is single-precision; if IMPORTO gets summed or compared
# later, DoubleType/DecimalType may be a better fit -- confirm before changing.
transaction_schema = (
    StructType()
    .add("ID_BIC_CLIENTE", IntegerType(), False)
    .add("DATA_TRANSAZIONE", StringType(), False)
    .add("IMPORTO", FloatType(), False)
    .add("CA_CATEGORY_LIV0", StringType(), False)
    .add("IS_CARTA", BooleanType(), False)
    .add("bucket", bucket_schema, False)
)

# Single pattern used to parse all three date strings.
timestamp_format = "yyyy-MM-dd"

df = (
    spark.createDataFrame(transaction_rows, schema=transaction_schema)
    # Replace the string transaction date with a parsed timestamp.
    .withColumn(
        "DATA_TRANSAZIONE",
        F.to_timestamp(F.col("DATA_TRANSAZIONE"), timestamp_format),
    )
    # Rebuild the bucket struct so that both of its fields are timestamps too.
    .withColumn(
        "bucket",
        F.struct(
            F.to_timestamp(F.col("bucket.start"), timestamp_format).alias("start"),
            F.to_timestamp(F.col("bucket.end"), timestamp_format).alias("end"),
        ),
    )
)

# Show the resulting schema and every row without truncating wide cells.
df.printSchema()
df.show(truncate=False)

root
 |-- ID_BIC_CLIENTE: integer (nullable = false)
 |-- DATA_TRANSAZIONE: timestamp (nullable = true)
 |-- IMPORTO: float (nullable = false)
 |-- CA_CATEGORY_LIV0: string (nullable = false)
 |-- IS_CARTA: boolean (nullable = false)
 |-- bucket: struct (nullable = false)
 |    |-- start: timestamp (nullable = true)
 |    |-- end: timestamp (nullable = true)

+--------------+-------------------+-------+----------------+--------+------------------------------------------+
|ID_BIC_CLIENTE|DATA_TRANSAZIONE   |IMPORTO|CA_CATEGORY_LIV0|IS_CARTA|bucket                                    |
+--------------+-------------------+-------+----------------+--------+------------------------------------------+
|348272371     |2023-01-01 00:00:00|5.5    |shopping        |true    |{2023-01-01 00:00:00, 2023-01-02 00:00:00}|
|348272371     |2023-01-01 00:00:00|6.1    |salute          |false   |{2023-01-01 00:00:00, 2023-01-02 00:00:00}|
|348272371     |2023-01-01 00:00:00|8.2    |trasporti       |false  