# In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, trim, nullif, to_timestamp, when, lit
from functools import reduce


class TransformationHelper:
    """Small, reusable clean-up transformations for Spark DataFrames.

    Every method takes a DataFrame and returns a DataFrame; none of them
    reads or writes any state on the helper instance.
    """

    def cast_to_timestamp(self, df: DataFrame, columns: list) -> DataFrame:
        """Replace each named column with ``to_timestamp`` of itself.

        Args:
            df: DataFrame to transform.
            columns: Names of the columns to convert.

        Returns:
            A DataFrame in which every listed column has been passed through
            ``to_timestamp`` (no explicit format is supplied). ``df`` is
            returned unchanged when ``columns`` is empty.
        """
        if not columns:
            return df
        # A single withColumns call adds one projection for all columns.
        # Calling withColumn once per column in a loop adds one projection
        # per column, which can produce very large query plans.
        return df.withColumns(
            {name: to_timestamp(col(name)) for name in columns}
        )

    def drop_duplicates(self, df: DataFrame, id_column: str) -> DataFrame:
        """Drop rows that repeat a value in ``id_column``.

        Args:
            df: DataFrame to de-duplicate.
            id_column: Name of the single column used to detect duplicates.

        Returns:
            The result of ``df.drop_duplicates([id_column])``.
        """
        return df.drop_duplicates([id_column])

    def cast_to_numeric(self, df: DataFrame, column: str) -> DataFrame:
        """Cast a single column to Spark's ``long`` type.

        Args:
            df: DataFrame to transform.
            column: Name of the column to cast; it is replaced under the
                same name.

        Returns:
            A DataFrame with ``column`` cast to ``long``.
        """
        return df.withColumn(column, col(column).cast('long'))

    def clean_string_types(self, df: DataFrame, columns: list) -> DataFrame:
        """Turn empty-string values into nulls in the named columns.

        Only values equal to ``""`` are replaced; every other value is kept
        as it is.

        Args:
            df: DataFrame to transform.
            columns: Names of the columns to clean.

        Returns:
            A DataFrame in which each listed column holds null wherever it
            previously held ``""``. ``df`` is returned unchanged when
            ``columns`` is empty.
        """
        if not columns:
            return df
        # Apply all replacements in one projection rather than chaining a
        # withColumn call per column.
        return df.withColumns(
            {
                name: when(col(name) == "", lit(None)).otherwise(col(name))
                for name in columns
            }
        )

    def clean_null_values(self, df: DataFrame, columns: list) -> DataFrame:
        """Drop rows that contain a null in any of the named columns.

        Args:
            df: DataFrame to filter.
            columns: Names of the columns to check for nulls.

        Returns:
            The result of ``df.dropna(subset=columns)``.
        """
        return df.dropna(subset=columns)
    