In [0]:
%load_ext autoreload
%autoreload 2
# Loads the autoreload extension and switches it to mode 2, so edits to imported
# modules are picked up without restarting the kernel. Learn more at
# https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run: %autoreload 0

In [0]:
import sys
import os
# Add the ./odibi_de_v2 directory (resolved against the current working
# directory) to the import search path. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
_odibi_path = os.path.abspath('./odibi_de_v2')
if _odibi_path not in sys.path:
    sys.path.append(_odibi_path)

# PYTHONDONTWRITEBYTECODE is only consulted when an interpreter starts, so
# assigning it here cannot change the already-running kernel; it is kept so
# that child processes inheriting this environment see it. The runtime switch
# for the current process is sys.dont_write_bytecode, which stops .pyc files
# from being written on subsequent imports.
os.environ["PYTHONDONTWRITEBYTECODE"] = "1"
sys.dont_write_bytecode = True

In [0]:
from odibi_de_v2.transformer import TransformerProvider

In [0]:
from pyspark.sql import SparkSession
import pandas as pd
from odibi_de_v2.core import Framework

# ---------- Setup ----------
# Obtain a Spark session for the demo (getOrCreate hands back an existing
# session if one is already running).
spark = (
    SparkSession.builder
    .appName("TransformerProviderDemo")
    .getOrCreate()
)

# Sample data: the same two rows and three columns, once per engine
spark_df = spark.createDataFrame(
    [
        (1, "A", 3.0),
        (2, "B", 6.0),
    ],
    ["col1", "col2", "col3"],
)
pandas_df = pd.DataFrame(
    {
        "col1": [1, 2],
        "col2": ["A", "B"],
        "col3": [3.0, 6.0],
    }
)

# ---------- Transformer Providers ----------
# One provider per framework; each is used below with its own DataFrame type
spark_provider = TransformerProvider(framework=Framework.SPARK)
pandas_provider = TransformerProvider(framework=Framework.PANDAS)

# Every section below issues the same request twice: first through the Spark
# provider on spark_df, then through the pandas provider on pandas_df.

# ---------- Column Renamer ----------
# column_map: col1 -> id, col2 -> name
renamed_spark = spark_provider.transform(
    "SparkColumnRenamer",
    spark_df,
    column_map={"col1": "id", "col2": "name"},
)
renamed_pandas = pandas_provider.transform(
    "PandasColumnRenamer",
    pandas_df,
    column_map={"col1": "id", "col2": "name"},
)

# ---------- Column Dropper ----------
dropped_spark = spark_provider.transform(
    "SparkColumnDropper",
    spark_df,
    columns_to_drop=["col3"],
)
dropped_pandas = pandas_provider.transform(
    "PandasColumnDropper",
    pandas_df,
    columns_to_drop=["col3"],
)

# ---------- Value Replacer ----------
# A single mapping object is handed to both providers
value_map = {
    "col2": {"A": "X", "B": "Y"},
}
replaced_spark = spark_provider.transform(
    "SparkValueReplacer",
    spark_df,
    value_map=value_map,
)
replaced_pandas = pandas_provider.transform(
    "PandasValueReplacer",
    pandas_df,
    value_map=value_map,
)

# ---------- Column Reorderer ----------
# col3 is not listed in column_order; retain_unspecified=True is passed along
reordered_spark = spark_provider.transform(
    "SparkColumnReorderer",
    spark_df,
    column_order=["col2", "col1"],
    retain_unspecified=True,
)
reordered_pandas = pandas_provider.transform(
    "PandasColumnReorderer",
    pandas_df,
    column_order=["col2", "col1"],
    retain_unspecified=True,
)

# ---------- Column Adder ----------
added_spark = spark_provider.transform(
    "SparkColumnAdder",
    spark_df,
    column_name="static_col",
    value="static",
)
added_pandas = pandas_provider.transform(
    "PandasColumnAdder",
    pandas_df,
    column_name="static_col",
    value="static",
)

# ---------- Column Name Standardizer ----------
standardized_spark = spark_provider.transform(
    "SparkColumnNameStandardizer",
    spark_df,
    case_style="snake_case",
    exclude_columns=["col2"],
)
standardized_pandas = pandas_provider.transform(
    "PandasColumnNameStandardizer",
    pandas_df,
    case_style="snake_case",
    exclude_columns=["col2"],
)

# ---------- Show Results ----------
# Only the renamer outputs are printed here: pandas first, then Spark.
print("Pandas Renamed:", renamed_pandas, sep="\n")
print("\nSpark Renamed:")
renamed_spark.show()
