In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

class IcebergUtility:
    """Static helpers for reading, writing and inspecting Iceberg tables via Spark."""

    # Row limit passed to DataFrame.show() by show_table_contents.
    MAX_ROW_DISPLAY = 100

    # Warehouse location used by list_snapshot_ids when the Spark session has
    # no "spark.sql.catalog.<catalog>.warehouse" setting for the catalog.
    DEFAULT_WAREHOUSE = "file:///home/jovyan/work/iceberg/warehouse"

    # File formats accepted for the "write.format.default" table property.
    SUPPORTED_FILE_FORMATS = ("parquet", "avro", "orc")

    @staticmethod
    def _with_file_format(writer, file_fmt):
        """Set "write.format.default" on ``writer`` when ``file_fmt`` is supported.

        Args:
            writer: The DataFrameWriterV2 being configured.
            file_fmt: Requested file format (case-insensitive) or None.

        Returns:
            The writer, with the table property applied when ``file_fmt`` is one
            of SUPPORTED_FILE_FORMATS; otherwise the writer unchanged.
        """
        if file_fmt is None:
            return writer
        normalized = file_fmt.lower()
        if normalized not in IcebergUtility.SUPPORTED_FILE_FORMATS:
            # Unrecognised values keep the previous behaviour: no property set.
            return writer
        return writer.tableProperty("write.format.default", normalized)

    @staticmethod
    def list_snapshot_ids(table_name: str, spark: SparkSession) -> list[int]:
        """Print every snapshot of a table and return the snapshot ids.

        The table is loaded through a JVM-side Iceberg ``HadoopCatalog`` rather
        than through Spark SQL.

        Args:
            table_name: Name in the form ``catalog.database.table``.
            spark: Active Spark session; its JVM gateway and runtime config are used.

        Returns:
            The snapshot ids in the order reported by the table.

        Raises:
            ValueError: If ``table_name`` does not have exactly three dot-separated parts.
        """
        parts = table_name.split(".")
        if len(parts) != 3:
            raise ValueError(
                f"table_name must look like 'catalog.database.table', got {table_name!r}"
            )
        catalog_name, database_name, tbl_name = parts

        # Prefer the warehouse configured for this catalog on the session so the
        # lookup targets the same location Spark writes to.
        warehouse = spark.conf.get(
            f"spark.sql.catalog.{catalog_name}.warehouse",
            IcebergUtility.DEFAULT_WAREHOUSE,
        )

        jvm = spark._jvm
        conf = jvm.org.apache.hadoop.conf.Configuration()
        catalog = jvm.org.apache.iceberg.hadoop.HadoopCatalog(conf, warehouse)
        # Build the identifier from the caller's table name instead of a fixed one.
        table_identifier = jvm.org.apache.iceberg.catalog.TableIdentifier.parse(
            f"{database_name}.{tbl_name}"
        )
        table = catalog.loadTable(table_identifier)

        snapshots = list(table.snapshots())

        for snapshot in snapshots:
            print(f"Snapshot ID: {snapshot.snapshotId()}")
            print(f"Timestamp: {snapshot.timestampMillis()}")
            print(f"Operation: {snapshot.operation()}")
            print(f"Summary: {snapshot.summary()}")
            print("--------")

        print(f"Total snapshots = {len(snapshots)}")
        return [snapshot.snapshotId() for snapshot in snapshots]

    @staticmethod
    def show_table_contents(table_name: str, spark: SparkSession, snapshot_id: int = None):
        """Show up to MAX_ROW_DISPLAY rows of a table, optionally at a snapshot.

        Args:
            table_name: Table identifier or path passed to the Iceberg reader.
            spark: Active Spark session.
            snapshot_id: When not None, read the table as of this snapshot id.
        """
        reader = spark.read.format("iceberg")
        # Compare against None so that a falsy id such as 0 is still honoured.
        if snapshot_id is not None:
            reader = reader.option("snapshot-id", snapshot_id)
        reader.load(table_name).show(IcebergUtility.MAX_ROW_DISPLAY)

    @staticmethod
    def write_as_table(df, table_name: str, file_fmt: str = None):
        """Create or replace an Iceberg table from ``df``.

        Args:
            df: DataFrame to write.
            table_name: Target table name.
            file_fmt: Optional file format; applied as "write.format.default"
                when it is one of SUPPORTED_FILE_FORMATS (case-insensitive).
        """
        writer = df.writeTo(table_name).using("iceberg")
        IcebergUtility._with_file_format(writer, file_fmt).createOrReplace()

    @staticmethod
    def write_as_partitioned_table(df, table_name: str, partition_col: str, file_fmt: str = None):
        """Create or replace an Iceberg table partitioned by one column.

        Args:
            df: DataFrame to write.
            table_name: Target table name.
            partition_col: Name of the column to partition by.
            file_fmt: Optional file format; applied as "write.format.default"
                when it is one of SUPPORTED_FILE_FORMATS (case-insensitive).
        """
        writer = df.writeTo(table_name).partitionedBy(col(partition_col)).using("iceberg")
        IcebergUtility._with_file_format(writer, file_fmt).createOrReplace()

    @staticmethod
    def write_as_table_with_bloom(df, table_name: str, bloom_filter_column: str, bloom_filter_max_items: str):
        """Create or replace an Iceberg table with bloom-filter table properties.

        Args:
            df: DataFrame to write.
            table_name: Target table name.
            bloom_filter_column: Value stored under the bloom-filter columns property.
            bloom_filter_max_items: Value stored under the max-num-items property;
                converted to ``str`` because table properties are string-valued.
        """
        # NOTE(review): the property keys below are passed through unchanged;
        # confirm they match the bloom-filter properties recognised by the
        # Iceberg version in use.
        (
            df.writeTo(table_name)
            .using("iceberg")
            .tableProperty("write.metadata.bloom-filter.columns", bloom_filter_column)
            .tableProperty("write.metadata.bloom-filter.max-num-items", str(bloom_filter_max_items))
            .createOrReplace()
        )

    @staticmethod
    def append_to_table(df, table_name: str):
        """Append the rows of ``df`` to an existing table."""
        df.writeTo(table_name).append()

    @staticmethod
    def count_rows(table_name: str, spark: SparkSession) -> int:
        """Return the number of rows read from the table via the Iceberg reader."""
        df = spark.read.format("iceberg").load(table_name)
        return df.count()

    @staticmethod
    def describe_table(table_name: str, spark: SparkSession):
        """Print the untruncated output of ``DESCRIBE TABLE`` for the table."""
        spark.sql(f"DESCRIBE TABLE {table_name}").show(truncate=False)

    @staticmethod
    def list_tables(catalog: str, namespace: str, spark: SparkSession):
        """Print the table count and each table's catalog, namespace and name.

        Args:
            catalog: Catalog name.
            namespace: Namespace (database) inside the catalog.
            spark: Active Spark session.
        """
        tables = spark.catalog.listTables(f"{catalog}.{namespace}")
        print(f" Number of tables : {len(tables)}\n")
        for t in tables:
            print(f"[ {t.catalog} | {t.namespace} | {t.name} ]")

    @staticmethod
    def delete_table(table_name: str, spark: SparkSession):
        """Run ``DROP TABLE IF EXISTS ... PURGE`` for the table."""
        spark.sql(f"DROP TABLE IF EXISTS {table_name} PURGE")


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, round as pyspark_round

class Utility:
    """Static helpers for building the demo Spark session and shaping DataFrames."""

    @staticmethod
    def get_spark_session() -> SparkSession:
        """Return a local-mode Spark session with the ``my_catalog`` Iceberg catalog configured."""
        catalog_settings = {
            "spark.sql.catalog.my_catalog": "org.apache.iceberg.spark.SparkCatalog",
            "spark.sql.catalog.my_catalog.type": "hadoop",
            "spark.sql.catalog.my_catalog.warehouse": "warehouse",
        }
        # local[*] runs Spark in local mode.
        builder = SparkSession.builder.appName("Iceberg demo app").master("local[*]")
        for key, value in catalog_settings.items():
            builder = builder.config(key, value)
        return builder.getOrCreate()

    @staticmethod
    def read_file(file: str, spark: SparkSession):
        """Read ``file`` as JSON with the ``multiline`` option enabled and return the DataFrame."""
        json_reader = spark.read.option("multiline", "true")
        return json_reader.json(file)

    @staticmethod
    def drop_column(df, col_name: str):
        """Return ``df`` with the column ``col_name`` dropped."""
        trimmed = df.drop(col_name)
        return trimmed

    @staticmethod
    def add_column(df, col_name: str, value):
        """Return ``df`` with column ``col_name`` set to the literal ``value``."""
        constant = lit(value)
        return df.withColumn(col_name, constant)

    @staticmethod
    def apply_discount(df):
        """Return ``df`` with a ``final_price`` column.

        ``final_price`` is ``price * (1 - discount / 100.0)`` rounded with a
        scale of 3, computed from the ``price`` and ``discount`` columns.
        """
        remaining_fraction = lit(1) - col("discount") / 100.0
        discounted_price = col("price") * remaining_fraction
        return df.withColumn("final_price", pyspark_round(discounted_price, 3))
