# Apache Iceberg Demo

In [None]:
# Make tpch_iceberg the current database so that unqualified table names used
# later in the notebook (e.g. "customer") resolve against it.
spark.sql("use tpch_iceberg")

## Adding a column

In [None]:
%%sql
-- Add a string column c_email to the customer table, placed directly after c_name.
ALTER TABLE spark_catalog.tpch_iceberg.customer
ADD COLUMN c_email STRING AFTER c_name;


In [None]:
%%sql
-- Show the columns and types of the customer table.
DESCRIBE TABLE spark_catalog.tpch_iceberg.customer

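## Rollback example

List the table's snapshots, then roll back to one of them. The snapshot id passed to `rollback_to_snapshot` below is specific to the table it was copied from; replace it with a `snapshot_id` returned by the query.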

In [None]:
%%sql
-- List the commit time and id of every snapshot recorded for the customer table.
SELECT committed_at, snapshot_id
FROM spark_catalog.tpch_iceberg.customer.snapshots

In [None]:
%%sql
-- Roll the customer table back to the given snapshot.
-- NOTE(review): the snapshot id is a hard-coded literal; it presumably came from an
-- earlier run of the .snapshots query and must be replaced with an id that exists
-- in your own table.
CALL spark_catalog.system.rollback_to_snapshot(
    table => 'tpch_iceberg.customer',
    snapshot_id => 5359419852330289230
)

## Create table with partition by day

In [None]:
%%sql
-- Line-item table stored as Iceberg and partitioned by the day of l_shipdate.
-- IF NOT EXISTS makes the cell safe to re-run.
CREATE TABLE IF NOT EXISTS lineitem_day (
    -- keys
    l_orderkey      BIGINT,
    l_partkey       BIGINT,
    l_suppkey       BIGINT,
    l_linenumber    INT,
    -- amounts
    l_quantity      DECIMAL(15,2),
    l_extendedprice DECIMAL(15,2),
    l_discount      DECIMAL(15,2),
    l_tax           DECIMAL(15,2),
    -- status flags
    l_returnflag    STRING,
    l_linestatus    STRING,
    -- dates
    l_shipdate      DATE,
    l_commitdate    DATE,
    l_receiptdate   DATE,
    -- free-text attributes
    l_shipinstruct  STRING,
    l_shipmode      STRING,
    l_comment       STRING
)
USING iceberg
-- days() is the partition transform applied to the ship date.
PARTITIONED BY (days(l_shipdate))
-- Write Snappy-compressed Parquet files; use Iceberg table format version 2.
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'snappy',
    'format-version' = '2'
)

## Create table with partition by bucket

In [None]:
%%sql
-- Customer table stored as Iceberg and partitioned into 10 buckets of c_custkey.
-- IF NOT EXISTS makes the cell safe to re-run.
CREATE TABLE IF NOT EXISTS customer_bucket (
    c_custkey    BIGINT,
    c_name       STRING,
    c_address    STRING,
    c_nationkey  BIGINT,
    c_phone      STRING,
    c_acctbal    DECIMAL(15,2),
    c_mktsegment STRING,
    c_comment    STRING
)
USING iceberg
-- bucket(10, ...) is the partition transform applied to the customer key.
PARTITIONED BY (bucket(10, c_custkey))
-- Write Snappy-compressed Parquet files; use Iceberg table format version 2.
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'snappy',
    'format-version' = '2'
)

## Dataframes example

In [None]:
# Load the customer table as a DataFrame. The name is unqualified, so it is
# looked up in the session's current database.
customer_df = spark.table("customer")

In [None]:
# Count rows per c_mktsegment and print the groups ordered by count, largest first.
(
    customer_df
    .groupBy('c_mktsegment')
    .count()
    .orderBy('count', ascending=False)
    .show()
)

## Drop column

In [None]:
%%sql
-- Remove the c_email column from the customer table.
ALTER TABLE spark_catalog.tpch_iceberg.customer
DROP COLUMN c_email;

## Drop table

In [None]:
%%sql
-- Clean up the bucket-partitioned demo table.
-- IF EXISTS keeps the cell from failing when the table is already gone (for
-- example on a second run), matching the CREATE TABLE IF NOT EXISTS used to build it.
-- NOTE(review): without PURGE, recent Iceberg versions only remove the catalog
-- entry and leave data files in place; confirm whether PURGE is wanted here.
DROP TABLE IF EXISTS customer_bucket

In [None]:
%%sql
-- Clean up the day-partitioned demo table.
-- IF EXISTS keeps the cell from failing when the table is already gone (for
-- example on a second run), matching the CREATE TABLE IF NOT EXISTS used to build it.
-- NOTE(review): without PURGE, recent Iceberg versions only remove the catalog
-- entry and leave data files in place; confirm whether PURGE is wanted here.
DROP TABLE IF EXISTS lineitem_day