# Convert Iceberg to Delta Lake

In [1]:
import random
import shutil

from delta import *
from delta.tables import DeltaTable
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Scratch location for this demo; wiped up front so every run starts clean.
testRoot = "/tmp/delta-iceberg-converter/"
warehousePath = testRoot + "iceberg_tables"
shutil.rmtree(testRoot, ignore_errors=True)

# Fully qualified Iceberg identifier: <catalog>.<namespace>.<table>.
table = "local.some_db.my_fun_table"
# The "local" catalog is a Hadoop catalog rooted at warehousePath, which lays
# tables out as <warehouse>/<namespace>/<table>. Derive the on-disk location
# from the identifier (dropping the catalog name) so the two cannot drift apart.
tablePath = "file://" + warehousePath + "/" + "/".join(table.split(".")[1:])

# Session settings: Delta Lake SQL extension and catalog on the default
# catalog, plus a Hadoop-type Iceberg catalog named "local" whose warehouse
# directory is warehousePath.
spark_conf = {
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.sql.catalog.local": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.local.type": "hadoop",
    "spark.sql.catalog.local.warehouse": warehousePath,
}

builder = SparkSession.builder.master("local[*]").appName("test")
for conf_key, conf_value in spark_conf.items():
    builder = builder.config(conf_key, conf_value)

# Extra Maven coordinates to load alongside Delta: the Delta-Iceberg module
# and the Iceberg Spark runtime.
my_packages = [
    "io.delta:delta-iceberg_2.12:2.3.0",
    "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.0.0",
]

spark = configure_spark_with_delta_pip(builder, extra_packages=my_packages).getOrCreate()

:: loading settings :: url = jar:file:/Users/matthew.powers/opt/miniconda3/envs/pyspark-332-delta-230/lib/python3.9/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/matthew.powers/.ivy2/cache
The jars for the packages stored in: /Users/matthew.powers/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
io.delta#delta-iceberg_2.12 added as a dependency
org.apache.iceberg#iceberg-spark-runtime-3.3_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-195c9a13-2cef-461f-b9eb-237bd9058758;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.3.0 in central
	found io.delta#delta-storage;2.3.0 in central
	found org.antlr#antlr4-runtime;4.8 in central
	found io.delta#delta-iceberg_2.12;2.3.0 in central
	found org.scala-lang.modules#scala-collection-compat_2.12;2.1.1 in central
	found org.apache.iceberg#iceberg-spark-runtime-3.3_2.12;1.0.0 in central
:: resolution report :: resolve 147ms :: artifacts dl 7ms
	:: modules in use:
	io.delta#delta-core_2.12;2.3.0 from central in [default]
	io.delta#delta-iceberg_2.12;2.3.0 from central in [default]
	io.delta#delta-storage;2.3.0 from

23/07/17 11:06:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [2]:
# Echo the session object as the cell's last expression so the notebook displays it.
spark

## Create Iceberg table

In [3]:
# Create the two-column Iceberg table. The identifier comes from `table`
# (defined in the setup cell) rather than being repeated as a literal.
spark.sql(f"CREATE TABLE {table} (id BIGINT, data STRING) USING ICEBERG")

DataFrame[]

In [4]:
# Seed the table with three rows, addressing it through the shared `table`
# identifier from the setup cell.
spark.sql(f"INSERT INTO {table} VALUES (1, 'a'), (2, 'b'), (3, 'c')")

                                                                                

DataFrame[]

## Append rows to existing table

In [5]:
# Append one more row in a separate INSERT, addressing the table through the
# shared `table` identifier from the setup cell.
spark.sql(f"INSERT INTO {table} VALUES (4, 'd')")

DataFrame[]

In [7]:
# Show the table contents after both inserts (row order is not guaranteed).
spark.sql(f"select * from {table}").show()

+---+----+
| id|data|
+---+----+
|  1|   a|
|  4|   d|
|  2|   b|
|  3|   c|
+---+----+



In [9]:
!tree /tmp/delta-iceberg-converter/iceberg_tables/some_db/my_fun_table/

/tmp/delta-iceberg-converter/iceberg_tables/some_db/my_fun_table/
├── data
│   ├── 00000-0-ba967636-9bc4-4624-af57-c8eecf6e93ff-00001.parquet
│   ├── 00000-3-55d19b49-b2b4-43a6-b6bb-b70573a1f39d-00001.parquet
│   ├── 00001-1-270a4c8f-eb78-412d-b383-6a8ba380f97c-00001.parquet
│   └── 00002-2-4167155a-f76a-48ff-8f6c-c57d16d17404-00001.parquet
└── metadata
    ├── 3f5d7388-055d-41ac-a81f-b6238698dfae-m0.avro
    ├── 53857548-893b-41af-b5ef-331660534303-m0.avro
    ├── snap-1156653990101004919-1-53857548-893b-41af-b5ef-331660534303.avro
    ├── snap-3920219495340820176-1-3f5d7388-055d-41ac-a81f-b6238698dfae.avro
    ├── v1.metadata.json
    ├── v2.metadata.json
    ├── v3.metadata.json
    └── version-hint.text

2 directories, 12 files


## Alter table ADD COLUMN

In [10]:
# Schema evolution: add a nullable string column. The identifier comes from
# `table` (defined in the setup cell) rather than being repeated as a literal.
spark.sql(
    f"""
ALTER TABLE {table}
ADD COLUMNS (
    my_new_column string
  )
"""
)

DataFrame[]

In [11]:
# Show the table after ADD COLUMNS; the new column appears in the output.
spark.sql(f"select * from {table}").show()

+---+----+-------------+
| id|data|my_new_column|
+---+----+-------------+
|  1|   a|         null|
|  4|   d|         null|
|  2|   b|         null|
|  3|   c|         null|
+---+----+-------------+



## Alter table RENAME COLUMN

In [12]:
# Schema evolution: rename column `data` to `letter`. The identifier comes from
# `table` (defined in the setup cell) rather than being repeated as a literal.
spark.sql(
    f"""
ALTER TABLE {table} RENAME COLUMN data TO letter
"""
)

DataFrame[]

In [13]:
# Show the table after the rename; the column header should now read `letter`.
spark.sql(f"select * from {table}").show()

+---+------+-------------+
| id|letter|my_new_column|
+---+------+-------------+
|  4|     d|         null|
|  1|     a|         null|
|  2|     b|         null|
|  3|     c|         null|
+---+------+-------------+



## Drop table

## Cleanup

In [22]:
# Remove the whole scratch directory (warehouse included); ignore_errors=True
# keeps this from raising if the directory is already gone.
shutil.rmtree(testRoot, ignore_errors=True)