# Convert Iceberg to Delta Lake

In [3]:
import random
import shutil

from delta import *
from delta.tables import DeltaTable
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Scratch area for the demo. It is wiped up front so the notebook can be
# re-run from the top without colliding with files from a previous run.
testRoot = "/tmp/delta-iceberg-converter/"
warehousePath = f"{testRoot}iceberg_tables"
shutil.rmtree(testRoot, ignore_errors=True)

# `table` is the Iceberg identifier (catalog `local`, namespace `db`);
# `tablePath` is the file URI of the same table inside the warehouse.
table = "local.db.table"
tablePath = f"file://{warehousePath}/db/table"

# Session settings: Delta's SQL extension and catalog for `spark_catalog`,
# plus a Hadoop-type Iceberg catalog named `local` rooted at the warehouse.
spark_conf = {
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.sql.catalog.local": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.local.type": "hadoop",
    "spark.sql.catalog.local.warehouse": warehousePath,
}

builder = SparkSession.builder.master("local[*]").appName("test")
for conf_key, conf_value in spark_conf.items():
    builder = builder.config(conf_key, conf_value)

# Extra jars requested on top of what configure_spark_with_delta_pip adds:
# the Delta-Iceberg module and the Iceberg runtime for Spark 3.3 / Scala 2.12.
my_packages = [
    "io.delta:delta-iceberg_2.12:2.3.0",
    "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.0.0",
]

delta_builder = configure_spark_with_delta_pip(builder, extra_packages=my_packages)
spark = delta_builder.getOrCreate()

:: loading settings :: url = jar:file:/Users/matthew.powers/opt/miniconda3/envs/pyspark-332-delta-230/lib/python3.9/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/matthew.powers/.ivy2/cache
The jars for the packages stored in: /Users/matthew.powers/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
io.delta#delta-iceberg_2.12 added as a dependency
org.apache.iceberg#iceberg-spark-runtime-3.3_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-995f8b4d-9eed-4d8f-8735-c97842e24dc7;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.3.0 in central
	found io.delta#delta-storage;2.3.0 in central
	found org.antlr#antlr4-runtime;4.8 in central
	found io.delta#delta-iceberg_2.12;2.3.0 in central
	found org.scala-lang.modules#scala-collection-compat_2.12;2.1.1 in central
	found org.apache.iceberg#iceberg-spark-runtime-3.3_2.12;1.0.0 in central
downloading https://repo1.maven.org/maven2/io/delta/delta-iceberg_2.12/2.3.0/delta-iceberg_2.12-2.3.0.jar ...
	[SUCCESSFUL ] io.delta#delta-iceberg_2.12;2.3.0!delta-iceberg_2.12.jar (404ms)
downloading https://repo1.maven.org/maven

23/04/12 13:17:01 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [4]:
# Bare expression: the notebook displays the session object as this cell's output.
spark

## Create Iceberg table

In [5]:
# Two-column Iceberg table in the `local` catalog, partitioned by `data`.
create_table_sql = (
    f"CREATE TABLE {table} "
    "(id BIGINT, data STRING) "
    "USING ICEBERG PARTITIONED BY (data)"
)
spark.sql(create_table_sql)

DataFrame[]

In [6]:
# First write: two rows whose `data` values differ, so they land in two partitions.
first_batch_sql = f"INSERT INTO {table} VALUES (1, 'a'), (2, 'b')"
spark.sql(first_batch_sql)

                                                                                

DataFrame[]

In [7]:
# Second write: a single row with a new partition value.
second_batch_sql = f"INSERT INTO {table} VALUES (3, 'c')"
spark.sql(second_batch_sql)

DataFrame[]

In [8]:
# Read the Iceberg table back. The identifier comes from `table` (as in the
# CREATE/INSERT cells) so a rename only has to happen in the setup cell.
spark.sql(f"select * from {table}").show()

+---+----+
| id|data|
+---+----+
|  1|   a|
|  3|   c|
|  2|   b|
+---+----+



In [9]:
!tree /tmp/delta-iceberg-converter/iceberg_tables/db/table/

[01;34m/tmp/delta-iceberg-converter/iceberg_tables/db/table/[0m
├── [01;34mdata[0m
│   ├── [01;34mdata=a[0m
│   │   └── [00m00000-0-fbd8e57f-be89-4fad-9d29-2df71248d789-00001.parquet[0m
│   ├── [01;34mdata=b[0m
│   │   └── [00m00001-1-44c83e06-6962-4292-8e46-3f9e47c30d4a-00001.parquet[0m
│   └── [01;34mdata=c[0m
│       └── [00m00000-2-36892acb-e25f-41ca-a1e9-7e1495510171-00001.parquet[0m
└── [01;34mmetadata[0m
    ├── [00md099f186-c9da-4fb8-8a44-82b1338749a5-m0.avro[0m
    ├── [00me23838f6-3ddf-47c3-85ad-f6c87b140de9-m0.avro[0m
    ├── [00msnap-3685361640488939140-1-e23838f6-3ddf-47c3-85ad-f6c87b140de9.avro[0m
    ├── [00msnap-8632565756425441974-1-d099f186-c9da-4fb8-8a44-82b1338749a5.avro[0m
    ├── [00mv1.metadata.json[0m
    ├── [00mv2.metadata.json[0m
    ├── [00mv3.metadata.json[0m
    └── [00mversion-hint.text[0m

5 directories, 11 files


## Convert to Delta

In [13]:
# Convert the Iceberg table, addressed by its path, to Delta.
convert_sql = f"CONVERT TO DELTA iceberg.`{tablePath}`"
spark.sql(convert_sql)

23/04/12 13:26:17 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


DataFrame[]

In [14]:
# Read the same location through the Delta reader to confirm the rows are visible.
converted_df = spark.read.format("delta").load(tablePath)
converted_df.show()

+---+----+
| id|data|
+---+----+
|  2|   b|
|  1|   a|
|  3|   c|
+---+----+



In [15]:
!tree /tmp/delta-iceberg-converter/iceberg_tables/db/table/

[01;34m/tmp/delta-iceberg-converter/iceberg_tables/db/table/[0m
├── [01;34m_delta_log[0m
│   ├── [00m00000000000000000000.checkpoint.parquet[0m
│   ├── [00m00000000000000000000.json[0m
│   └── [00m_last_checkpoint[0m
├── [01;34mdata[0m
│   ├── [01;34mdata=a[0m
│   │   └── [00m00000-0-fbd8e57f-be89-4fad-9d29-2df71248d789-00001.parquet[0m
│   ├── [01;34mdata=b[0m
│   │   └── [00m00001-1-44c83e06-6962-4292-8e46-3f9e47c30d4a-00001.parquet[0m
│   └── [01;34mdata=c[0m
│       └── [00m00000-2-36892acb-e25f-41ca-a1e9-7e1495510171-00001.parquet[0m
└── [01;34mmetadata[0m
    ├── [00md099f186-c9da-4fb8-8a44-82b1338749a5-m0.avro[0m
    ├── [00me23838f6-3ddf-47c3-85ad-f6c87b140de9-m0.avro[0m
    ├── [00msnap-3685361640488939140-1-e23838f6-3ddf-47c3-85ad-f6c87b140de9.avro[0m
    ├── [00msnap-8632565756425441974-1-d099f186-c9da-4fb8-8a44-82b1338749a5.avro[0m
    ├── [00mv1.metadata.json[0m
    ├── [00mv2.metadata.json[0m
    ├── [00mv3.metadata.json[0m
    └── 

## Modify the converted table

In [16]:
# Append one row, addressing the table as a path-based Delta table.
delta_insert_sql = f"INSERT INTO delta.`{tablePath}` VALUES (4, 'd')"
spark.sql(delta_insert_sql)

DataFrame[]

In [17]:
# Re-read the Delta table; the newly inserted row should now be listed.
spark.read.load(tablePath, format="delta").show()

+---+----+
| id|data|
+---+----+
|  4|   d|
|  2|   b|
|  1|   a|
|  3|   c|
+---+----+



## Create an external catalog table using Delta

In [19]:
# Register the converted location under a table name in the session catalog.
register_sql = f"CREATE TABLE converted_delta_table USING delta LOCATION '{tablePath}'"
spark.sql(register_sql)

DataFrame[]

In [20]:
# Query by catalog name rather than by path.
catalog_df = spark.read.table("converted_delta_table")
catalog_df.show()

+---+----+
| id|data|
+---+----+
|  4|   d|
|  2|   b|
|  1|   a|
|  3|   c|
+---+----+



## Cleanup

In [22]:
# Drop the catalog entry first: otherwise it would keep pointing at a deleted
# location, and re-running the notebook in this session would fail at
# CREATE TABLE converted_delta_table. The table was created with LOCATION,
# so the files themselves are removed by the rmtree below.
spark.sql("DROP TABLE IF EXISTS converted_delta_table")

# Remove every file the demo wrote.
shutil.rmtree(testRoot, ignore_errors=True)