# Delta Lake Managed vs External Tables

This notebook demonstrates how to create managed and external tables with Delta Lake.

In [13]:
import pyspark
from delta import *
from pyspark.sql.types import *

In [14]:
builder = (
    pyspark.sql.SparkSession.builder.appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

In [15]:
spark = configure_spark_with_delta_pip(builder).getOrCreate()

## Create external Delta Lake table with save

In [16]:
columns = ["movie", "release_date"]
data = [("The Godfather", 1972), ("Detective Pikachu", 2019), ("Donny Darko", 2001)]
rdd = spark.sparkContext.parallelize(data)
df = rdd.toDF(columns)

In [17]:
df.write.format("delta").save("external_table1")

                                                                                

In [18]:
!tree external_table1

[01;34mexternal_table1[0m
├── [01;34m_delta_log[0m
│   └── [00m00000000000000000000.json[0m
├── [00mpart-00000-a0107ffb-2b7e-415e-9577-b401aca15fde-c000.snappy.parquet[0m
├── [00mpart-00003-1dd9de51-04ea-40f7-a677-dec4f8b53b73-c000.snappy.parquet[0m
├── [00mpart-00006-160d7342-1967-4c16-828d-af55e14daf75-c000.snappy.parquet[0m
└── [00mpart-00009-5484d2f0-2378-4815-bd8e-92abe7250e03-c000.snappy.parquet[0m

1 directory, 5 files


In [6]:
spark.read.format("delta").load("external_table1").show()

+-----------------+------------+
|            movie|release_date|
+-----------------+------------+
|Detective Pikachu|        2019|
|    The Godfather|        1972|
|      Donny Darko|        2001|
+-----------------+------------+



In [12]:
df.createOrReplaceTempView("some_external_table")

In [13]:
spark.sql("DESCRIBE TABLE EXTENDED some_external_table").show(truncate=False)

+------------+---------+-------+
|col_name    |data_type|comment|
+------------+---------+-------+
|movie       |string   |null   |
|release_date|bigint   |null   |
+------------+---------+-------+

22/09/12 10:25:40 WARN JavaUtils: Attempt to delete using native Unix OS command failed for path = /private/var/folders/19/_52w4zps3xjc6plz_f63j8sh0000gp/T/spark-e20f8ea5-85f5-4630-b664-28f64608a43b. Falling back to Java IO way
java.io.IOException: Failed to delete: /private/var/folders/19/_52w4zps3xjc6plz_f63j8sh0000gp/T/spark-e20f8ea5-85f5-4630-b664-28f64608a43b
	at org.apache.spark.network.util.JavaUtils.deleteRecursivelyUsingUnixNative(JavaUtils.java:171)
	at org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:110)
	at org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:91)
	at org.apache.spark.util.Utils$.deleteRecursively(Utils.scala:1206)
	at org.apache.spark.util.ShutdownHookManager$.$anonfun$new$4(ShutdownHookManager.scala:65)
	at org.apach

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 53678)
Traceback (most recent call last):
  File "/Users/matthew.powers/opt/miniconda3/envs/pyspark-330-delta-210/lib/python3.9/socketserver.py", line 316, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/Users/matthew.powers/opt/miniconda3/envs/pyspark-330-delta-210/lib/python3.9/socketserver.py", line 347, in process_request
    self.finish_request(request, client_address)
  File "/Users/matthew.powers/opt/miniconda3/envs/pyspark-330-delta-210/lib/python3.9/socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/Users/matthew.powers/opt/miniconda3/envs/pyspark-330-delta-210/lib/python3.9/socketserver.py", line 747, in __init__
    self.handle()
  File "/Users/matthew.powers/opt/miniconda3/envs/pyspark-330-delta-210/lib/python3.9/site-packages/pyspark/accumulators.py", line 281, in ha

In [7]:
%rm -rf external_table1

## Create external Delta Lake table with saveAsTable

In [8]:
df.write.format("delta").option("path", "external_table2").saveAsTable(
    "default.external_table2"
)

                                                                                

In [10]:
spark.sql("select * from external_table2").show()

AnalysisException: `default`.`external_table2` is not a Delta table.

In [15]:
%rm -rf external_table2

## Creating Delta Lake Managed Table

In [20]:
df.write.format("delta").mode("overwrite").saveAsTable("some_managed_table")

                                                                                

In [21]:
!tree spark-warehouse/some_managed_table

[01;34mspark-warehouse/some_managed_table[0m
├── [01;34m_delta_log[0m
│   ├── [00m00000000000000000000.json[0m
│   └── [00m00000000000000000001.json[0m
├── [00mpart-00000-08a07bba-17eb-46c2-aca1-00027e428324-c000.snappy.parquet[0m
├── [00mpart-00000-9066a80f-a65e-4308-9dc4-710578c0ab51-c000.snappy.parquet[0m
├── [00mpart-00003-10832649-5b19-4ac3-bfa0-89ef1b93ab00-c000.snappy.parquet[0m
├── [00mpart-00003-13a1cc33-609b-47ae-ba7e-449af22be97c-c000.snappy.parquet[0m
├── [00mpart-00006-42c98060-84dd-4e11-96ba-fd9fe1b68be2-c000.snappy.parquet[0m
├── [00mpart-00006-d1b2613a-baec-49c7-878c-f41991253dae-c000.snappy.parquet[0m
├── [00mpart-00009-ec1ca8f5-8e4b-4a52-9bbf-6d0b8742a185-c000.snappy.parquet[0m
└── [00mpart-00009-f5822aee-c606-4bed-9572-2d88e5037335-c000.snappy.parquet[0m

1 directory, 10 files


In [8]:
spark.sql("select * from some_managed_table").show()

+-----------------+------------+
|            movie|release_date|
+-----------------+------------+
|Detective Pikachu|        2019|
|    The Godfather|        1972|
|      Donny Darko|        2001|
+-----------------+------------+



In [12]:
spark.sql("SHOW TABLE EXTENDED `some_managed_table`").show(truncate=False)

ParseException: 
Syntax error at or near '`some_managed_table`'(line 1, pos 20)

== SQL ==
SHOW TABLE EXTENDED `some_managed_table`
--------------------^^^


In [10]:
spark.sql("DESCRIBE TABLE EXTENDED some_managed_table").show(truncate=False)

+----------------------------+---------------------------------------------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                                                            |comment|
+----------------------------+---------------------------------------------------------------------------------------------------------------------+-------+
|movie                       |string                                                                                                               |       |
|release_date                |bigint                                                                                                               |       |
|                            |                                                                                                                     |       |
|# Partitioning              |                            