In [None]:
%%pyspark
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schema for the purchase-order detail CSV, so column types are
# declared up front instead of being inferred by the reader.
# Each .add(name, type) appends a nullable field, matching StructField's default.
orderSchema = (
    StructType()
    .add("PurchaseOrderID", IntegerType())
    .add("PurchaseOrderDetailID", IntegerType())
    .add("DueDate", DateType())
    .add("OrderQty", IntegerType())
    .add("ProductID", IntegerType())
    .add("UnitPrice", DecimalType(12,2))
    .add("LineTotal", DecimalType(12,2))
    .add("ReceivedQty", DecimalType(12,2))
    .add("RejectedQty", DecimalType(12,2))
    .add("StockedQty", DecimalType(12,2))
    .add("ModifiedDate", DateType())
)

# Read the purchase-order detail CSV from the data lake, applying orderSchema
# and treating the first row of the file as the header.
csvDF = (
    spark.read
         .format('csv')
         .schema(orderSchema)
         .option('header', True)
         .load('abfss://root@adlesilabs.dfs.core.windows.net/demofiles/csv/PurchaseOrderDetail.csv')
)

# Preview only the first three rows rather than rendering the whole frame.
display(csvDF.limit(3))

## Two types of tables
- Managed Tables (Internal)
- Unmanaged Tables (External)

```
 
```
## Managed (or Internal) Tables
- Spark manages both the data and the metadata.
- Data is saved in the Spark SQL warehouse directory, which is the default location for managed tables.
- Metadata is saved in a metastore of relational entities (including databases, tables, and temporary views) and can be accessed through an interface known as the "catalog".
- If you delete a managed table, Spark deletes both the table data in the warehouse and the metadata in the metastore.

In [None]:
# Managed table: only a table name is supplied, no storage path or directory.
# NOTE(review): the variable is named "Delta..." but the writer below uses the parquet format.
DeltaTableName = "PurchaseOrderDetail_dp203"

# Persist csvDF as a Parquet-backed table, replacing any existing table of that name.
# NOTE(review): "overwriteSchema" is documented as a Delta Lake writer option; it
# presumably has no effect with format "parquet" -- confirm before relying on it.
csvDF.write.saveAsTable(
    DeltaTableName,
    format="parquet",
    mode="overwrite",
    overwriteSchema="true",
)

In [None]:
# List the tables registered in the catalog's current database;
# PurchaseOrderDetail_dp203 is expected to appear after the saveAsTable call.
spark.catalog.listTables()

In [None]:
%%sql
-- Show the column names and data types recorded for the managed table.
DESCRIBE  PurchaseOrderDetail_dp203

In [None]:
%%sql

-- Preview three rows of the managed table to confirm the data was written.
SELECT *
FROM PurchaseOrderDetail_dp203 
LIMIT 3

In [None]:
%%sql
-- Preview five rows of PurchaseOrderDetail_SQL.
-- NOTE(review): this table is not created anywhere in the visible notebook; presumably it
-- already exists in the metastore from another notebook or script -- confirm, otherwise
-- this cell fails on a fresh "Run All".
SELECT *
FROM PurchaseOrderDetail_SQL
LIMIT 5

In [None]:
%%sql
-- Drop the managed table created earlier in this notebook.
-- IF EXISTS keeps the cell re-runnable: a second run (or a run after a failed
-- create) is a no-op instead of raising a "table not found" error.
DROP TABLE IF EXISTS PurchaseOrderDetail_dp203;

In [None]:
# List the catalog tables again; PurchaseOrderDetail_dp203 should no longer
# be present after the DROP TABLE above.
spark.catalog.listTables()

```
 
```
## Unmanaged (or External) Tables: 
- Spark only manages the metadata, but not the data.
- Requires you to specify the exact location where you wish to save the table.
- If you delete an unmanaged table, Spark deletes only the metadata and leaves the data files in place.


In [None]:
%%pyspark
# Example: Managed Table (External)


deltaTablePath = "abfss://root@adlesilabs.dfs.core.windows.net/lakedb/sparktable/PurchaseOrderDetail_dp203_umt"

DeltaTableName = "PurchaseOrderDetail_dp203_umt"
# Save as a Parquet files
(
    csvDF.write.format("parquet")
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .option("path", deltaTablePath)
         .saveAsTable(DeltaTableName)
)

In [None]:
# List the tables registered in the catalog's current database;
# PurchaseOrderDetail_dp203_umt is expected to appear after the saveAsTable call.
spark.catalog.listTables()

In [None]:
%%sql
-- Show the column names and data types recorded for the unmanaged (external) table.
DESCRIBE  PurchaseOrderDetail_dp203_umt

In [None]:
%%sql

-- Preview three rows of the unmanaged (external) table to confirm the data is readable.
SELECT *
FROM PurchaseOrderDetail_dp203_umt 
LIMIT 3

In [None]:
%%sql
-- Drop the unmanaged (external) table definition from the metastore.
-- IF EXISTS keeps the cell re-runnable: running it again once the table is
-- already gone is a no-op instead of raising a "table not found" error.
DROP TABLE IF EXISTS PurchaseOrderDetail_dp203_umt;

In [None]:
# List the catalog tables again; PurchaseOrderDetail_dp203_umt should no longer
# be present after the DROP TABLE above.
spark.catalog.listTables()