## Delta table read from Hive

In [0]:
# Build the Spark session used by every cell below: Delta Lake package and
# SQL extension, Delta catalog, a relative warehouse directory, Hive support.

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("Delta table with Column Mapping")
    # Delta Lake artifact resolved through spark.jars.packages
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.1.1")
    # Delta SQL extension and catalog implementation
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
    # Warehouse directory is given as a relative path
    .config("spark.sql.warehouse.dir", "spark-warehouse")
    .master("local[*]")
    .enableHiveSupport()
    .getOrCreate()
)

# Last expression of the cell: display the session summary
spark

In [0]:
# Prerequisite: the sparksql-magic package must be installed first
# (pip install sparksql-magic).
# Load the extension so the %%sparksql cell magic used by the SQL cells
# in this notebook becomes available.
%load_ext sparksql_magic

In [0]:
from delta import DeltaTable

# Handle to the existing Delta table, looked up by name; `dt` is reused by
# the manifest-generation and vacuum cells further down.
dt = DeltaTable.forName(spark, "sales_delta_managed")

# Print a trimmed view of the table's commit history.
(
    dt.history()
    .select("version", "timestamp", "operation", "operationMetrics")
    .show()
)

In [0]:
%%sparksql

-- Show the catalog metadata recorded for the Delta table default.sales_delta_managed
describe extended default.sales_delta_managed;

In [0]:
%%sparksql

-- Declare a second catalog table, stored as Parquet, whose LOCATION is the
-- relative path sales_delta_managed/ (presumably the data directory of the
-- Delta table read above, TODO confirm how the relative path resolves).
-- NOTE(review): the column list is presumably meant to mirror the schema of
-- sales_delta_managed, verify against the describe output.
CREATE TABLE default.sales_hive_table
(
    transacted_at timestamp,
    trx_id string,
    retailer_id string,
    description string,
    amount decimal(14,2),
    city string   
)
STORED AS PARQUET
LOCATION "sales_delta_managed/"
;

In [0]:
%%sparksql

-- Show the catalog metadata recorded for the Parquet-backed table default.sales_hive_table
describe extended default.sales_hive_table;

In [0]:
%%sparksql

-- Row count through the plain Parquet table definition, taken before the
-- vacuum cell runs.
-- NOTE(review): this table reads the directory without the Delta log, so the
-- count may include files the Delta table no longer references. Compare it
-- with the count on sales_delta_managed.
select count(1) from sales_hive_table;

In [0]:
%%sparksql

-- Row count through the Delta table, used as the reference value for the
-- counts taken on sales_hive_table
select count(1) from sales_delta_managed;

In [0]:
# Generate the symlink-format manifest for the Delta table referenced by `dt`.
# NOTE(review): sales_hive_table is declared STORED AS PARQUET directly on the
# table directory, so nothing visible in this notebook reads the manifest.
# Confirm whether the table definition was meant to point at it.
dt.generate("symlink_format_manifest")

In [0]:
# Vacuum the Delta table so that it can be read from Hive.
# The retention-duration check is switched off first; presumably vacuum(0)
# would otherwise be rejected for using a retention shorter than the default.
# NOTE(review): a 0-hour retention removes every file the current table
# version does not reference, which gives up time travel to older versions.
# The check also stays disabled for the rest of this Spark session.
spark.conf.set("spark.databricks.delta.retentionDurationCheck.enabled","false")
dt.vacuum(0)

In [0]:
%%sparksql

-- Refresh sales_hive_table after the vacuum step changed the files on disk
-- (presumably to drop any cached file listing, TODO confirm)
REFRESH TABLE sales_hive_table;

In [0]:
%%sparksql


select count(1) from sales_hive_table;