## Hello Delta Lake 

In [1]:
# Prerequisites
import pyspark
from delta import *

In [2]:

# Describe a Spark session named "LocalDelta" with the Delta Lake SQL
# extension registered and the default catalog backed by DeltaCatalog.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("LocalDelta")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# Pass the builder through the delta-spark pip helper, then start the session
# (or reuse one that is already running).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [3]:
# Display the running SparkContext's configuration as a list of (key, value) pairs
(
    spark.sparkContext
    .getConf()
    .getAll()
)

[('spark.app.initial.jar.urls',
  'spark://f94cf9a1a806:36443/jars/org.antlr_antlr4-runtime-4.9.3.jar,spark://f94cf9a1a806:36443/jars/io.delta_delta-storage-2.4.0.jar,spark://f94cf9a1a806:36443/jars/io.delta_delta-core_2.12-2.4.0.jar'),
 ('spark.submit.pyFiles',
  '/home/jovyan/.ivy2/jars/io.delta_delta-core_2.12-2.4.0.jar,/home/jovyan/.ivy2/jars/io.delta_delta-storage-2.4.0.jar,/home/jovyan/.ivy2/jars/org.antlr_antlr4-runtime-4.9.3.jar'),
 ('spark.repl.local.jars',
  'file:///home/jovyan/.ivy2/jars/io.delta_delta-core_2.12-2.4.0.jar,file:///home/jovyan/.ivy2/jars/io.delta_delta-storage-2.4.0.jar,file:///home/jovyan/.ivy2/jars/org.antlr_antlr4-runtime-4.9.3.jar'),
 ('spark.app.name', 'LocalDelta'),
 ('spark.app.initial.file.urls',
  'file:///home/jovyan/.ivy2/jars/io.delta_delta-core_2.12-2.4.0.jar,file:///home/jovyan/.ivy2/jars/org.antlr_antlr4-runtime-4.9.3.jar,file:///home/jovyan/.ivy2/jars/io.delta_delta-storage-2.4.0.jar'),
 ('spark.driver.host', 'f94cf9a1a806'),
 ('spark.executor

In [4]:
# Build a small DataFrame with ids 0 through 4 ...
data = spark.range(0, 5)

# ... and store it in Delta format, overwriting whatever is already at this path
(
    data.write
    .format("delta")
    .mode("overwrite")
    .save("/tmp/delta-table")
)

In [5]:
# Load the Delta table at /tmp/delta-table back into a DataFrame
df = (
    spark.read
    .format("delta")
    .load("/tmp/delta-table")
)

# Print the rows; no sort is applied, so they appear in whatever order Spark returns them
df.show()

+---+
| id|
+---+
|  0|
|  3|
|  4|
|  1|
|  2|
+---+



---

### Load data into Delta Lake Table

In [6]:
# Source Parquet file with the loan data (path is relative to the working directory)
file_path = "data/loans/loan-risks.snappy.parquet"

# Location where the Delta Lake table is stored
delta_path = "/tmp/loans_delta"

# Read the Parquet file and rewrite it in Delta format,
# overwriting anything already stored at delta_path
(
    spark.read
    .format("parquet")
    .load(file_path)
    .write
    .format("delta")
    .mode("overwrite")
    .save(delta_path)
)

# Register the Delta table as the temporary view "loans_delta_tbl"
# so it can be queried through spark.sql
(
    spark.read
    .format("delta")
    .load(delta_path)
    .createOrReplaceTempView("loans_delta_tbl")
)

In [7]:
# Count the rows exposed by the loans_delta_tbl view
(
    spark
    .sql("SELECT count(*) from loans_delta_tbl")
    .show()
)

+--------+
|count(1)|
+--------+
|   14705|
+--------+



In [8]:
# Preview the view's contents; show() prints only the first 20 rows by default
(
    spark
    .sql("SELECT * from loans_delta_tbl")
    .show()
)

+-------+-----------+---------+----------+
|loan_id|funded_amnt|paid_amnt|addr_state|
+-------+-----------+---------+----------+
|      0|       1000|   182.22|        CA|
|      1|       1000|   361.19|        WA|
|      2|       1000|   176.26|        TX|
|      3|       1000|   1000.0|        OK|
|      4|       1000|   249.98|        PA|
|      5|       1000|    408.6|        CA|
|      6|       1000|   1000.0|        MD|
|      7|       1000|   168.81|        OH|
|      8|       1000|   193.64|        TX|
|      9|       1000|   218.83|        CT|
|     10|       1000|   322.37|        NJ|
|     11|       1000|   400.61|        NY|
|     12|       1000|   1000.0|        FL|
|     13|       1000|   165.88|        NJ|
|     14|       1000|    190.6|        TX|
|     15|       1000|   1000.0|        OH|
|     16|       1000|   213.72|        MI|
|     17|       1000|   188.89|        MI|
|     18|       1000|   237.41|        CA|
|     19|       1000|   203.85|        CA|
+-------+--