In [7]:
%AddJar https://repo1.maven.org/maven2/io/delta/delta-core_2.11/0.4.0/delta-core_2.11-0.4.0.jar

Starting download from https://repo1.maven.org/maven2/io/delta/delta-core_2.11/0.4.0/delta-core_2.11-0.4.0.jar
Finished download of delta-core_2.11-0.4.0.jar


lastException: Throwable = null


In [8]:
import org.apache.spark.sql.delta.DeltaLog
import io.delta.tables._
import org.apache.spark.sql.functions._ 
import org.apache.spark.sql.{SaveMode, SparkSession, DataFrame}

# Create a partitioned table

In [70]:
  /**
   * Writes `data` as a Delta table below `/opt/partitioned_lake/`, partitioned
   * by the column named in `Keys`.
   *
   * The write uses "overwrite" mode with the `mergeSchema` option set to "true".
   *
   * @param data      DataFrame to persist
   * @param tableName directory name of the table under the lake root
   * @param Keys      name of the partition column
   */
  def createPartitionedTable(data: DataFrame, tableName: String, Keys: String): Unit = {
    val targetPath = s"/opt/partitioned_lake/$tableName"
    val deltaWriter = data.write
      .format("delta")
      .option("mergeSchema", "true")
      .mode("overwrite")
      .partitionBy(Keys)
    deltaWriter.save(targetPath)
  }

createPartitionedTable: (data: org.apache.spark.sql.DataFrame, tableName: String, Keys: String)Unit


# Overwrite only the data that matches predicates over partition columns

In [71]:
/**
 * Overwrites part of the Delta table stored under `/opt/partitioned_lake/`.
 *
 * The write uses "overwrite" mode together with the `replaceWhere` option set
 * to `condition`, and `mergeSchema` set to "true".
 *
 * @param data      DataFrame holding the replacement rows
 * @param tableName directory name of the table under the lake root
 * @param condition predicate string passed verbatim as `replaceWhere`
 */
def updatePartitionedTable(data: DataFrame, tableName: String, condition: String): Unit = {
  val targetPath = s"/opt/partitioned_lake/$tableName"
  val deltaWriter = data.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .option("replaceWhere", condition)
  deltaWriter.save(targetPath)
}

updatePartitionedTable: (data: org.apache.spark.sql.DataFrame, tableName: String, condition: String)Unit


# Read table

In [74]:
 /**
  * Loads the Delta table stored at `/opt/partitioned_lake/<tableName>`.
  *
  * @param tableName directory name of the table under the lake root
  * @return the table contents as a DataFrame
  */
 def readTable(tableName: String): DataFrame =
   spark.read.format("delta").load(s"/opt/partitioned_lake/$tableName")

readTable: (tableName: String)org.apache.spark.sql.DataFrame


In [47]:
// Load the sales CSV, using its first row as the column names.
val sales_df = {
  val csvReader = spark.read.option("header", true)
  csvReader.csv("Sale_test.csv")
}

sales_df = [id: string, product_id: string ... 2 more fields]


[id: string, product_id: string ... 2 more fields]

In [48]:
// Add a "date" column holding created_at formatted as yyyy-MM-dd.
val modifiedDF = sales_df.withColumn(
  "date",
  date_format(col("created_at"), "yyyy-MM-dd")
)

modifiedDF = [id: string, product_id: string ... 3 more fields]


[id: string, product_id: string ... 3 more fields]

In [53]:
// Register the DataFrame as the temp view "sales" so it can be queried with spark.sql.
modifiedDF.createOrReplaceTempView(viewName = "sales")
// Print the leading rows for a quick visual check.
modifiedDF.show()

+---+----------+-------------------+-----+----------+
| id|product_id|         created_at|units|      date|
+---+----------+-------------------+-----+----------+
|  1|       205|1970-01-01 00:00:15|    1|1970-01-01|
|  2|       338|1970-01-01 00:01:15|    2|1970-01-01|
|  3|       461|1970-01-01 00:01:50|    5|1970-01-01|
|  4|       705|1970-01-06 03:25:55|    2|1970-01-06|
|  5|       919|1970-01-06 03:26:15|    2|1970-01-06|
|  6|       216|1970-01-06 03:26:35|    2|1970-01-06|
|  7|       668|2009-11-14 06:41:05|    5|2009-11-14|
|  8|       705|2009-11-14 06:41:10|    2|2009-11-14|
|  9|       900|2009-11-14 06:41:15|    5|2009-11-14|
| 10|       275|2009-11-14 06:41:20|    1|2009-11-14|
| 11|        80|2009-11-14 06:42:10|    1|2009-11-14|
| 12|        88|2010-03-26 21:46:45|    1|2010-03-26|
| 13|       240|2010-03-26 21:47:00|    5|2010-03-26|
| 14|       226|2010-03-26 21:47:20|    5|2010-03-26|
| 15|       463|2010-03-26 21:48:00|    3|2010-03-26|
| 16|      1014|2010-03-26 2

In [67]:
// Write the sales data as the Delta table "sales", partitioned by the "date" column.
createPartitionedTable(data = modifiedDF, tableName = "sales", Keys = "date")

In [68]:
// Select only the rows dated 1970-01-01 from the "sales" view, multiplying units by 10.
val res_df = spark.sql(
  "select id, product_id,created_at,date ," +
    "case when date='1970-01-01' then units*10 else units end as units " +
    "from sales where date='1970-01-01'"
)
res_df.show()

+---+----------+-------------------+----------+-----+
| id|product_id|         created_at|      date|units|
+---+----------+-------------------+----------+-----+
|  1|       205|1970-01-01 00:00:15|1970-01-01| 10.0|
|  2|       338|1970-01-01 00:01:15|1970-01-01| 20.0|
|  3|       461|1970-01-01 00:01:50|1970-01-01| 50.0|
+---+----------+-------------------+----------+-----+



res_df = [id: string, product_id: string ... 3 more fields]


[id: string, product_id: string ... 3 more fields]

In [72]:
// Overwrite the "sales" table with res_df, passing the date predicate as the replaceWhere condition.
updatePartitionedTable(data = res_df, tableName = "sales", condition = "date= '1970-01-01'")

In [75]:
// Read the "sales" table back and print it without truncating cell values.
readTable(tableName = "sales").show(truncate = false)

+---+----------+-------------------+-----+----------+
|id |product_id|created_at         |units|date      |
+---+----------+-------------------+-----+----------+
|12 |88        |2010-03-26 21:46:45|1    |2010-03-26|
|13 |240       |2010-03-26 21:47:00|5    |2010-03-26|
|14 |226       |2010-03-26 21:47:20|5    |2010-03-26|
|15 |463       |2010-03-26 21:48:00|3    |2010-03-26|
|16 |1014      |2010-03-26 21:48:30|4    |2010-03-26|
|7  |668       |2009-11-14 06:41:05|5    |2009-11-14|
|8  |705       |2009-11-14 06:41:10|2    |2009-11-14|
|9  |900       |2009-11-14 06:41:15|5    |2009-11-14|
|10 |275       |2009-11-14 06:41:20|1    |2009-11-14|
|11 |80        |2009-11-14 06:42:10|1    |2009-11-14|
|17 |900       |2010-03-27 20:20:55|1    |2010-03-27|
|18 |283       |2010-03-27 20:21:30|1    |2010-03-27|
|19 |42        |2010-03-27 20:22:15|1    |2010-03-27|
|4  |705       |1970-01-06 03:25:55|2    |1970-01-06|
|5  |919       |1970-01-06 03:26:15|2    |1970-01-06|
|6  |216       |1970-01-06 0