**Employee data**

In [0]:
def getSchema():
    from pyspark.sql.types import MapType, StructType, StructField, StringType, IntegerType, ArrayType, DoubleType

    invoice_schema = StructType([
            StructField('InvoiceNumber', StringType(), True),
            StructField('CreatedTime', IntegerType(), True),
            StructField('StoreID', StringType(), True), 
            StructField('CashierID', StringType(), True),
            StructField('CustomerType', StringType(), True),
            StructField('CustomerCardNo', StringType(), True),
            StructField('TotalAmount', DoubleType(), True),
            StructField('NumberOfItems', IntegerType(), True),
            StructField('PaymentMethod', StringType(), True),
            StructField('TaxableAmount', StringType(), True),
            StructField('CGST', DoubleType(), True),
            StructField('SGST', DoubleType(), True),
            StructField('CESS', DoubleType(), True),
            StructField('DeliveryType', StringType(), True),
            StructField('DeliveryAddress', StructType([
                StructField('AddressLine', StringType(), True),
                StructField('City', StringType(), True),
                StructField('State', StringType(), True),
                StructField('PinCode', StringType(), True),
                StructField('ContactNumber', StringType(), True)
            ])),
            StructField('InvoiceLineItems', ArrayType(StructType([
                StructField('ItemCode', StringType(), True),
                StructField('ItemDescription', StringType(), True),
                StructField('ItemPrice', DoubleType(), True),
                StructField('ItemQty', IntegerType(), True),
                StructField('TotalValue', DoubleType(), True)
            ])))
        ])
    return invoice_schema

In [0]:
emp_df = (spark.read.format('json') 
    #.option('header', 'true') \
    .schema(getSchema())
    .load('dbfs:/FileStore/invoices')
)
display(emp_df)


InvoiceNumber,CreatedTime,StoreID,CashierID,CustomerType,CustomerCardNo,TotalAmount,NumberOfItems,PaymentMethod,TaxableAmount,CGST,SGST,CESS,DeliveryType,DeliveryAddress,InvoiceLineItems
94201418,,STR7443,OAS845,PRIME,8790333340,9000.0,4,CASH,9000.0,225.0,225.0,11.25,HOME-DELIVERY,"List(House No 383, 4427 Pellentesque Rd., Bokaro Steel City, Jharkhand, 509723, 1442202063)","List(List(258, Closet, 1687.0, 2, 3374.0), List(538, Grandmother clock, 1301.0, 1, 1301.0), List(528, Projection clock, 2365.0, 1, 2365.0), List(673, Dough scraper, 980.0, 2, 1960.0))"
8749479,,STR5864,OAS287,PRIME,7589671731,7646.0,4,CASH,7646.0,191.15,191.15,9.5575,HOME-DELIVERY,"List(1852 Est St., Imphal, Manipur, 120023, 6124913142)","List(List(593, Hanging curtains, 1896.0, 2, 3792.0), List(308, Butterfly chair, 857.0, 2, 1714.0), List(383, Innerspring Mattress, 655.0, 1, 655.0), List(423, Quilt, 1485.0, 1, 1485.0))"
91509413,,STR2629,OAS737,NONPRIME,2461788838,7453.0,3,CARD,7453.0,186.325,186.325,9.31625,HOME-DELIVERY,"List(House No 740, 6689 Tempor Av., Bharatpur, Rajasthan, 932264, 8563079826)","List(List(528, Projection clock, 2365.0, 2, 4730.0), List(503, Chef's knife, 1973.0, 1, 1973.0), List(653, Browning tray, 375.0, 2, 750.0))"
54315437,,STR5864,OAS287,PRIME,7589671731,4006.0,2,CASH,4006.0,100.15,100.15,5.0075,HOME-DELIVERY,"List(Flat No. #335-7984 Senectus Rd., Pali, Rajasthan, 900530, 6125618251)","List(List(238, Dining table, 1582.0, 2, 3164.0), List(273, Bedroom set, 842.0, 1, 842.0))"
84526449,,STR2952,OAS329,PRIME,3027514652,3288.0,1,CASH,3288.0,82.2,82.2,4.11,TAKEAWAY,,"List(List(458, Wine glass, 1644.0, 2, 3288.0))"
4014389,,STR5494,OAS969,NONPRIME,8189067868,1894.0,1,CASH,1894.0,47.35,47.35,2.3675,TAKEAWAY,,"List(List(268, Floating shelf, 1894.0, 1, 1894.0))"
36524242,,STR3781,OAS311,PRIME,4692642935,1955.0,1,CASH,1955.0,48.875,48.875,2.44375,HOME-DELIVERY,"List(8612 Non Rd., Guna, Madhya Pradesh, 210683, 8336980338)","List(List(643, Blow torch, 1955.0, 1, 1955.0))"
35058148,,STR1534,OAS285,PRIME,5582740626,2944.0,3,CARD,2944.0,73.60000000000001,73.60000000000001,3.68,TAKEAWAY,,"List(List(668, Crab cracker, 785.0, 1, 785.0), List(658, Chinois, 567.0, 1, 567.0), List(633, Cafe Curtains, 796.0, 2, 1592.0))"
95067626,,STR2629,OAS622,NONPRIME,7829975914,2297.0,2,CASH,2297.0,57.425,57.425,2.87125,HOME-DELIVERY,"List(7409 Laoreet Rd., Jammu, Jammu and Kashmir, 697806, 8335722151)","List(List(233, Coffee table, 1055.0, 1, 1055.0), List(408, Confidante, 1242.0, 1, 1242.0))"
17921881,,STR1955,OAS183,NONPRIME,3916555911,773.0,1,CASH,773.0,19.325000000000003,19.325000000000003,0.96625,HOME-DELIVERY,"List(529-4520 Libero. Ave, Raigarh, Chhattisgarh, 183678, 3057906681)","List(List(398, Latex Mattress, 773.0, 1, 773.0))"


In [0]:
#dbutils.fs.mkdirs('dbfs:/FileStore/SalesDelta')

Out[7]: True

In [0]:
from pyspark.sql.functions import spark_partition_id

partition_df=emp_df.withColumn('partition_id',spark_partition_id())
partition_df.show()

In [0]:
display(partition_df.groupBy('partition_id').count())

partition_id,count
0,590
1,501
2,500
3,10


In [0]:
emp_df=emp_df.repartition(4)

In [0]:
partition_df=emp_df.withColumn('partition_id',spark_partition_id())
partition_df.show()

+-----------+------------+-------------------+------------+
|employee_id|entry_detail|   timestamp_detail|partition_id|
+-----------+------------+-------------------+------------+
|       1002|       login|2023-06-25 01:00:34|           0|
|       1004|       login|2023-06-30 01:00:34|           0|
|       1000|       login|2023-06-16 01:00:34|           0|
|       1003|       login|2023-06-28 01:00:34|           0|
|       1000|       login|2023-06-18 01:00:34|           1|
|       1002|       login|2023-06-24 01:00:34|           1|
|       1003|      logout|2023-06-29 01:00:34|           1|
|       1002|      logout|2023-06-26 01:00:34|           1|
|       1001|      logout|2023-06-23 01:00:34|           2|
|       1003|       login|2023-06-27 01:00:34|           2|
|       1004|      logout|2023-07-01 01:00:34|           2|
|       1001|       login|2023-06-20 01:00:34|           2|
|       1001|       login|2023-06-21 01:00:34|           3|
|       1000|      logout|2023-06-17 01:

In [0]:
display(partition_df.groupBy('partition_id').count())

partition_id,count
0,4
1,4
2,4
3,4


In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/invoice_delta", recurse=True)


Out[31]: True

In [0]:
#dbutils.fs.mkdirs("dbfs:/FileStore/SalesDelta")


Out[9]: True

In [0]:
#create delta table
emp_df.write.format('delta').mode('overwrite').saveAsTable('invoice_delta')

In [0]:
%sql

select * from invoice_delta where InvoiceNumber ='51402977'limit 10

InvoiceNumber,CreatedTime,StoreID,CashierID,CustomerType,CustomerCardNo,TotalAmount,NumberOfItems,PaymentMethod,TaxableAmount,CGST,SGST,CESS,DeliveryType,DeliveryAddress,InvoiceLineItems
51402977,,STR7188,OAS134,Not PRIME,4629185211,11114.0,4,CARD,11114.0,277.85,277.85,13.8925,TAKEAWAY,,"List(List(458, Wine glass, 1644.0, 2, 3288.0), List(283, Portable Lamps, 2236.0, 1, 2236.0), List(498, Carving knifes, 1424.0, 2, 2848.0), List(523, Oil-lamp clock, 1371.0, 2, 2742.0))"


In [0]:
%sql
DESCRIBE HISTORY invoice_delta

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2025-04-13T14:47:40.000+0000,7937032294484231,ghdebasish44@gmail.com,UPDATE,"Map(predicate -> [""(InvoiceNumber#5114 = 51402977)""])",,List(2026787703974052),0413-134854-s8xl425w,0.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 41462, numCopiedRows -> 500, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 4577, scanTimeMs -> 2383, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 41535, rewriteTimeMs -> 2168)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-04-13T14:36:54.000+0000,7937032294484231,ghdebasish44@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(2026787703974052),0413-134854-s8xl425w,,WriteSerializable,False,"Map(numFiles -> 4, numOutputRows -> 1601, numOutputBytes -> 139350)",,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%sql
UPDATE invoice_delta
set CustomerType='Not PRIME'
where InvoiceNumber='51402977'

num_affected_rows
1


In [0]:
%sql
select * from invoice_delta VERSION AS OF 0

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-2286694977605771>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-2286694977605771>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;

In [0]:
%sql
RESTORE invoice_delta versiom 

In [0]:
transactions_file = "dbfs:/FileStore/transaction"
df_transactions = spark.read.parquet(transactions_file)
df_transactions.show(5)

+----------+----------+----------+---------------+----------+----+-----+---+-------------+------+-----------+
|   cust_id|start_date|  end_date|         txn_id|      date|year|month|day| expense_type|   amt|       city|
+----------+----------+----------+---------------+----------+----+-----+---+-------------+------+-----------+
|C0YDPQWPBJ|2010-07-01|2018-12-01|TZ5SMKZY9S03OQJ|2018-10-07|2018|   10|  7|Entertainment| 10.42|     boston|
|C0YDPQWPBJ|2010-07-01|2018-12-01|TYIAPPNU066CJ5R|2016-03-27|2016|    3| 27| Motor/Travel| 44.34|   portland|
|C0YDPQWPBJ|2010-07-01|2018-12-01|TETSXIK4BLXHJ6W|2011-04-11|2011|    4| 11|Entertainment|  3.18|    chicago|
|C0YDPQWPBJ|2010-07-01|2018-12-01|TQKL1QFJY3EM8LO|2018-02-22|2018|    2| 22|    Groceries|268.97|los_angeles|
|C0YDPQWPBJ|2010-07-01|2018-12-01|TYL6DFP09PPXMVB|2010-10-16|2010|   10| 16|Entertainment|  2.66|    chicago|
+----------+----------+----------+---------------+----------+----+-----+---+-------------+------+-----------+
only showi

In [0]:
df_transactions.rdd.getNumPartitions()

Out[5]: 12

In [0]:
from pyspark.sql.functions import *

display(df_transactions.withColumn('partnid',spark_partition_id()).groupBy('partnid').count().alias('no_of_recs'))

partnid,count
0,3592625
1,3663839
2,3454630
3,3657055
4,3494213
5,3408595
6,3253507
7,3357514
8,3129149
9,3130443


In [0]:
df_transactions=df_transactions.repartition(3)

In [0]:
df_transactions.rdd.getNumPartitions()

Out[8]: 3

In [0]:
display(df_transactions.withColumn('partnid',spark_partition_id()).groupBy('partnid').count().alias('no_of_recs'))

partnid,count
0,13263365
1,13263363
2,13263364


In [0]:
dbutils.fs.mkdirs('dbfs:/FileStore/parquet')
dbutils.fs.mkdirs('dbfs:/FileStore/delta')

Out[10]: True

**write as parquet file**

In [0]:
df_transactions.write.format('parquet').mode('overwrite').save('dbfs:/FileStore/parquet')

In [0]:
display(spark.read.text('dbfs:/FileStore/parquet/part-00000-tid-5947889247407689251-fa9b6987-602f-4d0b-af47-9c5d7383cf99-55-1-c000.snappy.parquet'))

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-1557465436307198>:1[0m
[0;32m----> 1[0m display([43mspark[49m[38;5;241;43m.[39;49m[43mread[49m[38;5;241;43m.[39;49m[43mtext[49m[43m([49m[38;5;124;43m'[39;49m[38;5;124;43mdbfs:/FileStore/parquet/part-00000-tid-5947889247407689251-fa9b6987-602f-4d0b-af47-9c5d7383cf99-55-1-c000.snappy.parquet[39;49m[38;5;124;43m'[39;49m[43m)[49m)

File [0;32m/databricks/spark/python/pyspark/instrumentation_utils.py:48[0m, in [0;36m_wrap_function.<locals>.wrapper[0;34m(*args, **kwargs)[0m
[1;32m     46[0m start [38;5;241m=[39m time[38;5;241m.[39mperf_counter()
[1;32m     47[0m [38;5;28;01mtry[39;00m:
[0;32m---> 48[0m     res [38;5;241m=[39m [43mfunc[49m[43m([49m[38;5;241;43m*[39;49m[43margs[49m[43m,[49m[43m [49m[38;5;241;43m*[39;49m[38;5;241;43m*[39;49m[43m

**write as delta file**

In [0]:
df_transactions.write.format('delta').mode('overwrite').save('dbfs:/FileStore/delta')



In [0]:
display(spark.read.text('dbfs:/FileStore/delta/_delta_log/00000000000000000001.json'))


value
"{""commitInfo"":{""timestamp"":1734246400567,""userId"":""7937032294484231"",""userName"":""ghdebasish44@gmail.com"",""operation"":""WRITE"",""operationParameters"":{""mode"":""Overwrite"",""partitionBy"":""[]""},""notebook"":{""notebookId"":""2026787703974052""},""clusterId"":""1215-062728-jpptlz7n"",""readVersion"":0,""isolationLevel"":""WriteSerializable"",""isBlindAppend"":false,""operationMetrics"":{""numFiles"":""8"",""numOutputRows"":""39790092"",""numOutputBytes"":""1042685587""},""engineInfo"":""Databricks-Runtime/12.2.x-scala2.12"",""txnId"":""574b172a-7ca2-49e7-a38d-b303d137f06b""}}"
"{""add"":{""path"":""part-00000-a7b1e053-cae2-4056-a911-26b6ab861029-c000.snappy.parquet"",""partitionValues"":{},""size"":130336472,""modificationTime"":1734246392000,""dataChange"":true,""stats"":""{\""numRecords\"":4973764,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T0000BV2KA4QBFY\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.39\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZT7G7RH7CQ6\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1780750,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000000"",""MIN_INSERTION_TIME"":""1734246389000000"",""MAX_INSERTION_TIME"":""1734246389000000"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00001-3e70f537-9b6d-42c1-987a-5d7c6495e340-c000.snappy.parquet"",""partitionValues"":{},""size"":130387432,""modificationTime"":1734246392000,""dataChange"":true,""stats"":""{\""numRecords\"":4973762,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T00004K2J2VJTOT\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.4\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZQ3M54YRMJT\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.98\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1780521,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000001"",""MIN_INSERTION_TIME"":""1734246389000001"",""MAX_INSERTION_TIME"":""1734246389000001"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00002-6b144dcf-63b8-485a-9d9c-ba0ca8f22a7b-c000.snappy.parquet"",""partitionValues"":{},""size"":130315560,""modificationTime"":1734246392000,""dataChange"":true,""stats"":""{\""numRecords\"":4973762,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T00003362PP4YMS\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.38\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZYTLNAR40N5\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.98\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1780722,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000002"",""MIN_INSERTION_TIME"":""1734246389000002"",""MAX_INSERTION_TIME"":""1734246389000002"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00003-38c297a9-1ce6-441a-8d6c-6eb8815f10a7-c000.snappy.parquet"",""partitionValues"":{},""size"":130326438,""modificationTime"":1734246389000,""dataChange"":true,""stats"":""{\""numRecords\"":4973759,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T00000VFHO944EF\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.39\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZK0O34IPFYK\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1778163,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000003"",""MIN_INSERTION_TIME"":""1734246389000003"",""MAX_INSERTION_TIME"":""1734246389000003"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00004-93e6e277-8c81-4952-802e-bef3f1320aed-c000.snappy.parquet"",""partitionValues"":{},""size"":130281782,""modificationTime"":1734246395000,""dataChange"":true,""stats"":""{\""numRecords\"":4973760,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T000035U4BG57TB\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.38\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZSK1C84WZE9\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1780148,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000004"",""MIN_INSERTION_TIME"":""1734246389000004"",""MAX_INSERTION_TIME"":""1734246389000004"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00005-66c82753-43c9-46ef-83e4-7b8223d096cc-c000.snappy.parquet"",""partitionValues"":{},""size"":130384882,""modificationTime"":1734246395000,""dataChange"":true,""stats"":""{\""numRecords\"":4973761,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T000106JHI52M8F\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.38\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZINXWIAP6DH\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.97\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1780358,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000005"",""MIN_INSERTION_TIME"":""1734246389000005"",""MAX_INSERTION_TIME"":""1734246389000005"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00006-cabf1f3b-1b0c-4fd4-b540-2ae9af6293ad-c000.snappy.parquet"",""partitionValues"":{},""size"":130268427,""modificationTime"":1734246394000,""dataChange"":true,""stats"":""{\""numRecords\"":4973762,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T00003M9WZLBA8A\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.39\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZC8H40GRRTX\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1781840,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000006"",""MIN_INSERTION_TIME"":""1734246389000006"",""MAX_INSERTION_TIME"":""1734246389000006"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00007-df0ba4e1-2751-4d46-b5e5-4ef0f5978d56-c000.snappy.parquet"",""partitionValues"":{},""size"":130384594,""modificationTime"":1734246396000,""dataChange"":true,""stats"":""{\""numRecords\"":4973762,\""minValues\"":{\""cust_id\"":\""C007YEYTX9\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-01-01\"",\""txn_id\"":\""T00002Z1ZIRW7D3\"",\""date\"":\""2010-01-01\"",\""year\"":\""2010\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""0.36\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZZS42AMEK\"",\""start_date\"":\""2014-04-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZZZXBQ45X5SZ3\"",\""date\"":\""2020-12-27\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""999.98\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":1777344,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734246389000007"",""MIN_INSERTION_TIME"":""1734246389000007"",""MAX_INSERTION_TIME"":""1734246389000007"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""remove"":{""path"":""part-00000-b4bc66ca-6822-4d95-a690-94f8886b741a-c000.snappy.parquet"",""deletionTimestamp"":1734246400555,""dataChange"":true,""extendedFileMetadata"":true,""partitionValues"":{},""size"":343335674,""tags"":{""INSERTION_TIME"":""1734245837000000"",""MIN_INSERTION_TIME"":""1734245837000000"",""MAX_INSERTION_TIME"":""1734245837000000"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"


In [0]:
df_transactions=df_transactions.repartition(8)



In [0]:
df_transactions.write.format('delta').mode('overwrite').save('dbfs:/FileStore/delta')



In [0]:
#dbutils.fs.rm("dbfs:/FileStore/customers.parquet",True)

Out[18]: True

In [0]:
df_transactions=df_transactions.limit(2000)
df_transactions.show()

+----------+----------+----------+---------------+----------+----+-----+---+-------------------+------+-------------+
|   cust_id|start_date|  end_date|         txn_id|      date|year|month|day|       expense_type|   amt|         city|
+----------+----------+----------+---------------+----------+----+-----+---+-------------------+------+-------------+
|CGITEKW8I0|2010-11-01|2018-03-01|TLL89MHZOLJ3PV5|2014-10-13|2014|   10| 13|           Gambling| 33.32|      seattle|
|C0YDPQWPBJ|2010-12-01|2020-09-01|TWZ4AJFU74WDVC8|2017-09-22|2017|    9| 22|           Clothing|224.31|      seattle|
|C0YDPQWPBJ|2010-10-01|2019-04-01|TR7K1YEO88HDY1W|2015-11-04|2015|   11|  4|      Entertainment| 71.02|     new_york|
|CP665RP38K|2011-08-01|2019-12-01|TC81VTL94WOFOO5|2013-12-05|2013|   12|  5|           Clothing|103.06|     portland|
|CGAO9TU7LZ|2012-12-01|      null|TZKI8PPWPZEN4W3|2019-09-07|2019|    9|  7|      Entertainment|  7.63|    san_diego|
|C0YDPQWPBJ|2010-07-01|2019-06-01|T5ZOKQL23X59QI8|2017-0

In [0]:
df_transactions.write.format('delta').mode('overwrite').save('dbfs:/FileStore/delta')

In [0]:
df_transactions.rdd.getNumPartitions()

Out[21]: 1

In [0]:
display(spark.read.text('dbfs:/user/hive/warehouse/transaction_db.db/transtable/_delta_log/00000000000000000000.json'))

value
"{""commitInfo"":{""timestamp"":1734330911205,""userId"":""7937032294484231"",""userName"":""ghdebasish44@gmail.com"",""operation"":""CREATE OR REPLACE TABLE AS SELECT"",""operationParameters"":{""isManaged"":""true"",""description"":null,""partitionBy"":""[]"",""properties"":""{}""},""notebook"":{""notebookId"":""2026787703974052""},""clusterId"":""1216-045718-rfwode6u"",""isolationLevel"":""WriteSerializable"",""isBlindAppend"":false,""operationMetrics"":{""numFiles"":""10"",""numOutputRows"":""2000"",""numOutputBytes"":""114122""},""engineInfo"":""Databricks-Runtime/12.2.x-scala2.12"",""txnId"":""dd2524fb-4597-4e7a-9af1-7bc7da5ade9d""}}"
"{""protocol"":{""minReaderVersion"":1,""minWriterVersion"":2}}"
"{""metaData"":{""id"":""5fd0fb45-989d-4878-823a-46943711dd78"",""format"":{""provider"":""parquet"",""options"":{}},""schemaString"":""{\""type\"":\""struct\"",\""fields\"":[{\""name\"":\""cust_id\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""start_date\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""end_date\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""txn_id\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""date\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""year\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""month\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""day\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""expense_type\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""amt\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""city\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}}]}"",""partitionColumns"":[],""configuration"":{},""createdTime"":1734330763688}}"
"{""add"":{""path"":""part-00008-57479a5c-2476-48cc-914a-65dcae6ac6b1-c000.snappy.parquet"",""partitionValues"":{},""size"":12241,""modificationTime"":1734330904000,""dataChange"":true,""stats"":""{\""numRecords\"":228,\""minValues\"":{\""cust_id\"":\""C0EFPK9NVV\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-11-01\"",\""txn_id\"":\""T084WZEOURBCD22\"",\""date\"":\""2016-01-01\"",\""year\"":\""2016\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""10.07\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZUI7S0SQV\"",\""start_date\"":\""2013-11-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZVWOD9YKKA01ME\"",\""date\"":\""2016-12-27\"",\""year\"":\""2016\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""99.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":77,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000000"",""MIN_INSERTION_TIME"":""1734330904000000"",""MAX_INSERTION_TIME"":""1734330904000000"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00010-635263c7-7da5-407d-906c-7c3c087144df-c000.snappy.parquet"",""partitionValues"":{},""size"":14660,""modificationTime"":1734330904000,""dataChange"":true,""stats"":""{\""numRecords\"":289,\""minValues\"":{\""cust_id\"":\""C00WRSJF1Q\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-08-01\"",\""txn_id\"":\""T0466NSALWJ0JZK\"",\""date\"":\""2012-01-01\"",\""year\"":\""2012\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""1.88\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZUI7S0SQV\"",\""start_date\"":\""2014-03-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZVQJWWY551G9A6\"",\""date\"":\""2020-12-25\"",\""year\"":\""2020\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""95.18\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":114,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000001"",""MIN_INSERTION_TIME"":""1734330904000001"",""MAX_INSERTION_TIME"":""1734330904000001"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00025-51715bcc-fbca-40e3-a407-26e66b5f6b6a-c000.snappy.parquet"",""partitionValues"":{},""size"":11159,""modificationTime"":1734330905000,""dataChange"":true,""stats"":""{\""numRecords\"":190,\""minValues\"":{\""cust_id\"":\""C00WRSJF1Q\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2019-04-01\"",\""txn_id\"":\""T061HMEN7P16LFH\"",\""date\"":\""2019-01-03\"",\""year\"":\""2019\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""10.16\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZUI7S0SQV\"",\""start_date\"":\""2013-11-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZSFJ0S6CQHIISO\"",\""date\"":\""2019-12-26\"",\""year\"":\""2019\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Savings\"",\""amt\"":\""964.17\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":91,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000002"",""MIN_INSERTION_TIME"":""1734330904000002"",""MAX_INSERTION_TIME"":""1734330904000002"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00026-a4568c4b-4993-493b-9a11-b40a426b7a55-c000.snappy.parquet"",""partitionValues"":{},""size"":12911,""modificationTime"":1734330904000,""dataChange"":true,""stats"":""{\""numRecords\"":239,\""minValues\"":{\""cust_id\"":\""C091KC311O\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-08-01\"",\""txn_id\"":\""T02NA9QXUOSBH8Y\"",\""date\"":\""2017-01-03\"",\""year\"":\""2017\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""10.0\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZUI7S0SQV\"",\""start_date\"":\""2014-03-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZW7KJZ4HYTWCOV\"",\""date\"":\""2017-12-27\"",\""year\"":\""2017\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""98.99\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":77,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000003"",""MIN_INSERTION_TIME"":""1734330904000003"",""MAX_INSERTION_TIME"":""1734330904000003"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00028-1f6d4c5a-8812-4e92-a51c-0a5b62bbe0b7-c000.snappy.parquet"",""partitionValues"":{},""size"":12591,""modificationTime"":1734330905000,""dataChange"":true,""stats"":""{\""numRecords\"":230,\""minValues\"":{\""cust_id\"":\""C091KC311O\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-08-01\"",\""txn_id\"":\""T03TA2812UGLBHV\"",\""date\"":\""2014-01-05\"",\""year\"":\""2014\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""10.55\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZT0HGP7JS\"",\""start_date\"":\""2014-03-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZPQE4WLXTNPXWF\"",\""date\"":\""2014-12-27\"",\""year\"":\""2014\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""95.22\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":90,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000004"",""MIN_INSERTION_TIME"":""1734330904000004"",""MAX_INSERTION_TIME"":""1734330904000004"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00031-5e36726e-a39f-4148-83b1-a961a7b85031-c000.snappy.parquet"",""partitionValues"":{},""size"":12649,""modificationTime"":1734330905000,""dataChange"":true,""stats"":""{\""numRecords\"":235,\""minValues\"":{\""cust_id\"":\""C00WRSJF1Q\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2017-07-01\"",\""txn_id\"":\""T09BFGX7S7YSGPQ\"",\""date\"":\""2013-01-01\"",\""year\"":\""2013\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""10.09\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZNXCIT1YO\"",\""start_date\"":\""2013-10-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZZP097Q3RY0II9\"",\""date\"":\""2013-12-24\"",\""year\"":\""2013\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""98.02\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":86,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000005"",""MIN_INSERTION_TIME"":""1734330904000005"",""MAX_INSERTION_TIME"":""1734330904000005"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"
"{""add"":{""path"":""part-00058-622397b6-8526-4a3d-b714-d5baabfd6c3d-c000.snappy.parquet"",""partitionValues"":{},""size"":11754,""modificationTime"":1734330906000,""dataChange"":true,""stats"":""{\""numRecords\"":207,\""minValues\"":{\""cust_id\"":\""C0I1IG7GEX\"",\""start_date\"":\""2010-01-01\"",\""end_date\"":\""2018-08-01\"",\""txn_id\"":\""T0F02BQBOA6R72L\"",\""date\"":\""2018-01-01\"",\""year\"":\""2018\"",\""month\"":\""1\"",\""day\"":\""1\"",\""expense_type\"":\""Bills and Utilities\"",\""amt\"":\""1.82\"",\""city\"":\""boston\""},\""maxValues\"":{\""cust_id\"":\""CZUI7S0SQV\"",\""start_date\"":\""2014-03-01\"",\""end_date\"":\""2020-11-01\"",\""txn_id\"":\""TZWKQ9FNDD4J13U\"",\""date\"":\""2018-12-26\"",\""year\"":\""2018\"",\""month\"":\""9\"",\""day\"":\""9\"",\""expense_type\"":\""Tax\"",\""amt\"":\""99.67\"",\""city\"":\""seattle\""},\""nullCount\"":{\""cust_id\"":0,\""start_date\"":0,\""end_date\"":86,\""txn_id\"":0,\""date\"":0,\""year\"":0,\""month\"":0,\""day\"":0,\""expense_type\"":0,\""amt\"":0,\""city\"":0}}"",""tags"":{""INSERTION_TIME"":""1734330904000006"",""MIN_INSERTION_TIME"":""1734330904000006"",""MAX_INSERTION_TIME"":""1734330904000006"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"


In [0]:
df_transactions=df_transactions.repartition('year')
df_transactions.rdd.getNumPartitions()
#df_transactions.write.format('delta').mode('overwrite').save('dbfs:/FileStore/delta')

Out[35]: 1

In [0]:
df_transactions.write.format('delta').mode('overwrite').save('dbfs:/FileStore/delta')

In [0]:
df1=spark.read.format('delta').load('dbfs:/FileStore/delta')



In [0]:
display(df1.select('year').distinct())

year
2016
2020
2012
2019
2017
2014
2013
2018
2011
2015


In [0]:
spark.conf.get("spark.sql.adaptive.enabled")

Out[66]: 'false'

In [0]:
spark.conf.set("spark.sql.adaptive.enabled",False)

In [0]:
df_transactions=df_transactions.repartition('year')
df_transactions.rdd.getNumPartitions()

Out[67]: 200

In [0]:
%sql
CREATE  DATABASE TRANSACTION_DB

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-4156791937332809>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-4156791937332809>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;

In [0]:
%sql
CREATE  table IF NOT EXISTS TRANSACTION_DB.trans
--drop table TRANSACTION_DB.trans

In [0]:
%sql
select * from TRANSACTION_DB.trans;

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-4156791937332813>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-4156791937332813>:5[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      3[0m [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[1;32m      4[0m df [38;5;241m=[39m spark[38;5;241m.[39msql(base64[38;5;241m.[39mstandard_b64decode([38;5;124m"[39m[38;5;124mc2VsZWN0ICogZnJvbSBUUkFOU0FDVElPTl9EQi50cmFucw==[39m[38;5;124m"[39m)[38;5;241m.[39mdecode())
[0;32m----> 5[0m [43mdisplay[49m[43m([49m[43mdf[49m

In [0]:
(df_transactions.write.format('delta').mode('overwrite').saveAsTable('TRANSACTION_DB.transtable')
 #repartition('year')
)

In [0]:
%sql
select * from TRANSACTION_DB.transtable limit 100;

cust_id,start_date,end_date,txn_id,date,year,month,day,expense_type,amt,city
C0YDPQWPBJ,2011-03-01,2019-11-01,TZBC21DPJAAKC1V,2016-11-01,2016,11,1,Groceries,53.86,philadelphia
C0YDPQWPBJ,2012-04-01,2020-11-01,TS2NWIIRL9CU46R,2016-01-06,2016,1,6,Entertainment,89.45,seattle
C0YDPQWPBJ,2010-07-01,2019-06-01,TGBI4TY0SEXF9PU,2016-11-13,2016,11,13,Entertainment,6.3,philadelphia
CUFWINIYU0,2012-01-01,2020-01-01,TDRTBY7GGEROXYA,2016-05-01,2016,5,1,Groceries,35.04,seattle
C0YDPQWPBJ,2010-07-01,2018-10-01,T44D8R1F9QULA4F,2016-01-18,2016,1,18,Entertainment,3.26,philadelphia
C0YDPQWPBJ,2010-10-01,2019-04-01,TLFZTXOJLV6W8I5,2016-05-19,2016,5,19,Clothing,140.72,new_york
C0YDPQWPBJ,2010-01-01,2019-07-01,TE2L1B13UZCVK8X,2016-01-01,2016,1,1,Entertainment,18.83,san_francisco
C0YDPQWPBJ,2010-04-01,2020-03-01,TKYM5J1X8XNDW1I,2016-05-24,2016,5,24,Groceries,93.77,portland
C0EFPK9NVV,2012-05-01,,TQCOUA2N62BXEHQ,2016-02-25,2016,2,25,Entertainment,23.37,new_york
C0YDPQWPBJ,2011-09-01,2020-08-01,T41LA2FY34KL6J7,2016-08-02,2016,8,2,Entertainment,31.73,portland


In [0]:
%sql
describe history TRANSACTION_DB.transtable;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2024-12-16T06:38:21.000+0000,7937032294484231,ghdebasish44@gmail.com,DELETE,"Map(predicate -> [""(year#38603 = 2012)""])",,List(2026787703974052),1216-045718-rfwode6u,0.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 14660, numCopiedRows -> 108, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 3984, numDeletedRows -> 181, scanTimeMs -> 3175, numAddedFiles -> 1, numAddedBytes -> 8143, rewriteTimeMs -> 809)",,Databricks-Runtime/12.2.x-scala2.12
0,2024-12-16T06:35:12.000+0000,7937032294484231,ghdebasish44@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(2026787703974052),1216-045718-rfwode6u,,WriteSerializable,False,"Map(numFiles -> 10, numOutputRows -> 2000, numOutputBytes -> 114122)",,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%sql
delete from  TRANSACTION_DB.transtable where year='2012';

num_affected_rows
181


##SCD1

In [0]:
customerChangedJson='[\
    {"id":"4","name":"Deb Ryan","address":"st ##21 GRT"},\
    {"id":"3","name":"Magee Cash","address":"po box ##98 Sclala Road 12"}\
]'



In [0]:
customerChangedDF=spark.read.json(sc.parallelize([customerChangedJson]))
customerChangedDF.show()



In [0]:
customerChangedDF.write.mode("overwrite").format('delta').saveAsTable('customer.customer_data_changed')



In [0]:
%sql
MERGE INTO customer.customer_data C1
USING customer.customer_data_changed C2
ON C1.id=C2.id
  WHEN MATCHED THEN
    UPDATE SET C1.name=C2.name,
                C1.address=C2.address
  WHEN NOT MATCHED THEN
    INSERT *                 



In [0]:
%sql
select * from customer.customer_data



In [0]:
%sql
DESCRIBE HISTORY customer.customer_data



In [0]:
%sql
describe extended customer.customer_data




In [0]:
# from delta.tables import DeltaTable

# #deltaTable=DeltaTable.forPath(spark,'customer.customer_data')



##SCD2

In [0]:
customerJson1='[\
    {"id":"1","name":"Julie Rich","address":"pp #255 Richmond Rd","ingest_date":"2022-03-24"},\
    {"id":"2","name":"Eden Byrd","address":"ag #315 Lexing Rd","ingest_date":"2021-05-14"},\
    {"id":"3","name":"Magee Cash","address":"po box ##33 Vel avn.","ingest_date":"2007-03-09"}\
]'



In [0]:
customerChangedJson1='[\
    {"id":"4","name":"Deb Ryan","address":"st ##21 GRT","ingest_date":"2001-03-29"},\
    {"id":"3","name":"Magee Cash","address":"po box ##98 Sclala Road 12","ingest_date":"2022-03-24"}\
]'



In [0]:
customerDF1=spark.read.json(sc.parallelize([customerJson1]))
customerDF1.show(truncate=False)



In [0]:
from pyspark.sql.functions import lit

customerDF1=customerDF1.withColumn('end_date',lit(''))\
                        .withColumn('status',lit('Current'))\
                            .withColumnRenamed('ingest_date','start_date')
                        
customerDF1.show()




In [0]:
customerDF1.write.mode('overwrite').format('delta').option("overwriteSchema", "true").saveAsTable('customer.customer_data_scd2')



In [0]:
customerChangedDF1=spark.read.json(sc.parallelize([customerChangedJson1]))
customerChangedDF1.show(truncate=False)



In [0]:
customerChangedDF1.write.mode('overwrite').format('delta').saveAsTable('customer.customer_changed_scd2')



In [0]:
%sql
select * from customer.customer_data_scd2



In [0]:
%sql
select * from customer.customer_changed_scd2




In [0]:
%sql
 SELECT  
    a.id as MergeKey,a.id,
    a.address,a.name,a.ingest_date as start_date,
    'Current' as status,
    '' as end_date
    from customer.customer_changed_scd2 a
    UNION ALL
    SELECT  
        null as MergeKey,b.id,
        b.address,b.name,b.ingest_date as start_date,
        'Current' as status,
        '' as end_date
    from customer.customer_changed_scd2 b




In [0]:
%sql
select * from customer.customer_data_scd2



####Merge

In [0]:
%sql

MERGE INTO customer.customer_data_scd2 as updt
USING (
    SELECT  
    a.id as MergeKey,a.id,
    a.address,a.name,a.ingest_date as start_date,
    'Current' as status,
    '' as end_date
    from customer.customer_changed_scd2 a
    UNION ALL
    SELECT  
        null as MergeKey,b.id,
        b.address,b.name,b.ingest_date as start_date,
        'Current' as status,
        '' as end_date
    from customer.customer_changed_scd2 b
) src
ON updt.id=src.MergeKey
WHEN MATCHED 
THEN
      UPDATE SET updt.status='Expired',
            updt.end_date=src.start_date
WHEN NOT MATCHED 
THEN
      INSERT *            




In [0]:
%sql
select * from customer.customer_data_scd2

