In [0]:
#import libraries

from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

###CREATING FLAG PARAMETER

In [0]:
# Declare the "incremental_flag" text widget (notebook parameter) with default "0".
dbutils.widgets.text("incremental_flag","0")

In [0]:
# incremental_flag = "0" means first (initial) load; any other value means an
# incremental run. The value is read from the widget declared above.

incremental_flag = dbutils.widgets.get("incremental_flag")

### CREATING DIMENSION DEALER

In [0]:
%sql
-- Preview the curated CAR SALES parquet data that feeds this dimension.
SELECT
  *
FROM
  parquet.`abfss://projects@projectstorageaccount1.dfs.core.windows.net/curated/CAR SALES/`;

Branch_ID,Dealer_ID,Model_ID,Revenue,Units_Sold,Date_ID,Day,Month,Year,BranchName,DealerName,Product_Name,Model_Category,ItemPrice
BR9546,DLR0060,Jee-M10,7223451,1,DT01246,28,5,2020,Premier Motors,"Fisker, Karma Motors",Jeep,Jee,7223451.0
BR9666,DLR0062,Jee-M12,22093020,3,DT01246,30,5,2020,Puma Motors,Ford Australia Motors,Jeep,Jee,7364340.0
BR9726,DLR0063,Jee-M13,22372413,3,DT01247,31,5,2020,Power Ranger Motors,Ford do Brasil Motors,Jeep,Jee,7457471.0
XYZ9726,XYZ0063,ZYXM13,22372413,3,DT01247,31,5,2020,DataFam Motors,Datafam Dealers,Surprise,ZYXM13,7457471.0


###Fetch related columns

In [0]:
# Distinct (Dealer_ID, DealerName) pairs from the curated CAR SALES parquet data.
dealer_src_df = (
    spark.read
    .parquet("abfss://projects@projectstorageaccount1.dfs.core.windows.net/curated/CAR SALES/")
    .select("Dealer_ID", "DealerName")
    .distinct()
)


In [0]:
# Render the distinct source dealers.
display(dealer_src_df)

Dealer_ID,DealerName
DLR0063,Ford do Brasil Motors
XYZ0063,Datafam Dealers
DLR0062,Ford Australia Motors
DLR0060,"Fisker, Karma Motors"


###Dim Dealer sink - Initial and Incremental

In [0]:
# Current contents of the dimension when the table is already registered;
# otherwise an empty frame with the same three columns (built from the source
# data with an always-false predicate) so the downstream join still works.
if not spark.catalog.tableExists("car_sales_catalog.refined.dim_dealer"):
    dealer_sink_df = (
        spark.read
        .parquet("abfss://projects@projectstorageaccount1.dfs.core.windows.net/curated/CAR SALES/")
        .where("1 = 0")
        .selectExpr("1 as Dim_Dealer_Key", "Dealer_ID", "DealerName")
    )
else:
    dealer_sink_df = (
        spark.table("car_sales_catalog.refined.dim_dealer")
        .select("Dim_Dealer_Key", "Dealer_ID", "DealerName")
    )

In [0]:
# Render the rows currently held by the dimension (empty on the first load).
display(dealer_sink_df)

Dim_Dealer_Key,Dealer_ID,DealerName
1,DLR0058,Fiat do Brasil Motors
2,DLR0107,Land Rover Motors
3,DLR0129,Mia Motors
4,DLR0111,Lotus Motors
5,DLR0085,Humber Motors
6,DLR0001,AC Cars Motors
7,DLR0218,Lagonda Motors
8,DLR0082,Honda Motors
9,DLR0063,Ford do Brasil Motors
10,DLR0193,Tazzari Motors


###Filtering New Records and Old Records

In [0]:
# Left join keeps every source dealer; Dim_Dealer_Key comes back null for
# dealers that are not in the dimension yet.
dealer_id_matches = dealer_src_df.Dealer_ID == dealer_sink_df.Dealer_ID
df_filter = (
    dealer_src_df
    .join(dealer_sink_df, on=dealer_id_matches, how='left')
    .select(
        dealer_src_df.Dealer_ID,
        dealer_src_df.DealerName,
        dealer_sink_df.Dim_Dealer_Key,
    )
)


In [0]:
# Render the joined source/sink view.
display(df_filter)

Dealer_ID,DealerName,Dim_Dealer_Key
DLR0063,Ford do Brasil Motors,9.0
XYZ0063,Datafam Dealers,
DLR0062,Ford Australia Motors,219.0
DLR0060,"Fisker, Karma Motors",263.0


**df_filter_old**

In [0]:
# Dealers already in the dimension: the left join found a surrogate key for them.
has_surrogate_key = col('Dim_Dealer_Key').isNotNull()
df_filter_old = df_filter.where(has_surrogate_key)

In [0]:
# Render the dealers that already exist in the dimension.
display(df_filter_old)

Dealer_ID,DealerName,Dim_Dealer_Key
DLR0063,Ford do Brasil Motors,9
DLR0062,Ford Australia Motors,219
DLR0060,"Fisker, Karma Motors",263


**df_filter_new**

In [0]:
# Dealers with no surrogate key yet; keep only the two source columns so a
# fresh key can be attached afterwards.
df_filter_new = (
    df_filter
    .where(col('Dim_Dealer_Key').isNull())
    .select(dealer_src_df.Dealer_ID, dealer_src_df.DealerName)
)

In [0]:
# Render the dealers that are new to the dimension.
display(df_filter_new)

Dealer_ID,DealerName
XYZ0063,Datafam Dealers


###CREATE Surrogate key

####Fetch max surrogate key from existing table

In [0]:
# Next surrogate key to hand out.
#
# The decision is driven by the actual state of the dimension table (the same
# tableExists check used when reading the sink and when upserting) instead of
# by incremental_flag alone. Trusting only the flag breaks in three ways:
#   * flag "0" while the table exists -> keys restart at 1 and the merge on
#     Dim_Dealer_Key overwrites existing dealers;
#   * flag != "0" while the table is missing -> the max() query fails;
#   * table exists but is empty -> max() is NULL and None + 1 raises TypeError.
dim_dealer_exists = spark.catalog.tableExists("car_sales_catalog.refined.dim_dealer")

# The flag is still checked so a mis-configured run is visible in the output.
if (incremental_flag == '0') == dim_dealer_exists:
    print(
        f"WARNING: incremental_flag={incremental_flag!r} does not match the table state "
        f"(dim_dealer exists: {dim_dealer_exists}); using the table state."
    )

if dim_dealer_exists:
    current_max_key = spark.sql(
        "SELECT max(Dim_Dealer_Key) FROM car_sales_catalog.refined.dim_dealer"
    ).collect()[0][0]
    # max() over an empty table yields None; start numbering at 1 in that case.
    max_value = 1 if current_max_key is None else current_max_key + 1
else:
    max_value = 1

In [0]:
# Give the new dealers the keys max_value, max_value + 1, ...
#
# row_number() over a fixed ordering is used instead of
# monotonically_increasing_id(): the latter is non-deterministic and leaves
# gaps of billions between partitions, and a Delta merge may scan its source
# more than once, so a row could receive different keys within one upsert.
# Ordering by both columns keeps the numbering stable even if a Dealer_ID
# appears with more than one name.
# NOTE: a window without partitionBy moves the rows to a single partition;
# that is acceptable here because only the new dimension rows pass through it.
new_dealer_order = Window.orderBy('Dealer_ID', 'DealerName')
df_filter_new = df_filter_new.withColumn(
    'Dim_Dealer_Key',
    # Cast to long so the column type matches what monotonically_increasing_id() produced.
    (lit(max_value - 1) + row_number().over(new_dealer_order)).cast('long'),
)

In [0]:
# Render the new dealers together with their freshly assigned surrogate keys.
display(df_filter_new)

Dealer_ID,DealerName,Dim_Dealer_Key
XYZ0063,Datafam Dealers,268


###Creating final df - df_filter_old + df_filter_new

In [0]:
# Combine new and existing dealers into the frame that gets upserted.
# unionByName matches columns by name; plain union() is positional and would
# silently put values in the wrong columns if either side's column order changed.
df_final = df_filter_new.unionByName(df_filter_old)

In [0]:
# Render the combined frame that will be written to the dimension.
display(df_final)

Dealer_ID,DealerName,Dim_Dealer_Key
XYZ0063,Datafam Dealers,268
DLR0063,Ford do Brasil Motors,9
DLR0062,Ford Australia Motors,219
DLR0060,"Fisker, Karma Motors",263


###SCD Type 1 UPSERT

In [0]:
# SCD Type 1 upsert of df_final into dim_dealer: matched keys are overwritten
# in place, unmatched keys are inserted.
dim_dealer_path = "abfss://projects@projectstorageaccount1.dfs.core.windows.net/refined/CAR SALES/dim_dealer"

if not spark.catalog.tableExists('car_sales_catalog.refined.dim_dealer'):
    # Initial run: create the Delta table at the given path and register it.
    (
        df_final.write
        .format("delta")
        .mode("overwrite")
        .option("path", dim_dealer_path)
        .saveAsTable("car_sales_catalog.refined.dim_dealer")
    )
else:
    # Incremental run: merge on the surrogate key.
    deltaTable = DeltaTable.forPath(spark, dim_dealer_path)
    (
        deltaTable.alias("target")
        .merge(df_final.alias("source"), "target.Dim_Dealer_Key == source.Dim_Dealer_Key")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )