In [0]:
#import libraries

from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable

### Create Fact Table

In [0]:
# Load the curated car-sales data by querying the parquet folder directly with Spark SQL.
curated_query = 'SELECT * FROM parquet.`abfss://projects@projectstorageaccount1.dfs.core.windows.net/curated/CAR SALES/`'
df_curated = spark.sql(curated_query)

In [0]:
# Render the curated DataFrame for a quick visual check of its columns and values.
df_curated.display()

Branch_ID,Dealer_ID,Model_ID,Revenue,Units_Sold,Date_ID,Day,Month,Year,BranchName,DealerName,Product_Name,Model_Category,ItemPrice
BR9546,DLR0060,Jee-M10,7223451,1,DT01246,28,5,2020,Premier Motors,"Fisker, Karma Motors",Jeep,Jee,7223451.0
BR9666,DLR0062,Jee-M12,22093020,3,DT01246,30,5,2020,Puma Motors,Ford Australia Motors,Jeep,Jee,7364340.0
BR9726,DLR0063,Jee-M13,22372413,3,DT01247,31,5,2020,Power Ranger Motors,Ford do Brasil Motors,Jeep,Jee,7457471.0
XYZ9726,XYZ0063,ZYXM13,22372413,3,DT01247,31,5,2020,DataFam Motors,Datafam Dealers,Surprise,ZYXM13,7457471.0


#### Read all dimension tables

In [0]:
# Load the model, branch and dealer dimension tables from the refined schema.
# Each query returns every column of its table; the queries run in the order listed.
df_model, df_branch, df_dealer = (
    spark.sql(dim_query)
    for dim_query in (
        'SELECT * FROM car_sales_catalog.refined.dim_model',
        'SELECT * FROM car_sales_catalog.refined.dim_branch',
        'SELECT * FROM car_sales_catalog.refined.dim_dealer',
    )
)

#### Bring the dimension keys into the fact table

In [0]:
# Build the fact DataFrame: look up the surrogate key of each dimension by its
# business ID and keep only the measures plus those keys.
# Left joins keep every curated row; a row with no match in a dimension gets a
# NULL for that dimension's key.
# NOTE(review): Date_ID/Day/Month/Year from the curated data are not projected,
# so the fact carries no date attribute - confirm this is intended.
model_match = df_curated.Model_ID == df_model.Model_ID
branch_match = df_curated.Branch_ID == df_branch.Branch_ID
dealer_match = df_curated.Dealer_ID == df_dealer.Dealer_ID

df_fact = (
    df_curated
    .join(df_model, on=model_match, how='left')
    .join(df_branch, on=branch_match, how='left')
    .join(df_dealer, on=dealer_match, how='left')
    .select(
        df_curated.Revenue,
        df_curated.Units_Sold,
        df_curated.ItemPrice,
        df_model.Dim_Model_Key,
        df_branch.Dim_Branch_Key,
        df_dealer.Dim_Dealer_Key,
    )
)

In [0]:
# Render the assembled fact DataFrame to inspect the measures and the looked-up dimension keys.
df_fact.display()

Revenue,Units_Sold,ItemPrice,Dim_Model_Key,Dim_Branch_Key,Dim_Dealer_Key
7223451,1,7223451.0,49,1839,263
22093020,3,7364340.0,227,1837,219
22372413,3,7457471.0,271,1838,9
22372413,3,7457471.0,278,1840,268


In [0]:
# Persist df_fact as the fact table: upsert into it when it is already registered
# in the catalog, otherwise create it as a Delta table at the refined location.
FACT_TABLE_NAME = 'car_sales_catalog.refined.fact_sales'
FACT_TABLE_PATH = 'abfss://projects@projectstorageaccount1.dfs.core.windows.net/refined/CAR SALES/fact_sales'

# A target row is matched on the combination of the three dimension keys.
# NOTE(review): if df_fact holds more than one row per key combination, Delta
# MERGE fails because several source rows would update the same target row.
# NOTE(review): keys that are NULL (no dimension match in the left joins) never
# satisfy '=', so such rows are inserted again on every run - confirm the grain.
FACT_MERGE_CONDITION = (
    "target.Dim_Branch_Key = source.Dim_Branch_Key"
    " AND target.Dim_Dealer_Key = source.Dim_Dealer_Key"
    " AND target.Dim_Model_Key = source.Dim_Model_Key"
)

if spark.catalog.tableExists(FACT_TABLE_NAME):
    # Load the table by the same catalog name that was just checked, so the
    # existence test and the merge always address the same object instead of
    # relying on a separately hard-coded storage path.
    deltaTable = DeltaTable.forName(spark, FACT_TABLE_NAME)

    (
        deltaTable.alias("target")
        .merge(df_fact.alias("source"), FACT_MERGE_CONDITION)
        .whenMatchedUpdateAll()      # refresh the measures of existing rows
        .whenNotMatchedInsertAll()   # add rows for unseen key combinations
        .execute()
    )
else:
    # First load: write the data to the refined location and register the
    # table in the catalog under FACT_TABLE_NAME.
    (
        df_fact.write.format('delta')
        .mode('overwrite')
        .option('path', FACT_TABLE_PATH)
        .saveAsTable(FACT_TABLE_NAME)
    )