## Steps

**1. Read the Silver layer data**

**2. Read all the dim layer tables**

**3. Join and select the necessary columns**

**4. Check and verify that the result is correct**

**5. Insert and Update in container and catalog**

#### 1. Read the silver layer data

In [0]:
from pyspark.sql.functions import *

In [0]:
# Location of the silver-layer car sales Parquet data in ADLS.
silver_carsales_path = "abfss://silver@firstendtoenddeproject.dfs.core.windows.net/carsales"

# Load every column of the silver data by querying the Parquet files directly by path.
silver_layer_data = spark.sql(f"SELECT * FROM PARQUET.`{silver_carsales_path}`")

#### 2. Read all the dim tables

In [0]:
# Load all four dimension tables from the carsales.gold schema in one pass.
# The order of the names below must match the order of the variables on the left.
dim_model_table, dim_branch_table, dim_dealer_table, dim_date_table = [
    spark.sql(f"SELECT * FROM carsales.gold.{dim_table_name}")
    for dim_table_name in (
        "model_dim_table",
        "branch_dim_table",
        "dealer_dim_table",
        "date_dim_table",
    )
]


#### 3. Join and select the necessary columns

In [0]:
# Each dimension is left-joined to the silver rows on the *_ID column that both
# sides share, so a silver row is kept even when a dimension has no match for it.
dimension_lookups = [
    (dim_model_table, "Model_ID"),
    (dim_branch_table, "Branch_ID"),
    (dim_dealer_table, "Dealer_ID"),
    (dim_date_table, "Date_ID"),
]

joined_data = silver_layer_data
for dimension, join_column in dimension_lookups:
    joined_data = joined_data.join(
        dimension,
        silver_layer_data[join_column] == dimension[join_column],
        "left",
    )

# Keep the three measure columns from silver plus the dim_*_id column of every
# dimension; all other columns are dropped.
fact_table = joined_data.select(
    silver_layer_data["Revenue"],
    silver_layer_data["Units_Sold"],
    silver_layer_data["Revenue_per_Unit"],
    dim_model_table["dim_model_id"],
    dim_branch_table["dim_branch_id"],
    dim_dealer_table["dim_dealer_id"],
    dim_date_table["dim_date_id"],
)

In [0]:
# Render the joined result in the notebook so the selected columns and the
# dim_*_id values can be inspected before anything is written.
fact_table.display()

Revenue,Units_Sold,Revenue_per_Unit,dim_model_id,dim_branch_id,dim_dealer_id,dim_date_id
13363978,2,6681989.0,22,465,118,666
17376468,3,5792156.0,110,1220,179,666
9664767,3,3221589.0,97,588,131,693
5525304,3,1841768.0,25,5,208,693
12971088,3,4323696.0,214,407,223,192
7321228,1,7321228.0,247,1813,171,597
11379294,2,5689647.0,125,267,214,597
11611234,2,5805617.0,225,847,63,745
19979446,2,9989723.0,5,1178,267,745
14181510,3,4727170.0,23,1178,129,800


#### 5. Insert and Update in container and catalog

In [0]:
from delta.tables import DeltaTable

In [0]:
# Upsert fact_table into the gold layer: merge into the existing Delta table
# when it is already registered in the catalog, otherwise create it.
fact_table_name = 'carsales.gold.fact_table'
fact_table_path = "abfss://gold@firstendtoenddeproject.dfs.core.windows.net/fact_table"

# The combination of these four dimension ids identifies a row for the merge.
merge_keys = ["dim_model_id", "dim_branch_id", "dim_dealer_id", "dim_date_id"]

if spark.catalog.tableExists(fact_table_name):
    # fact_table is built with left joins, so a dim_*_id is NULL whenever a
    # dimension lookup finds no match. With plain `=`, NULL never equals NULL,
    # so such rows would never match their earlier copy and would be inserted
    # again on every run. The null-safe operator `<=>` treats two NULLs as equal.
    merge_condition = " AND ".join(
        f"trg.{key} <=> src.{key}" for key in merge_keys
    )

    delta_tab = DeltaTable.forPath(spark, fact_table_path)

    (
        delta_tab.alias('trg')
        .merge(fact_table.alias('src'), merge_condition)
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # First run: write the data to the gold container and register it in the
    # catalog as an external Delta table at that path.
    (
        fact_table.write
        .format("delta")
        .mode("overwrite")
        .option("path", fact_table_path)
        .saveAsTable(fact_table_name)
    )

In [0]:
%sql
-- Read the fact table back from the catalog to confirm the write/merge above succeeded.
SELECT * FROM carsales.gold.fact_table

Revenue,Units_Sold,Revenue_per_Unit,dim_model_id,dim_branch_id,dim_dealer_id,dim_date_id
13363978,2,6681989.0,22,465,118,666
17376468,3,5792156.0,110,1220,179,666
9664767,3,3221589.0,97,588,131,693
5525304,3,1841768.0,25,5,208,693
12971088,3,4323696.0,214,407,223,192
7321228,1,7321228.0,247,1813,171,597
11379294,2,5689647.0,125,267,214,597
11611234,2,5805617.0,225,847,63,745
19979446,2,9989723.0,5,1178,267,745
14181510,3,4727170.0,23,1178,129,800
