In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *


In [0]:

# Read the "init_load_flag" notebook widget and convert its text value to an int.
# Later cells compare this flag against 0 to decide where the "old" records come from.
_init_load_flag_text = dbutils.widgets.get("init_load_flag")
init_load_flag = int(_init_load_flag_text)

#### **Data Reading**

In [0]:
# Load every column of the silver customers table as the working DataFrame.
df = spark.table("databricks_catalog.silver.customers")

In [0]:
# Keep one row per customer_id (no ordering is applied, so which duplicate
# survives is not controlled here), then preview the first ten rows.
df = df.dropDuplicates(["customer_id"])
df.limit(10).display()

customer_id,email,city,state,domains,Full_name
C00001,rushjeff@ryan.org,Johnsonmouth,MS,ryan.org,Emily Mooney
C00002,mccoykiara@kelly.com,Stephenfort,WY,kelly.com,Andrea Sellers
C00003,rebeccamiller@yahoo.com,South Stephenshire,LA,yahoo.com,Craig Hayes
C00004,lawrence05@campbell.info,Chrisland,ND,campbell.info,Bryan Scott
C00005,carrie45@yahoo.com,East Dennistown,RI,yahoo.com,Sean Vasquez
C00006,traceyramos@gmail.com,North Matthew,IN,gmail.com,Kevin Mccarthy
C00007,scottallen@gmail.com,Joneshaven,VA,gmail.com,Amanda Doyle
C00008,sullivanjeremy@horton-adams.com,South Nathanfurt,CT,horton-adams.com,Paul Campos
C00009,dennis03@yahoo.com,Kimberlyview,MD,yahoo.com,Mary Green
C00010,charles58@murillo.net,West Hector,OK,murillo.net,James Myers


## **Dividing New vs. Old Records**

In [0]:
# Build df_old: the key and audit columns of customers already present in the
# gold dimension. Later cells left-join the incoming rows against it to split
# them into new vs. existing records.
if init_load_flag==0:
  # Flag is 0: read the keys that already exist in the gold dimension table.
  df_old=spark.sql("select Dimcustomerkey,customer_id,create_date,update_date from databricks_catalog.gold.DimCustomer")

else:
  # Any other flag value: produce an EMPTY frame with the same column names
  # (the `where 1=0` predicate filters out every row).
  # customer_id and the two date columns are typed explicitly (string / timestamp)
  # instead of integer literals, so the later join on customer_id compares
  # string to string and create_date is already a timestamp, rather than
  # relying on implicit casts between mismatched types.
  df_old=spark.sql("""
    select 0 as Dimcustomerkey,
           cast(null as string) as customer_id,
           cast(null as timestamp) as create_date,
           cast(null as timestamp) as update_date
    from databricks_catalog.silver.customers
    where 1=0
  """)

In [0]:
# Show the existing-key frame built in the previous cell (it has no rows when
# the empty `where 1=0` branch was taken).
df_old.display()

Dimcustomerkey,customer_id,create_date,update_date


##### Renaming columns of df_old

In [0]:
# Prefix every df_old column with "old_" so the names cannot collide with the
# incoming frame's columns once the two are joined.
_old_column_names = {
    "Dimcustomerkey": "old_DimCustomerKey",
    "customer_id": "old_customer_id",
    "create_date": "old_create_date",
    "update_date": "old_update_date",
}
for _current_name, _prefixed_name in _old_column_names.items():
    df_old = df_old.withColumnRenamed(_current_name, _prefixed_name)

##### Applying join with old records

In [0]:
# Left join: every incoming customer row is kept; the old_* columns are filled
# only where a row with the same customer_id exists in df_old, otherwise null.
_same_customer = df["customer_id"] == df_old["old_customer_id"]
df_join = df.join(df_old, on=_same_customer, how="left")
df_join.display()

customer_id,email,city,state,domains,Full_name,old_DimCustomerKey,old_customer_id,old_create_date,old_update_date
C00001,rushjeff@ryan.org,Johnsonmouth,MS,ryan.org,Emily Mooney,,,,
C00002,mccoykiara@kelly.com,Stephenfort,WY,kelly.com,Andrea Sellers,,,,
C00003,rebeccamiller@yahoo.com,South Stephenshire,LA,yahoo.com,Craig Hayes,,,,
C00004,lawrence05@campbell.info,Chrisland,ND,campbell.info,Bryan Scott,,,,
C00005,carrie45@yahoo.com,East Dennistown,RI,yahoo.com,Sean Vasquez,,,,
C00006,traceyramos@gmail.com,North Matthew,IN,gmail.com,Kevin Mccarthy,,,,
C00007,scottallen@gmail.com,Joneshaven,VA,gmail.com,Amanda Doyle,,,,
C00008,sullivanjeremy@horton-adams.com,South Nathanfurt,CT,horton-adams.com,Paul Campos,,,,
C00009,dennis03@yahoo.com,Kimberlyview,MD,yahoo.com,Mary Green,,,,
C00010,charles58@murillo.net,West Hector,OK,murillo.net,James Myers,,,,


##### Separating new and old records

In [0]:
# Split the joined frame on the old surrogate key:
#   null key     -> no match in df_old, treated as a new record
#   non-null key -> matched a row in df_old, treated as an existing record
# Note that df_old is rebound here to the matched subset of df_join.
_old_key = df_join["old_DimCustomerKey"]
df_new = df_join.where(_old_key.isNull())
df_old = df_join.where(_old_key.isNotNull())

df_new.display()
df_old.display()
                    

customer_id,email,city,state,domains,Full_name,old_DimCustomerKey,old_customer_id,old_create_date,old_update_date
C00001,rushjeff@ryan.org,Johnsonmouth,MS,ryan.org,Emily Mooney,,,,
C00002,mccoykiara@kelly.com,Stephenfort,WY,kelly.com,Andrea Sellers,,,,
C00003,rebeccamiller@yahoo.com,South Stephenshire,LA,yahoo.com,Craig Hayes,,,,
C00004,lawrence05@campbell.info,Chrisland,ND,campbell.info,Bryan Scott,,,,
C00005,carrie45@yahoo.com,East Dennistown,RI,yahoo.com,Sean Vasquez,,,,
C00006,traceyramos@gmail.com,North Matthew,IN,gmail.com,Kevin Mccarthy,,,,
C00007,scottallen@gmail.com,Joneshaven,VA,gmail.com,Amanda Doyle,,,,
C00008,sullivanjeremy@horton-adams.com,South Nathanfurt,CT,horton-adams.com,Paul Campos,,,,
C00009,dennis03@yahoo.com,Kimberlyview,MD,yahoo.com,Mary Green,,,,
C00010,charles58@murillo.net,West Hector,OK,murillo.net,James Myers,,,,


customer_id,email,city,state,domains,Full_name,old_DimCustomerKey,old_customer_id,old_create_date,old_update_date


##### Preparing df_old

In [0]:
# Shape the existing-records frame in four steps:
#   1. drop the helper columns from the join that are no longer needed
#   2. rename old_create_date back to create_date, keeping the original value
#   3. convert create_date to a timestamp
#   4. set update_date to the current timestamp
df_old = (
    df_old
    .drop("old_DimCustomerKey", "old_customer_id", "old_update_date")
    .withColumnRenamed("old_create_date", "create_date")
    .withColumn("create_date", to_timestamp(col("create_date")))
    .withColumn("update_date", current_timestamp())
)

In [0]:
# Show the prepared existing-records frame (create_date kept, update_date refreshed).
df_old.display()

customer_id,email,city,state,domains,Full_name,create_date,update_date


##### Preparing df_new

In [0]:
# New records have no prior history: drop all old_* helper columns from the
# join and set both create_date and update_date to the current timestamp.
df_new = (
    df_new
    .drop("old_DimCustomerKey", "old_customer_id", "old_create_date", "old_update_date")
    .withColumn("create_date", current_timestamp())
    .withColumn("update_date", current_timestamp())
)

In [0]:
# Show the prepared new-records frame with its create_date / update_date columns.
df_new.display()

customer_id,email,city,state,domains,Full_name,create_date,update_date
C00001,rushjeff@ryan.org,Johnsonmouth,MS,ryan.org,Emily Mooney,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00002,mccoykiara@kelly.com,Stephenfort,WY,kelly.com,Andrea Sellers,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00003,rebeccamiller@yahoo.com,South Stephenshire,LA,yahoo.com,Craig Hayes,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00004,lawrence05@campbell.info,Chrisland,ND,campbell.info,Bryan Scott,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00005,carrie45@yahoo.com,East Dennistown,RI,yahoo.com,Sean Vasquez,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00006,traceyramos@gmail.com,North Matthew,IN,gmail.com,Kevin Mccarthy,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00007,scottallen@gmail.com,Joneshaven,VA,gmail.com,Amanda Doyle,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00008,sullivanjeremy@horton-adams.com,South Nathanfurt,CT,horton-adams.com,Paul Campos,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00009,dennis03@yahoo.com,Kimberlyview,MD,yahoo.com,Mary Green,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
C00010,charles58@murillo.net,West Hector,OK,murillo.net,James Myers,2026-01-13T04:39:52.900663Z,2026-01-13T04:39:52.900663Z
