In [0]:
%run "/sales_DWH/Includes/Common_function"

In [0]:
## Init access and functions
# F provides the Spark SQL column functions used by the transformation class below.
from pyspark.sql import functions as F
# NOTE(review): Window is imported but not referenced in the cells visible here.
from pyspark.sql.window import Window
# NOTE(review): get_access_data_lake is not defined in this notebook; presumably it is
# provided by the %run'd Common_function notebook and configures data lake access — confirm.
get_access_data_lake()

In [0]:
class silver_erp_loc_a101():
    """ETL job that loads the ``erp_loc_a101`` dataset from Bronze into Silver.

    The job reads the Bronze Parquet files, keeps the most recent load,
    normalises ``cid`` and ``cntry``, and hands the result to
    ``icremental_load`` to be merged into
    ``salesdwh_catalog.silver.erp_loc_a101``.

    NOTE(review): ``spark`` and ``icremental_load`` are not defined in this
    cell; presumably they come from the notebook runtime and the %run'd
    Common_function notebook — confirm.
    """

    def __init__(self):
        # Base path of the Bronze container in ADLS (kept with its trailing slash).
        self.base_path = "abfss://bronze@salesdwh.dfs.core.windows.net/"

    def read_data(self):
        """Read the Bronze ``erp_loc_a101`` data in Parquet format.

        Parquet files carry their own schema, so no CSV-style reader options
        (``inferSchema``, ``samplingRatio``, ``header``) are passed.

        Returns:
            The DataFrame loaded from ``<base_path>/erp_loc_a101/``.
        """
        # base_path already ends with "/"; strip it so the joined path has no "//".
        source_path = f"{self.base_path.rstrip('/')}/erp_loc_a101/"
        return spark.read.format("parquet").load(source_path)

    def _filter_latest_load(self, df):
        # Keep only the rows that belong to the most recent load.
        if "year" in df.columns:
            # Compare (year, month) together so that e.g. 2025-01 beats 2024-12;
            # taking max(month) alone would select the older year.
            latest = df.agg(F.max(F.struct("year", "month")).alias("latest")).collect()[0]["latest"]
            if latest is not None and latest["year"] is not None:
                return df.filter(
                    (F.col("year") == latest["year"]) & (F.col("month") == latest["month"])
                )
        # No usable year information: fall back to the highest month value.
        max_month = df.agg(F.max("month").alias("max_month")).collect()[0]["max_month"]
        return df.filter(F.col("month") == max_month)

    def transform_data(self, df):
        """Clean and standardise the Bronze rows of the latest load.

        Steps:
            1. Keep only the latest load (see ``_filter_latest_load``).
            2. Drop rows with a null in any column other than ``cntry``; a
               null ``cntry`` is kept so it can be mapped to ``"n/a"`` below.
            3. Remove ``-`` characters from ``cid``.
            4. Map ``cntry``: ``DE`` -> ``Germany``, ``US``/``USA`` ->
               ``United States``, null or blank -> ``n/a``, anything else is
               trimmed.
            5. Drop exact duplicate rows after normalisation, so rows that
               only differed by dashes or surrounding whitespace collapse.

        Args:
            df: DataFrame as returned by ``read_data``; must contain the
                columns ``month``, ``cid`` and ``cntry``.

        Returns:
            The cleaned DataFrame.
        """
        latest_rows = self._filter_latest_load(df)

        # Dropping nulls on every column would also remove null-cntry rows
        # and make the "n/a" mapping unreachable, so cntry is excluded here.
        required_columns = [name for name in df.columns if name != "cntry"]
        non_null_rows = latest_rows.dropna(subset=required_columns)

        trimmed_country = F.trim(F.col("cntry"))
        country_name = (
            F.when(trimmed_country == "DE", "Germany")
            .when(trimmed_country.isin("US", "USA"), "United States")
            .when(F.col("cntry").isNull() | (trimmed_country == ""), "n/a")
            .otherwise(trimmed_country)
        )

        cleaned = (
            non_null_rows
            .withColumn("cid", F.regexp_replace(F.col("cid"), "-", ""))
            .withColumn("cntry", country_name)
            # Deduplicate last: normalisation can turn distinct raw rows into identical ones.
            .dropDuplicates()
        )
        return cleaned

    def write_data(self, df):
        """Merge the cleaned rows into the Silver table.

        Args:
            df: DataFrame produced by ``transform_data``.

        Returns:
            Whatever ``icremental_load`` returns.
        """
        # Rows are matched between target (tgt) and source (src) on cid.
        merge_condition = "tgt.cid=src.cid "

        return icremental_load(
            df,
            catalog_name="salesdwh_catalog",
            schema_name="silver",
            table_name="erp_loc_a101",
            merge_condition=merge_condition,
        )

    def run(self):
        """Run the full ETL process for the Silver ``erp_loc_a101`` data.

        Returns:
            The result of ``write_data``.
        """
        print("Starting  Transformation silver_erp_loc_a101......")
        raw_df = self.read_data()                    # Step 1: Read data
        cleaned_df = self.transform_data(raw_df)     # Step 2: Clean & transform
        load_result = self.write_data(cleaned_df)    # Step 3: Write to Silver layer
        print('Done...!!!')
        return load_result



In [0]:
## Trigger Transformation
# Instantiate the job, then execute its read -> transform -> write steps.
start=silver_erp_loc_a101()
# run() returns the result of write_data(); as the last expression of the cell
# it is displayed as the cell output.
start.run()

Starting  Transformation silver_erp_loc_a101......
Done...!!!


' Merge completed: [The affected rows = 18484]-----[The updated rows = 18484]----[The inserted rows =0] '

In [0]:
%sql
-- Inspect the Silver table that the job above merges into.
SELECT * FROM salesdwh_catalog.silver.erp_loc_a101

cid,cntry,year,month,create_date
AW00012114,Canada,2025,8,2010-01-01T00:00:00Z
AW00012391,United Kingdom,2025,8,2010-01-01T00:00:00Z
AW00012850,Germany,2025,8,2010-01-01T00:00:00Z
AW00012883,United States,2025,8,2010-01-01T00:00:00Z
AW00013249,Canada,2025,8,2010-01-01T00:00:00Z
AW00013758,Canada,2025,8,2010-01-01T00:00:00Z
AW00013885,United States,2025,8,2010-01-01T00:00:00Z
AW00014096,Canada,2025,8,2010-01-01T00:00:00Z
AW00014292,United States,2025,8,2010-01-01T00:00:00Z
AW00014336,United States,2025,8,2010-01-01T00:00:00Z
