In [0]:
%run "/sales_DWH/Includes/Common_function"

In [0]:
## Imports and data-lake access setup
from pyspark.sql import functions as F
# NOTE(review): Window is not referenced in the cells visible here — confirm before removing.
from pyspark.sql.window import Window
# Helper that is not defined in this notebook; presumably supplied by the %run include.
# Judging by its name it configures access to the data lake — TODO confirm.
get_access_data_lake()

In [0]:
# ETL for the Silver-layer table erp_cust_az12: Bronze Parquet -> cleaned -> merged into Silver.
class silver_erp_cust_az12():
    """Read erp_cust_az12 from Bronze, clean the latest month and merge it into Silver.

    The methods rely on names supplied by the notebook environment rather than
    by this class: ``spark``, ``F`` (pyspark.sql.functions) and
    ``icremental_load``.
    """

    def __init__(self):
        # Base path of the Bronze data in ADLS
        self.base_path = "abfss://bronze@salesdwh.dfs.core.windows.net/"

    def read_data(self):
        """Load the Bronze ``erp_cust_az12`` Parquet data set.

        Returns:
            DataFrame with the schema stored in the Parquet files.
        """
        # base_path already ends with "/", so strip it before joining to avoid "//".
        source_path = f"{self.base_path.rstrip('/')}/erp_cust_az12/"
        # Parquet files carry their own schema, so CSV-style options such as
        # inferSchema / header / samplingRatio are not needed here.
        return spark.read.format("parquet").load(source_path)

    def transform_data(self,df):
        """Keep the latest ``month`` and normalise ``cid``, ``bdate`` and ``gen``.

        Args:
            df: DataFrame containing at least the columns ``month``, ``cid``,
                ``bdate`` and ``gen``.

        Returns:
            DataFrame with the same columns as ``df``, restricted to the
            latest month, cleaned and free of exact duplicate rows.
        """
        # Only the most recent month is processed; the single aggregate row is
        # pulled to the driver to obtain the value.
        max_month = df.agg(F.max("month").alias("max_month")).first()["max_month"]
        latest = df.filter(F.col("month") == max_month)

        gen_norm = F.upper(F.trim(F.col("gen")))
        cleaned = (
            latest
            # Rows with a null in any column are discarded before cleaning.
            # NOTE(review): because of this, "n/a" below is only reached by
            # non-null, unrecognised gender values — confirm that is intended.
            .dropna()
            # Strip a leading "NAS" prefix; ids without it pass through unchanged.
            .withColumn("cid", F.regexp_replace(F.col("cid"), "^NAS", ""))
            # Birth dates in the future are replaced with null.
            .withColumn("bdate", F.when(F.col("bdate") > F.current_date(), None).otherwise(F.col("bdate")))
            # Map gender codes (case/whitespace-insensitive) to display values.
            .withColumn("gen", F.when(gen_norm.isin("F", "FEMALE"), "Female")
                                .when(gen_norm.isin("M", "MALE"), "Male")
                                .otherwise("n/a"))
        )
        # De-duplicate after normalisation so rows that only differed by the
        # "NAS" prefix or by gender spelling collapse into one; duplicate keys
        # would presumably make the downstream merge on cid ambiguous.
        return cleaned.dropDuplicates()

    def write_data(self,df):
        """Merge ``df`` into ``salesdwh_catalog.silver.erp_cust_az12`` keyed on ``cid``.

        Args:
            df: Cleaned DataFrame to load.

        Returns:
            Whatever ``icremental_load`` returns (the helper is defined outside
            this notebook).
        """
        # Define merge condition for incremental load
        merge_condition = "tgt.cid=src.cid "

        # Call incremental load function to merge data into Silver table
        return icremental_load(
            df,
            catalog_name="salesdwh_catalog",
            schema_name="silver",
            table_name="erp_cust_az12",
            merge_condition=merge_condition,
        )

    def run(self):
        """Run the full ETL (read -> transform -> write) for erp_cust_az12.

        Returns:
            The value returned by ``write_data``.
        """
        print("Starting  Transformation silver_erp_cust_az12......")
        raw_df = self.read_data()                    # Step 1: Read data
        clean_df = self.transform_data(raw_df)       # Step 2: Clean & transform
        load_result = self.write_data(clean_df)      # Step 3: Write to Silver layer
        # Report completion before handing back the load result.
        print('Done...!!!')
        return load_result



In [0]:
## Run the erp_cust_az12 Bronze -> Silver ETL end to end
start=silver_erp_cust_az12()
# Last expression of the cell, so the value returned by run() is shown as the cell output.
start.run()

Starting  Transformation silver_erp_cust_az12......


' Merge completed: [The affected rows = 17012]-----[The updated rows = 17012]----[The inserted rows =0] '