# Load silver layer

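This notebook runs the ETL (Extract, Transform, Load) process that populates the `silver` schema tables from the `bronze` schema.
Every load uses `INSERT OVERWRITE`, so re-running the notebook replaces the current contents of each silver table.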

In [0]:
%python
import time

# Wall-clock start marker for the whole notebook run; the last cell subtracts
# it from its own time.time() reading to report the total execution time.
start_time = time.time()

#### Insert data into `sales_project.silver.crm_cust_info`

In [0]:
%sql
-- Load silver.crm_cust_info: one standardized row per customer id.
--
-- The id is cast to INT once, in the inner query, and that same value is used
-- for the NULL filter, the de-duplication key and the stored cst_id. Rows whose
-- cst_id is NULL or cannot be cast to INT are excluded, so no row reaches
-- silver with a NULL key.
--
-- De-duplication keeps the row with the most recent cst_create_date per id.
-- NOTE(review): rows tied on cst_create_date are picked arbitrarily; add a
-- tie-breaker column to the ORDER BY if bronze can contain such ties.
INSERT OVERWRITE
    sales_project.silver.crm_cust_info
SELECT
    cst_id_int AS cst_id,
    cst_key,
    TRIM(cst_firstname) AS cst_firstname,
    TRIM(cst_lastname) AS cst_lastname,
    -- Expand one-letter gender codes; anything else (including NULL) -> 'n/a'.
    CASE UPPER(TRIM(cst_gndr))
        WHEN 'M' THEN 'Male'
        WHEN 'F' THEN 'Female'
        ELSE 'n/a'
    END AS cst_gndr,
    -- Expand one-letter marital-status codes; anything else -> 'n/a'.
    CASE UPPER(TRIM(cst_marital_status))
        WHEN 'M' THEN 'Married'
        WHEN 'S' THEN 'Single'
        ELSE 'n/a'
    END AS cst_marital_status,
    cst_create_date,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM (
    SELECT
        src.*,
        TRY_CAST(src.cst_id AS INT) AS cst_id_int,
        ROW_NUMBER() OVER (
            PARTITION BY TRY_CAST(src.cst_id AS INT)
            ORDER BY src.cst_create_date DESC
        ) AS recency_rank
    FROM
        sales_project.bronze.crm_cust_info AS src
    WHERE
        TRY_CAST(src.cst_id AS INT) IS NOT NULL
) AS latest
WHERE recency_rank = 1;

num_affected_rows,num_inserted_rows
18484,18484


#### Insert data into `sales_project.silver.crm_prd_info`

In [0]:
%sql
-- Rebuild silver.crm_prd_info from the bronze product table.
--   cat_id     : first 5 characters of the bronze prd_key, with '-' replaced by '_'
--   prd_key    : bronze prd_key from character 7 to the end
--   prd_cost   : NULL replaced by 0
--   prd_line   : one-letter code expanded to a label; anything else -> 'n/a'
--   prd_end_dt : one day before the next prd_start_dt recorded for the same
--                bronze prd_key (NULL when there is no later row)
INSERT OVERWRITE
    sales_project.silver.crm_prd_info
SELECT
    p.prd_id,
    REPLACE(LEFT(p.prd_key, 5), '-', '_') AS cat_id,
    SUBSTRING(p.prd_key, 7) AS prd_key,
    p.prd_nm,
    COALESCE(p.prd_cost, 0) AS prd_cost,
    CASE
        WHEN UPPER(TRIM(p.prd_line)) = 'R' THEN 'Road'
        WHEN UPPER(TRIM(p.prd_line)) = 'T' THEN 'Touring'
        WHEN UPPER(TRIM(p.prd_line)) = 'S' THEN 'Other Sales'
        WHEN UPPER(TRIM(p.prd_line)) = 'M' THEN 'Mountain'
        ELSE 'n/a'
    END AS prd_line,
    p.prd_start_dt,
    -- p.prd_key is the bronze column (the full key), not the shortened output alias.
    LEAD(p.prd_start_dt) OVER (
        PARTITION BY p.prd_key
        ORDER BY p.prd_start_dt
    ) - 1 AS prd_end_dt,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM
    sales_project.bronze.crm_prd_info AS p;

num_affected_rows,num_inserted_rows
397,397


#### Insert data into `sales_project.silver.crm_sales_details`

In [0]:
%sql
-- Rebuild silver.crm_sales_details from the bronze sales table.
--   *_dt columns : values that are negative or not exactly 8 characters long
--                  become NULL; the rest are parsed as yyyyMMdd dates.
--   sls_sales    : replaced by quantity * |price| when it is NULL, not positive,
--                  or disagrees with that product.
--   sls_price    : when NULL or not positive, derived from the bronze sls_sales
--                  divided by quantity (NULL when quantity is 0).
INSERT OVERWRITE
    sales_project.silver.crm_sales_details
SELECT
    s.sls_ord_num,
    s.sls_prd_key,
    CAST(s.sls_cust_id AS INT) AS sls_cust_id,
    CASE
        WHEN s.sls_order_dt < 0 OR LENGTH(s.sls_order_dt) <> 8 THEN NULL
        ELSE TO_DATE(CAST(s.sls_order_dt AS STRING), 'yyyyMMdd')
    END AS sls_order_dt,
    CASE
        WHEN s.sls_ship_dt < 0 OR LENGTH(s.sls_ship_dt) <> 8 THEN NULL
        ELSE TO_DATE(CAST(s.sls_ship_dt AS STRING), 'yyyyMMdd')
    END AS sls_ship_dt,
    CASE
        WHEN s.sls_due_dt < 0 OR LENGTH(s.sls_due_dt) <> 8 THEN NULL
        ELSE TO_DATE(CAST(s.sls_due_dt AS STRING), 'yyyyMMdd')
    END AS sls_due_dt,
    CASE
        WHEN s.sls_sales IS NULL
          OR s.sls_sales <= 0
          OR s.sls_sales <> s.expected_sales
            THEN s.expected_sales
        ELSE s.sls_sales
    END AS sls_sales,
    s.sls_quantity,
    CASE
        -- s.sls_sales here is the bronze value, not the corrected one above.
        WHEN s.sls_price IS NULL OR s.sls_price <= 0
            THEN s.sls_sales / NULLIF(s.sls_quantity, 0)
        ELSE s.sls_price
    END AS sls_price,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM (
    SELECT
        b.*,
        b.sls_quantity * ABS(b.sls_price) AS expected_sales
    FROM
        sales_project.bronze.crm_sales_details AS b
) AS s;

num_affected_rows,num_inserted_rows
60398,60398


#### Insert data into `sales_project.silver.erp_cust_az_12`

In [0]:
%sql
-- Rebuild silver.erp_cust_az_12 from the bronze ERP customer table.
--   cid   : a leading 'NAS' prefix is removed (value kept from character 4 on)
--   bdate : birth dates later than the current timestamp become NULL
--   gen   : F/FEMALE -> 'Female', M/MALE -> 'Male', anything else -> 'n/a'
INSERT OVERWRITE
    sales_project.silver.erp_cust_az_12
SELECT
    CASE
        WHEN c.cid LIKE 'NAS%' THEN SUBSTR(c.cid, 4)
        ELSE c.cid
    END AS cid,
    CASE
        WHEN c.bdate > CURRENT_TIMESTAMP() THEN NULL
        ELSE c.bdate
    END AS bdate,
    CASE
        WHEN UPPER(TRIM(c.gen)) IN ('M', 'MALE') THEN 'Male'
        WHEN UPPER(TRIM(c.gen)) IN ('F', 'FEMALE') THEN 'Female'
        ELSE 'n/a'
    END AS gen,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM
    sales_project.bronze.erp_cust_az_12 AS c;

num_affected_rows,num_inserted_rows
18484,18484


#### Insert data into `sales_project.silver.erp_loc_a_101`

In [0]:
%sql
-- Rebuild silver.erp_loc_a_101 from the bronze ERP location table.
--   cid     : every '-' is removed
--   country : known spellings are mapped to 'United States' / 'Germany';
--             blank or NULL becomes 'n/a'; any other value is kept, trimmed.
INSERT OVERWRITE
    sales_project.silver.erp_loc_a_101
SELECT
    REPLACE(loc.cid, '-', '') AS cid,
    CASE
        WHEN UPPER(loc.cntry_trimmed) IN ('US', 'USA', 'UNITED STATES') THEN 'United States'
        WHEN UPPER(loc.cntry_trimmed) IN ('GERMANY', 'DE') THEN 'Germany'
        -- TRIM(NULL) is NULL, so this also covers a NULL bronze cntry.
        WHEN loc.cntry_trimmed IS NULL OR loc.cntry_trimmed = '' THEN 'n/a'
        ELSE loc.cntry_trimmed
    END AS country,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM (
    SELECT
        cid,
        TRIM(cntry) AS cntry_trimmed
    FROM
        sales_project.bronze.erp_loc_a_101
) AS loc;

num_affected_rows,num_inserted_rows
18484,18484


#### Insert data into `sales_project.silver.erp_px_cat_g_1_v_2`

In [0]:
%sql
-- Rebuild silver.erp_px_cat_g_1_v_2: the bronze category rows are copied
-- unchanged, with the load timestamp appended as dwh_create_date.
-- The explicit %sql magic keeps this cell runnable regardless of the
-- notebook's default language, like every other load cell in this notebook.
INSERT OVERWRITE
    sales_project.silver.erp_px_cat_g_1_v_2
SELECT
    id,
    cat,
    subcat,
    maintenance,
    CURRENT_TIMESTAMP() AS dwh_create_date
FROM
    sales_project.bronze.erp_px_cat_g_1_v_2;

num_affected_rows,num_inserted_rows
37,37


In [0]:
%python
import time

# Report the wall-clock time elapsed since start_time was recorded.
# start_time is set by the first code cell of this notebook, so that cell must
# have been run in the same session before this one.
end_time = time.time()
duration = end_time - start_time
# The elapsed seconds are rounded to two decimals for display.
print(f"Execution time: {round(duration,2)} seconds")

Execution time: 20.35599422454834 seconds
