# Quality checks for 'Silver' layer

Script Purpose:<br>
    This script performs various quality checks for data consistency, accuracy, 
    and standardization across the 'silver' layer. It includes checks for:<br>
    - Null or duplicate primary keys.<br>
    - Unwanted spaces in string fields.<br>
    - Data standardization and consistency.<br>
    - Invalid date ranges and orders.<br>
    - Data consistency between related fields.

Usage Notes:
    - Run these checks after loading data into the Silver layer.
    - Investigate and resolve any discrepancies found during the checks.

## Checking `sales_project.silver.crm_cust_info`

#### Check for nulls or duplicates in primary key
Expectation: no results

In [0]:
%sql
-- Primary-key check: returns every cst_id that is NULL or appears on more than one row.
SELECT
    cst_id,
    COUNT(*)
FROM sales_project.silver.crm_cust_info
GROUP BY cst_id
HAVING cst_id IS NULL
    OR COUNT(*) > 1;

cst_id,COUNT(*)


#### Check for unwanted spaces
Expectation: no results

In [0]:
-- Whitespace check: returns customers whose first or last name differs from its trimmed form.
SELECT *
FROM sales_project.silver.crm_cust_info
WHERE cst_lastname <> TRIM(cst_lastname)
    OR cst_firstname <> TRIM(cst_firstname);

cst_id,cst_key,cst_firstname,cst_lastname,cst_gndr,cst_marital_status,cst_create_date,dwh_create_date


#### Data standardization and consistency
Expectation: 'Married' and 'Single'

In [0]:
-- Lists each distinct marital-status value stored in the customer table.
SELECT DISTINCT cst_marital_status
FROM sales_project.silver.crm_cust_info;

cst_marital_status
Married
Single


## Checking `sales_project.silver.crm_prd_info`

#### Check for nulls or duplicates in primary key
Expectation: no results

In [0]:
%sql
-- Primary-key check: returns every prd_id that is NULL or appears on more than one row.
SELECT
    prd_id,
    COUNT(*)
FROM sales_project.silver.crm_prd_info
GROUP BY prd_id
HAVING prd_id IS NULL
    OR COUNT(*) > 1;

prd_id,COUNT(*)


#### Check for unwanted spaces
Expectation: no results

In [0]:
%sql
-- Whitespace check: returns products whose name differs from its trimmed form.
SELECT *
FROM sales_project.silver.crm_prd_info
WHERE prd_nm <> TRIM(prd_nm);


prd_id,cat_id,prd_key,prd_nm,prd_cost,prd_line,prd_start_dt,prd_end_dt,dwh_create_date


#### Check for nulls or negatives
Expectation: no results

In [0]:
-- Cost check: returns products whose cost is negative or missing.
SELECT *
FROM sales_project.silver.crm_prd_info
WHERE prd_cost < 0
    OR prd_cost IS NULL;

prd_id,cat_id,prd_key,prd_nm,prd_cost,prd_line,prd_start_dt,prd_end_dt,dwh_create_date


#### Data standardization and consistency

In [0]:
%sql
-- Lists each distinct product-line value stored in the product table.
SELECT DISTINCT prd_line
FROM sales_project.silver.crm_prd_info;

prd_line
Mountain
Road
Other Sales
Touring
""


#### Check for Invalid Date Orders

In [0]:
-- Date-order check: returns products whose end date falls before their start date.
SELECT *
FROM sales_project.silver.crm_prd_info
WHERE prd_start_dt > prd_end_dt;

## Checking `sales_project.silver.crm_sales_details`

#### Check for Invalid Dates
Expectation: No Invalid Dates

In [0]:
%sql
-- Invalid-date check: returns order-date values that are non-positive, are not
-- 8 characters long, or fall outside the range 19000101 to 20500101.
-- NULLIF turns a value of 0 into NULL in the result.
-- NOTE(review): this query reads the bronze table, not silver — confirm that is intentional.
SELECT NULLIF(sls_order_dt, 0) AS sls_order_dt
FROM sales_project.bronze.crm_sales_details
WHERE sls_order_dt <= 0
    OR sls_order_dt < 19000101
    OR sls_order_dt > 20500101
    OR LEN(sls_order_dt) <> 8;


sls_order_dt
""
""
""
""
""
32154.0
""
""
""
""


#### Check for Invalid Date Orders
Expectation: No Results


In [0]:
%sql
-- Date-order check: returns sales rows shipped before they were ordered
-- or due before they were shipped.
SELECT *
FROM sales_project.silver.crm_sales_details
WHERE sls_ship_dt < sls_order_dt
    OR sls_due_dt < sls_ship_dt;

sls_ord_num,sls_prd_key,sls_cust_id,sls_order_dt,sls_ship_dt,sls_due_dt,sls_sales,sls_quantity,sls_price


#### Check Data Consistency: Sales = Quantity * Price
Expectation: No Results


In [0]:
%sql
-- Consistency check: returns distinct (sales, quantity, price) combinations where
-- sales does not equal quantity * price, or where any of the three values is
-- NULL, zero, or negative.
SELECT DISTINCT
    sls_sales,
    sls_quantity,
    sls_price
FROM sales_project.silver.crm_sales_details
WHERE sls_sales <> sls_quantity * sls_price
    OR sls_sales IS NULL
    OR sls_quantity IS NULL
    OR sls_price IS NULL
    OR sls_sales <= 0
    OR sls_quantity <= 0
    OR sls_price <= 0
ORDER BY sls_sales, sls_quantity, sls_price;

sls_sales,sls_quantity,sls_price


## Checking `sales_project.silver.erp_cust_az_12`

In [0]:
%sql
-- Prefix check: returns customers whose cid does not start with 'AW'.
SELECT *
FROM sales_project.silver.erp_cust_az_12
WHERE cid NOT LIKE 'AW%';

cid,bdate,gen,dwh_create_date


#### Identify out of range dates

In [0]:
%sql
-- Range check: returns customers whose birthdate is after the current
-- date/time or earlier than 1930-01-01, oldest first.
SELECT *
FROM sales_project.silver.erp_cust_az_12
WHERE bdate > GETDATE()
    OR bdate < '1930-01-01'
ORDER BY bdate;

cid,bdate,gen,dwh_create_date
AW00012725,1916-02-10,Female,2026-01-05T21:28:24.154Z
AW00016455,1917-02-09,Female,2026-01-05T21:28:24.154Z
AW00014841,1917-06-05,Male,2026-01-05T21:28:24.154Z
AW00018034,1917-09-20,Male,2026-01-05T21:28:24.154Z
AW00014992,1918-02-11,Male,2026-01-05T21:28:24.154Z
AW00025527,1918-11-08,Male,2026-01-05T21:28:24.154Z
AW00016471,1919-02-14,Female,2026-01-05T21:28:24.154Z
AW00016325,1919-03-10,Female,2026-01-05T21:28:24.154Z
AW00028743,1920-11-14,Female,2026-01-05T21:28:24.154Z
AW00024713,1922-01-02,Male,2026-01-05T21:28:24.154Z


#### Data Standardization and Consistency

In [0]:
-- Lists each distinct gender value stored in the ERP customer table.
SELECT DISTINCT gen
FROM sales_project.silver.erp_cust_az_12;

gen
Male
Female
""


## Checking `sales_project.silver.erp_loc_a_101`

#### Data Standardization & Consistency

In [0]:
%sql
-- Lists each distinct country value stored in the ERP location table.
SELECT DISTINCT cntry
FROM sales_project.silver.erp_loc_a_101;

cntry
United Kingdom
Australia
Canada
France
United States
""
Germany


## Checking `sales_project.silver.erp_px_cat_g_1_v_2`

#### Check for unwanted spaces
Expectation: no results

In [0]:
%sql
-- Whitespace check: returns category rows where any of the four text columns
-- differs from its trimmed form.
SELECT *
FROM sales_project.silver.erp_px_cat_g_1_v_2
WHERE maintenance <> TRIM(maintenance)
    OR subcat <> TRIM(subcat)
    OR cat <> TRIM(cat)
    OR id <> TRIM(id);

id,cat,subcat,maintenance


#### Data Standardization & Consistency

In [0]:
-- Lists each distinct category value stored in the product-category table.
SELECT DISTINCT cat
FROM sales_project.silver.erp_px_cat_g_1_v_2;

cat
Clothing
Components
Accessories
Bikes
