# nb_02_tratamento_silver

Objetivo: realizar o tratamento, limpeza e padronização dos dados
da camada Bronze, preparando-os para a modelagem analítica na
camada Silver.


In [0]:
%sql
-- Select the working catalog; unqualified schema names in later cells resolve against it.
USE CATALOG mvp_catalog;

-- silver_base holds the staging views; silver holds the treated Delta tables.
CREATE SCHEMA IF NOT EXISTS silver_base;
CREATE SCHEMA IF NOT EXISTS silver;


# VIEWS

In [0]:
%sql
-- Staging view over bronze.customer.
-- Both ids are rendered as STRING, and the literal text 'NULL' is turned into a real SQL NULL.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.customer AS
SELECT
    NULLIF(CAST(src.customerid AS STRING), 'NULL') AS customerid,
    NULLIF(CAST(src.personid AS STRING), 'NULL') AS personid
FROM mvp_catalog.bronze.customer AS src;


In [0]:

%sql
-- Staging view over bronze.person.
-- businessentityid is converted with TRY_CAST, so values that are not valid
-- integers become NULL instead of raising an error; the name and type columns
-- pass through unchanged.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.person AS
SELECT
    TRY_CAST(src.businessentityid AS INT) AS businessentityid,
    src.firstname,
    src.lastname,
    src.persontype
FROM mvp_catalog.bronze.person AS src;


In [0]:
%sql
-- Staging view over bronze.email_address.
-- businessentityid is rendered as STRING with the literal text 'NULL' mapped to
-- a real SQL NULL; emailaddress passes through unchanged.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.email_address AS
SELECT
    NULLIF(CAST(src.businessentityid AS STRING), 'NULL') AS businessentityid,
    src.emailaddress
FROM mvp_catalog.bronze.email_address AS src;


In [0]:
%sql
-- Staging view over bronze.sales_order_header.
-- The three id columns are rendered as STRING with the literal text 'NULL'
-- mapped to a real SQL NULL; orderdate passes through with its bronze type.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.sales_order_header AS
SELECT
    NULLIF(CAST(src.salesorderid AS STRING), 'NULL') AS salesorderid,
    NULLIF(CAST(src.customerid AS STRING), 'NULL') AS customerid,
    NULLIF(CAST(src.territoryid AS STRING), 'NULL') AS territoryid,
    src.orderdate
FROM mvp_catalog.bronze.sales_order_header AS src;




In [0]:
%sql
-- Staging view over bronze.sales_order_detail.
-- Every column is rendered as STRING with the literal text 'NULL' mapped to a
-- real SQL NULL; numeric typing is applied later when silver.sales is built.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.sales_order_detail AS
SELECT
    NULLIF(CAST(src.salesorderdetailid AS STRING), 'NULL') AS salesorderdetailid,
    NULLIF(CAST(src.salesorderid AS STRING), 'NULL') AS salesorderid,
    NULLIF(CAST(src.productid AS STRING), 'NULL') AS productid,
    NULLIF(CAST(src.orderqty AS STRING), 'NULL') AS orderqty,
    NULLIF(CAST(src.unitprice AS STRING), 'NULL') AS unitprice,
    NULLIF(CAST(src.linetotal AS STRING), 'NULL') AS linetotal
FROM mvp_catalog.bronze.sales_order_detail AS src;




In [0]:
%sql
-- Staging view over bronze.sales_territory.
-- territoryid is rendered as STRING with the literal text 'NULL' mapped to a
-- real SQL NULL; the descriptive columns pass through unchanged.
CREATE OR REPLACE VIEW mvp_catalog.silver_base.sales_territory AS
SELECT
    NULLIF(CAST(territoryid AS STRING), 'NULL') AS territoryid,
    countryregioncode,
    name,
    -- GROUP is a reserved word; backticks make it an unambiguous column
    -- reference regardless of reserved-keyword enforcement. The output column
    -- is still named `group`.
    `group`
FROM mvp_catalog.bronze.sales_territory;


In [0]:
%sql
-- List the staging views created above.
SHOW VIEWS FROM mvp_catalog.silver_base;


# TABELAS TRATADAS

In [0]:
%sql
-- Treated product table built directly from bronze.product.
-- Columns are renamed to snake_case and typed with TRY_CAST, so values that do
-- not convert become NULL instead of failing the load.
CREATE OR REPLACE TABLE silver.product
USING DELTA
AS
SELECT
    TRY_CAST(src.productid AS INT) AS product_id,
    src.name AS product_name,
    src.productnumber AS product_number,
    TRY_CAST(src.standardcost AS DECIMAL(18,2)) AS standard_cost,
    TRY_CAST(src.listprice AS DECIMAL(18,2)) AS list_price,
    TRY_CAST(src.productsubcategoryid AS INT) AS product_subcategory_id,
    TRY_CAST(src.sellstartdate AS DATE) AS sell_start_date,
    TRY_CAST(src.sellenddate AS DATE) AS sell_end_date
FROM bronze.product AS src;




In [0]:
%sql
-- Treated product subcategory table built directly from bronze.product_subcategory.
-- Ids are typed with TRY_CAST (non-convertible values become NULL) and columns
-- are renamed to snake_case.
CREATE OR REPLACE TABLE silver.product_subcategory
USING DELTA
AS
SELECT
    TRY_CAST(src.productsubcategoryid AS INT) AS product_subcategory_id,
    src.name AS subcategory_name,
    TRY_CAST(src.productcategoryid AS INT) AS product_category_id
FROM bronze.product_subcategory AS src;

In [0]:
%sql
-- Treated product category table built directly from bronze.product_category.
-- The id is typed with TRY_CAST (non-convertible values become NULL).
CREATE OR REPLACE TABLE silver.product_category
USING DELTA
AS
SELECT
    TRY_CAST(src.productcategoryid AS INT) AS product_category_id,
    src.name AS category_name
FROM bronze.product_category AS src;

In [0]:
%sql
-- Treated customer table: customers enriched with person and e-mail attributes.
-- Both joins are LEFT joins, so customers without a matching person or e-mail
-- are kept with NULLs in the enriched columns.
-- NOTE(review): if a person can have more than one row in
-- silver_base.email_address, customer_id will repeat here; confirm the
-- cardinality.
CREATE OR REPLACE TABLE silver.customer
USING DELTA
AS
SELECT
    TRY_CAST(cust.customerid AS INT) AS customer_id,
    TRY_CAST(cust.personid AS INT) AS person_id,
    per.firstname AS first_name,
    per.lastname AS last_name,
    per.persontype AS person_type,
    mail.emailaddress AS email_address
FROM silver_base.customer AS cust
LEFT JOIN silver_base.person AS per
    ON TRY_CAST(cust.personid AS INT) = TRY_CAST(per.businessentityid AS INT)
LEFT JOIN silver_base.email_address AS mail
    ON TRY_CAST(per.businessentityid AS INT) = TRY_CAST(mail.businessentityid AS INT);


In [0]:
%sql
-- Treated sales territory table built from the silver_base.sales_territory view.
-- territoryid is typed with try_cast (non-convertible values become NULL) and
-- columns are renamed to snake_case.
CREATE OR REPLACE TABLE silver.sales_territory
USING DELTA
AS
SELECT
    try_cast(territoryid AS INT) AS territory_id,
    name AS territory_name,
    countryregioncode AS country_region_code,
    -- GROUP is a reserved word; backticks make it an unambiguous column
    -- reference regardless of reserved-keyword enforcement.
    `group` AS territory_group
FROM silver_base.sales_territory;



# FATO


In [0]:
%sql
-- Sales fact table: one row per matched header/detail pair, carrying the
-- order header attributes alongside the detail line values.
-- CREATE OR REPLACE (instead of plain CREATE) keeps this cell re-runnable and
-- consistent with the other silver tables; a plain CREATE fails once the table
-- already exists.
-- The join is an INNER JOIN on the INT-cast salesorderid, so rows whose id is
-- NULL or does not convert to INT never match and are dropped.
-- NOTE(review): order_date is passed through with whatever type the bronze
-- layer provides; it is not cast to DATE here. Confirm this is intended.
CREATE OR REPLACE TABLE silver.sales
USING DELTA
AS
SELECT
    try_cast(h.salesorderid AS INT) AS sales_order_id,
    try_cast(d.salesorderdetailid AS INT) AS sales_order_detail_id,
    h.orderdate AS order_date,
    try_cast(h.customerid AS INT) AS customer_id,
    try_cast(h.territoryid AS INT) AS territory_id,
    try_cast(d.productid AS INT) AS product_id,
    try_cast(d.orderqty AS INT) AS order_qty,
    try_cast(d.unitprice AS DECIMAL(18,2)) AS unit_price,
    try_cast(d.linetotal AS DECIMAL(18,2)) AS line_total
FROM silver_base.sales_order_header AS h
INNER JOIN silver_base.sales_order_detail AS d
    ON try_cast(h.salesorderid AS INT) = try_cast(d.salesorderid AS INT);







# VALIDAÇÃO

In [0]:
%sql
-- List the tables created in the silver schema.
SHOW TABLES FROM silver;


In [0]:
%sql
-- Row count of the sales fact table.
SELECT
    COUNT(1) AS total_vendas
FROM silver.sales;


In [0]:
%sql
-- Orphan check: counts sales rows whose customer_id has no match in
-- silver.customer (rows with a NULL customer_id are counted too, since NULL
-- never matches).
SELECT
    COUNT(*) AS vendas_sem_cliente
FROM silver.sales AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM silver.customer AS c
    WHERE c.customer_id = s.customer_id
);


In [0]:
%sql
-- Null audit on the key columns of the sales fact table: one counter per column.
SELECT
    SUM(IF(sales_order_id IS NULL, 1, 0)) AS null_sales_order_id,
    SUM(IF(product_id IS NULL, 1, 0)) AS null_product_id,
    SUM(IF(order_date IS NULL, 1, 0)) AS null_order_date
FROM silver.sales;


In [0]:
%sql
-- Ad-hoc preview of the fact table (all columns, arbitrary 10 rows).
SELECT s.*
FROM silver.sales AS s
LIMIT 10;


In [0]:
%sql
-- List the staging views in silver_base (resolved against the current catalog).
SHOW VIEWS FROM silver_base