# Silver Layer Modeling
With the original data imported into the Databricks environment, we now turn our attention to modeling the dataset. This entails creating a new layer, the silver layer, in which the original schema is split into multiple tables arranged as a star schema: an arrangement in which a single fact table, containing the numeric variables of interest, is connected to multiple dimension tables, containing the characteristics that describe those metrics.

In [0]:
%sql
-- Make sure the schema that holds the silver-layer tables exists before any table is created in it.
CREATE SCHEMA IF NOT EXISTS silver;

Let's start with the fact table. As its primary key, we'll use a generated id, while the ID from the original dataset will identify the rows of the DIM_Customer table and act as a foreign key here.

In [0]:
%sql
-- Sales fact table of the silver layer.
-- The generated surrogate key (id_sale BIGINT GENERATED ALWAYS AS IDENTITY) is
-- currently disabled, and the fk_* columns are plain BIGINT columns: their
-- FOREIGN KEY constraints are only noted next to each column, not declared.
CREATE TABLE IF NOT EXISTS silver.FACT_Sales (
    ID                   BIGINT,
    fk_customer          BIGINT,  -- intended reference: silver.DIM_Customer (id_customer)
    fk_promo             BIGINT,  -- intended reference: silver.DIM_Promo (id_promo)
    fk_date              BIGINT,  -- intended reference: silver.DIM_Calendar (id_date)

    -- Quantity metrics
    MntWines             INT,
    MntFruits            INT,
    MntMeatProducts      INT,
    MntFishProducts      INT,
    MntSweetProducts     INT,
    MntGoldProds         INT,

    -- Behavior metrics
    NumDealsPurchases    INT,
    NumWebPurchases      INT,
    NumCatalogPurchases  INT,
    NumStorePurchases    INT,
    NumWebVisitsMonth    INT,

    -- Time-related
    Recency              INT
)
USING DELTA;

Next, we create the dimension tables.

In [0]:
%sql
-- Customer dimension of the silver layer: a generated surrogate key followed
-- by the customer attribute columns.
CREATE TABLE IF NOT EXISTS silver.dim_customer (
  -- Surrogate key. NOT NULL is spelled out because Databricks documents
  -- primary-key columns as columns defined NOT NULL.
  id_customer BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY,
  Year_Birth BIGINT,
  Education STRING,
  Marital_Status STRING,
  Kidhome BIGINT,
  Teenhome BIGINT,
  Income BIGINT, -- changed from DOUBLE to an integer type
  Complain BIGINT, -- was missing from the conceptual model
  -- Named explicitly so constraint errors and later ALTER statements are easy to find.
  CONSTRAINT dim_customer_pk PRIMARY KEY (id_customer)
)
USING DELTA;

-- Removed
--id_customer BIGINT GENERATED ALWAYS AS IDENTITY,
-- id_customer INT, -- Natural key

In [0]:
%sql
-- Promotion dimension of the silver layer: a generated surrogate key followed
-- by the AcceptedCmp1..5 and Response columns.
CREATE TABLE IF NOT EXISTS silver.DIM_Promo (
  -- Surrogate key. NOT NULL is spelled out because Databricks documents
  -- primary-key columns as columns defined NOT NULL.
  id_promo BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY,
  AcceptedCmp1 BIGINT,
  AcceptedCmp2 BIGINT,
  AcceptedCmp3 BIGINT,
  AcceptedCmp4 BIGINT,
  AcceptedCmp5 BIGINT,
  Response BIGINT,
  -- Named explicitly so constraint errors and later ALTER statements are easy to find.
  CONSTRAINT dim_promo_pk PRIMARY KEY (id_promo)
)
USING DELTA;

In [0]:
%sql
-- Calendar dimension of the silver layer: a generated surrogate key, a
-- full_date column, and integer year / month / day / week_of_year columns.
CREATE TABLE IF NOT EXISTS silver.DIM_Calendar (
  -- Surrogate key. NOT NULL is spelled out because Databricks documents
  -- primary-key columns as columns defined NOT NULL.
  id_date BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY,
  full_date DATE,
  year INT,
  month INT,
  day INT,
  week_of_year INT,
  -- Named explicitly so constraint errors and later ALTER statements are easy to find.
  CONSTRAINT dim_calendar_pk PRIMARY KEY (id_date)
)
USING DELTA;