## Incremental Loading


In [0]:
CREATE DATABASE sales;

In [0]:
CREATE TABLE sales.orders (
    OrderID INT PRIMARY KEY,
    OrderDate DATE,
    CustomerID INT,
    CustomerName VARCHAR(100),
    CustomerEmail VARCHAR(150),
    ProductID INT,
    ProductName VARCHAR(100),
    ProductCategory VARCHAR(50),
    RegionID INT,
    RegionName VARCHAR(50),
    Country VARCHAR(50),
    Quantity INT,
    UnitPrice DECIMAL(10, 2),
    TotalAmount DECIMAL(12, 2)
);

In [0]:
INSERT INTO sales.Orders (
    OrderID,
    OrderDate,
    CustomerID,
    CustomerName,
    CustomerEmail,
    ProductID,
    ProductName,
    ProductCategory,
    RegionID,
    RegionName,
    Country,
    Quantity,
    UnitPrice,
    TotalAmount
)
VALUES
(1, '2024-02-01', 101, 'Alice Johnson', 'alice@example.com', 201, 'Laptop', 'Electronics', 301, 'North America', 'USA', 2, 800, 1600),
(2, '2024-02-02', 102, 'Bob Smith', 'bob@example.com', 202, 'Smartphone', 'Electronics', 302, 'Europe', 'Germany', 1, 500, 500),
(3, '2024-02-03', 103, 'Charlie Brown', 'charlie@example.com', 203, 'Tablet', 'Electronics', 303, 'Asia', 'India', 3, 300, 900),
(4, '2024-02-04', 101, 'Alice Johnson', 'alice@example.com', 204, 'Headphones', 'Accessories', 301, 'North America', 'USA', 1, 150, 150),
(5, '2024-02-05', 104, 'David Lee', 'david@example.com', 205, 'Gaming Console', 'Electronics', 302, 'Europe', 'France', 1, 400, 400),
(6, '2024-02-06', 102, 'Bob Smith', 'bob@example.com', 206, 'Smartwatch', 'Electronics', 303, 'Asia', 'China', 2, 200, 400),
(7, '2024-02-07', 105, 'Eve Adams', 'eve@example.com', 201, 'Laptop', 'Electronics', 301, 'North America', 'Canada', 1, 800, 800),
(8, '2024-02-08', 106, 'Frank Miller', 'frank@example.com', 207, 'Monitor', 'Accessories', 302, 'Europe', 'Italy', 2, 250, 500),
(9, '2024-02-09', 107, 'Grace White', 'grace@example.com', 208, 'Keyboard', 'Accessories', 303, 'Asia', 'Japan', 3, 100, 300),
(10, '2024-02-10', 104, 'David Lee', 'david@example.com', 209, 'Mouse', 'Accessories', 301, 'North America', 'USA', 1, 50, 50);

num_affected_rows,num_inserted_rows
10,10


In [0]:
SELECT * FROM sales.orders;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
1,2024-02-01,101,Alice Johnson,alice@example.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
2,2024-02-02,102,Bob Smith,bob@example.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0
3,2024-02-03,103,Charlie Brown,charlie@example.com,203,Tablet,Electronics,303,Asia,India,3,300.0,900.0
4,2024-02-04,101,Alice Johnson,alice@example.com,204,Headphones,Accessories,301,North America,USA,1,150.0,150.0
5,2024-02-05,104,David Lee,david@example.com,205,Gaming Console,Electronics,302,Europe,France,1,400.0,400.0
6,2024-02-06,102,Bob Smith,bob@example.com,206,Smartwatch,Electronics,303,Asia,China,2,200.0,400.0
7,2024-02-07,105,Eve Adams,eve@example.com,201,Laptop,Electronics,301,North America,Canada,1,800.0,800.0
8,2024-02-08,106,Frank Miller,frank@example.com,207,Monitor,Accessories,302,Europe,Italy,2,250.0,500.0
9,2024-02-09,107,Grace White,grace@example.com,208,Keyboard,Accessories,303,Asia,Japan,3,100.0,300.0
10,2024-02-10,104,David Lee,david@example.com,209,Mouse,Accessories,301,North America,USA,1,50.0,50.0


## Data Warehousing 
### - Create Data warehouse from DB

In [0]:
CREATE DATABASE salesDWH;

### Staging Layer

In [0]:
-- Initial Load

CREATE TABLE salesDWH.staging_sales
AS
SELECT * FROM sales.orders;

num_affected_rows,num_inserted_rows


### Transformation

In [0]:
SELECT * FROM salesdwh.staging_sales;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
1,2024-02-01,101,Alice Johnson,alice@example.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
2,2024-02-02,102,Bob Smith,bob@example.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0
3,2024-02-03,103,Charlie Brown,charlie@example.com,203,Tablet,Electronics,303,Asia,India,3,300.0,900.0
4,2024-02-04,101,Alice Johnson,alice@example.com,204,Headphones,Accessories,301,North America,USA,1,150.0,150.0
5,2024-02-05,104,David Lee,david@example.com,205,Gaming Console,Electronics,302,Europe,France,1,400.0,400.0
6,2024-02-06,102,Bob Smith,bob@example.com,206,Smartwatch,Electronics,303,Asia,China,2,200.0,400.0
7,2024-02-07,105,Eve Adams,eve@example.com,201,Laptop,Electronics,301,North America,Canada,1,800.0,800.0
8,2024-02-08,106,Frank Miller,frank@example.com,207,Monitor,Accessories,302,Europe,Italy,2,250.0,500.0
9,2024-02-09,107,Grace White,grace@example.com,208,Keyboard,Accessories,303,Asia,Japan,3,100.0,300.0
10,2024-02-10,104,David Lee,david@example.com,209,Mouse,Accessories,301,North America,USA,1,50.0,50.0


In [0]:
--- Now we have to do any transformation. Like for Example: Multiple the Quantity Column by 2 or real time transformation ( Removing nulls if any)

CREATE VIEW salesDWH.staging_sales_transformed
AS
SELECT * FROM salesdwh.staging_sales
WHERE Quantity IS NOT NULL; -- Same bcoz no NULL values

### Core Layer ( Curated Layer)

In [0]:
CREATE TABLE salesdwh.curated_sales
AS
SELECT * FROM salesdwh.staging_sales_transformed;

num_affected_rows,num_inserted_rows


### DATA WAREHOUSE CORE LAYER DISPLAY

In [0]:
SELECT * FROM salesdwh.curated_sales;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
1,2024-02-01,101,Alice Johnson,alice@example.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
2,2024-02-02,102,Bob Smith,bob@example.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0
3,2024-02-03,103,Charlie Brown,charlie@example.com,203,Tablet,Electronics,303,Asia,India,3,300.0,900.0
4,2024-02-04,101,Alice Johnson,alice@example.com,204,Headphones,Accessories,301,North America,USA,1,150.0,150.0
5,2024-02-05,104,David Lee,david@example.com,205,Gaming Console,Electronics,302,Europe,France,1,400.0,400.0
6,2024-02-06,102,Bob Smith,bob@example.com,206,Smartwatch,Electronics,303,Asia,China,2,200.0,400.0
7,2024-02-07,105,Eve Adams,eve@example.com,201,Laptop,Electronics,301,North America,Canada,1,800.0,800.0
8,2024-02-08,106,Frank Miller,frank@example.com,207,Monitor,Accessories,302,Europe,Italy,2,250.0,500.0
9,2024-02-09,107,Grace White,grace@example.com,208,Keyboard,Accessories,303,Asia,Japan,3,100.0,300.0
10,2024-02-10,104,David Lee,david@example.com,209,Mouse,Accessories,301,North America,USA,1,50.0,50.0


### Now adding 5 more records for Incremental Loading

In [0]:
INSERT INTO sales.orders (
    OrderID,
    OrderDate,
    CustomerID,
    CustomerName,
    CustomerEmail,
    ProductID,
    ProductName,
    ProductCategory,
    RegionID,
    RegionName,
    Country,
    Quantity,
    UnitPrice,
    TotalAmount
)
VALUES
(11, '2024-02-11', 108, 'Hannah Green', 'hannah@example.com', 210, 'Wireless Earbuds', 'Accessories', 302, 'Europe', 'Spain', 2, 120.00, 240.00),
(12, '2024-02-12', 109, 'Ian Black', 'ian@example.com', 201, 'Laptop', 'Electronics', 303, 'Asia', 'India', 1, 800.00, 800.00),
(13, '2024-02-13', 105, 'Eve Adams', 'eve@example.com', 202, 'Smartphone', 'Electronics', 301, 'North America', 'Canada', 1, 500.00, 500.00),
(14, '2024-02-14', 110, 'Jack Wilson', 'jack@example.com', 211, 'External Hard Drive', 'Accessories', 302, 'Europe', 'UK', 2, 150.00, 300.00),
(15, '2024-02-15', 101, 'Alice Johnson', 'alice@example.com', 203, 'Tablet', 'Electronics', 301, 'North America', 'USA', 1, 300.00, 300.00);

num_affected_rows,num_inserted_rows
5,5


In [0]:
SELECT * FROM sales.orders;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
1,2024-02-01,101,Alice Johnson,alice@example.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
2,2024-02-02,102,Bob Smith,bob@example.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0
3,2024-02-03,103,Charlie Brown,charlie@example.com,203,Tablet,Electronics,303,Asia,India,3,300.0,900.0
4,2024-02-04,101,Alice Johnson,alice@example.com,204,Headphones,Accessories,301,North America,USA,1,150.0,150.0
5,2024-02-05,104,David Lee,david@example.com,205,Gaming Console,Electronics,302,Europe,France,1,400.0,400.0
6,2024-02-06,102,Bob Smith,bob@example.com,206,Smartwatch,Electronics,303,Asia,China,2,200.0,400.0
7,2024-02-07,105,Eve Adams,eve@example.com,201,Laptop,Electronics,301,North America,Canada,1,800.0,800.0
8,2024-02-08,106,Frank Miller,frank@example.com,207,Monitor,Accessories,302,Europe,Italy,2,250.0,500.0
9,2024-02-09,107,Grace White,grace@example.com,208,Keyboard,Accessories,303,Asia,Japan,3,100.0,300.0
10,2024-02-10,104,David Lee,david@example.com,209,Mouse,Accessories,301,North America,USA,1,50.0,50.0


### Starting with Staging Layer where we will use CREATE OR REPLACE the staging layer table

In [0]:
-- Incremental Load

CREATE OR REPLACE TABLE salesDWH.staging_sales
AS
SELECT * FROM sales.orders
WHERE OrderDate>'2024-02-10';

num_affected_rows,num_inserted_rows


In [0]:
SELECT * FROM salesdwh.staging_sales;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
11,2024-02-11,108,Hannah Green,hannah@example.com,210,Wireless Earbuds,Accessories,302,Europe,Spain,2,120.0,240.0
12,2024-02-12,109,Ian Black,ian@example.com,201,Laptop,Electronics,303,Asia,India,1,800.0,800.0
13,2024-02-13,105,Eve Adams,eve@example.com,202,Smartphone,Electronics,301,North America,Canada,1,500.0,500.0
14,2024-02-14,110,Jack Wilson,jack@example.com,211,External Hard Drive,Accessories,302,Europe,UK,2,150.0,300.0
15,2024-02-15,101,Alice Johnson,alice@example.com,203,Tablet,Electronics,301,North America,USA,1,300.0,300.0


### Create Transformation LVL View

In [0]:
CREATE VIEW salesDWH.staging_sales_transformed
AS
SELECT * FROM salesdwh.staging_sales
WHERE Quantity IS NOT NULL; -- Same bcoz no NULL values in the table

--- No need to run it as its view it will be updated by default

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-7416001632918902>, line 1[0m
[0;32m----> 1[0m get_ipython()[38;5;241m.[39mrun_cell_magic([38;5;124m'[39m[38;5;124msql[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124mCREATE VIEW salesDWH.staging_sales_transformed[39m[38;5;130;01m\n[39;00m[38;5;124mAS[39m[38;5;130;01m\n[39;00m[38;5;124mSELECT * FROM salesdwh.staging_sales[39m[38;5;130;01m\n[39;00m[38;5;124mWHERE Quantity IS NOT NULL; -- Same bcoz no NULL values in the table[39m[38;5;130;01m\n[39;00m[38;5;130;01m\n[39;00m[38;5;124m--- No need to run it as its view it will be updated by default[39m[38;5;130;01m\n[39;00m[38;5;124m'[39m)

File [0;32m/databricks/python/lib/python3.12/site-packages/IPython/core/interactiveshell.py:2541[0m, in [0;36mInteractiveShell.run_cell_magic

In [0]:
SELECT * FROM salesDWH.staging_sales_transformed;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
11,2024-02-11,108,Hannah Green,hannah@example.com,210,Wireless Earbuds,Accessories,302,Europe,Spain,2,120.0,240.0
12,2024-02-12,109,Ian Black,ian@example.com,201,Laptop,Electronics,303,Asia,India,1,800.0,800.0
13,2024-02-13,105,Eve Adams,eve@example.com,202,Smartphone,Electronics,301,North America,Canada,1,500.0,500.0
14,2024-02-14,110,Jack Wilson,jack@example.com,211,External Hard Drive,Accessories,302,Europe,UK,2,150.0,300.0
15,2024-02-15,101,Alice Johnson,alice@example.com,203,Tablet,Electronics,301,North America,USA,1,300.0,300.0


### Core Layer (Curated)

- Now here we can't directly do 
- CREATE core_table AS SELECT * FROM transformation_view --- because we want overall data not just updated 5 records so instead we will created curated table by passing all the columns and the data from transformation_view

In [0]:
-- CREATE OR REPLACE TABLE salesdwh.curated_sales
-- AS
-- SELECT * FROM salesdwh.staging_sales_transformed;

CREATE TABLE salesdwh.curated_sales (
    OrderID INT PRIMARY KEY,
    OrderDate DATE,
    CustomerID INT,
    CustomerName VARCHAR(100),
    CustomerEmail VARCHAR(150),
    ProductID INT,
    ProductName VARCHAR(100),
    ProductCategory VARCHAR(50),
    RegionID INT,
    RegionName VARCHAR(50),
    Country VARCHAR(50),
    Quantity INT,
    UnitPrice DECIMAL(10, 2),
    TotalAmount DECIMAL(12, 2)
);

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-7416001632918905>, line 1[0m
[0;32m----> 1[0m get_ipython()[38;5;241m.[39mrun_cell_magic([38;5;124m'[39m[38;5;124msql[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124m-- CREATE OR REPLACE TABLE salesdwh.curated_sales[39m[38;5;130;01m\n[39;00m[38;5;124m-- AS[39m[38;5;130;01m\n[39;00m[38;5;124m-- SELECT * FROM salesdwh.staging_sales_transformed;[39m[38;5;130;01m\n[39;00m[38;5;130;01m\n[39;00m[38;5;124mCREATE TABLE salesdwh.curated_sales ([39m[38;5;130;01m\n[39;00m[38;5;124m    OrderID INT PRIMARY KEY,[39m[38;5;130;01m\n[39;00m[38;5;124m    OrderDate DATE,[39m[38;5;130;01m\n[39;00m[38;5;124m    CustomerID INT,[39m[38;5;130;01m\n[39;00m[38;5;124m    CustomerName VARCHAR(100),[39m[38;5;130;01m\n[39;00m[38;5;124m    Custo

In [0]:
INSERT INTO salesdwh.curated_sales
SELECT * FROM salesDWH.staging_sales_transformed;

num_affected_rows,num_inserted_rows
5,5


In [0]:
SELECT * FROM salesdwh.curated_sales;

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
1,2024-02-01,101,Alice Johnson,alice@example.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
2,2024-02-02,102,Bob Smith,bob@example.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0
3,2024-02-03,103,Charlie Brown,charlie@example.com,203,Tablet,Electronics,303,Asia,India,3,300.0,900.0
4,2024-02-04,101,Alice Johnson,alice@example.com,204,Headphones,Accessories,301,North America,USA,1,150.0,150.0
5,2024-02-05,104,David Lee,david@example.com,205,Gaming Console,Electronics,302,Europe,France,1,400.0,400.0
6,2024-02-06,102,Bob Smith,bob@example.com,206,Smartwatch,Electronics,303,Asia,China,2,200.0,400.0
7,2024-02-07,105,Eve Adams,eve@example.com,201,Laptop,Electronics,301,North America,Canada,1,800.0,800.0
8,2024-02-08,106,Frank Miller,frank@example.com,207,Monitor,Accessories,302,Europe,Italy,2,250.0,500.0
9,2024-02-09,107,Grace White,grace@example.com,208,Keyboard,Accessories,303,Asia,Japan,3,100.0,300.0
10,2024-02-10,104,David Lee,david@example.com,209,Mouse,Accessories,301,North America,USA,1,50.0,50.0
