# Import / Config

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

from IPython.display import Markdown, display

import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.widgets.mermaid_viz as mmw
import edurel.widgets.chatman as cmw
import edurel.utils.misc as mu

load_dotenv() 
BASE_DIR = os.getenv("BASE_DIR")
DB_DIR = f"{BASE_DIR}/databases"


  from pydantic.v1.fields import FieldInfo as FieldInfoV1


# Database

In [3]:
con = ddbu.con_file(f"{DB_DIR}/adw-etl/adw-etl.duckdb", read_only=False)
# ddbu.exe_sql_file(con, f"{DB_DIR}/adw-etl/schema.sql")
# ddbu.schema_yaml_print(con,additional_fks={})

In [4]:
sql = """
delete from factinternetsales;
delete from factproductinventory;
delete from dimproduct;
delete from dimdate;
delete from dimcustomer;
delete from DimCurrency;
delete from DimPromotion;
"""
ddbu.exe_sql(con, sql)

In [None]:
# con.close()

# ETL

## DimCurrency

In [6]:
sql = """
INSERT INTO DimCurrency (CurrencyKey, CurrencyName)
SELECT
    ROW_NUMBER() OVER (ORDER BY CurrencyCode) AS CurrencyKey,
    Name AS CurrencyName
FROM Currency;
"""
ddbu.exe_sql(con, sql)

In [7]:
sql = """
select * from DimCurrency;
"""
ddbu.sql_print(con, sql)

┌─────────────┬───────────────────────────────┐
│ CurrencyKey │         CurrencyName          │
│    int32    │            varchar            │
├─────────────┼───────────────────────────────┤
│           1 │ Emirati Dirham                │
│           2 │ Afghani                       │
│           3 │ Lek                           │
│           4 │ Armenian Dram                 │
│           5 │ Netherlands Antillian Guilder │
│           6 │ Kwanza                        │
│           7 │ Argentine Peso                │
│           8 │ Shilling                      │
│           9 │ Australian Dollar             │
│          10 │ Aruban Guilder                │
│           · │      ·                        │
│           · │      ·                        │
│           · │      ·                        │
│          96 │ Turkish Lira                  │
│          97 │ Trinidad and Tobago Dollar    │
│          98 │ New Taiwan Dollar             │
│          99 │ US Dollar               

## DimProduct

In [8]:
sql = """
INSERT INTO DimProduct 
SELECT
    ProductID AS ProductKey,
    p.Name AS EnglishProductName,
    sc.name AS EnglishProductSubcategoryName,
    c.name AS EnglishProductCategoryName
FROM Product p
     left join ProductSubcategory sc USING (ProductSubcategoryID)
     left join ProductCategory c USING (ProductCategoryID);
"""
ddbu.exe_sql(con, sql)

In [9]:
sql = """
select * from DimProduct
"""
ddbu.sql_print(con, sql)

┌────────────┬────────────────────────────┬───────────────────────────────┬────────────────────────────┐
│ ProductKey │     EnglishProductName     │ EnglishProductSubcategoryName │ EnglishProductCategoryName │
│   int32    │          varchar           │            varchar            │          varchar           │
├────────────┼────────────────────────────┼───────────────────────────────┼────────────────────────────┤
│        680 │ HL Road Frame - Black, 58  │ Road Frames                   │ Components                 │
│        706 │ HL Road Frame - Red, 58    │ Road Frames                   │ Components                 │
│        707 │ Sport-100 Helmet, Red      │ Helmets                       │ Accessories                │
│        708 │ Sport-100 Helmet, Black    │ Helmets                       │ Accessories                │
│        709 │ Mountain Bike Socks, M     │ Socks                         │ Clothing                   │
│        710 │ Mountain Bike Socks, L     │ Socks      

## DimCustomer

In [10]:
sql = """
with
DimGeography as (
SELECT DISTINCT
    ROW_NUMBER() OVER (
        ORDER BY a.City, sp.StateProvinceCode, cr.CountryRegionCode, a.PostalCode
    ) AS GeographyKey,
    a.City,
    sp.StateProvinceCode,
    sp.Name AS StateProvinceName,
    cr.CountryRegionCode,
    cr.Name AS EnglishCountryRegionName,
    a.PostalCode
FROM Address a
LEFT JOIN StateProvince sp ON a.StateProvinceID = sp.StateProvinceID
LEFT JOIN CountryRegion cr ON sp.CountryRegionCode = cr.CountryRegionCode
),
DimCustomerRaw as (
SELECT
    c.CustomerID AS CustomerKey,
    (
        SELECT g.GeographyKey
        FROM DimGeography g
        INNER JOIN Address a ON
            g.City = a.City
            AND g.PostalCode = a.PostalCode
        INNER JOIN BusinessEntityAddress bea ON a.AddressID = bea.AddressID
        WHERE bea.BusinessEntityID = p.BusinessEntityID
        LIMIT 1
    ) AS GeographyKey,
    p.FirstName,
    p.LastName,
FROM Customer c
LEFT JOIN Person p ON c.PersonID = p.BusinessEntityID)
INSERT INTO DimCustomer 
SELECT 
    CustomerKey,
    FirstName,
    LastName,
    City,
    StateProvinceCode,
    StateProvinceName,
    CountryRegionCode,
    EnglishCountryRegionName,
    PostalCode
FROM DimCustomerRaw dcr
LEFT JOIN DimGeography dg on dcr.GeographyKey = dg.GeographyKey;
"""
ddbu.exe_sql(con, sql)

In [11]:
sql = """
select * from DimCustomer;
"""
ddbu.sql_print(con, sql)

┌─────────────┬───────────┬───────────┬─────────────────┬───────────────────┬───────────────────┬───────────────────┬──────────────────────────┬────────────┐
│ CustomerKey │ FirstName │ LastName  │      City       │ StateProvinceCode │ StateProvinceName │ CountryRegionCode │ EnglishCountryRegionName │ PostalCode │
│    int32    │  varchar  │  varchar  │     varchar     │      varchar      │      varchar      │      varchar      │         varchar          │  varchar   │
├─────────────┼───────────┼───────────┼─────────────────┼───────────────────┼───────────────────┼───────────────────┼──────────────────────────┼────────────┤
│       22154 │ Yolanda   │ Yuan      │ Chalk Riber     │ ON                │ Ontario           │ CA                │ Canada                   │ K0J 1J0    │
│       18546 │ Pieter    │ Wycoff    │ Clarkston       │ GA                │ Georgia           │ US                │ United States            │ 30021      │
│       15338 │ Marie     │ Navarro   │ College Stat

## DimPromotion

In [12]:
sql = """
INSERT INTO DimPromotion (
    PromotionKey,
    EnglishPromotionName,
    DiscountPct,
    EnglishPromotionType,
    EnglishPromotionCategory,
    StartDate,
    EndDate,
    MinQty,
    MaxQty
)
SELECT
    SpecialOfferID AS PromotionKey,
    Description AS EnglishPromotionName,
    DiscountPct,
    Type AS EnglishPromotionType,
    Category AS EnglishPromotionCategory,
    StartDate,
    EndDate,
    MinQty,
    MaxQty
FROM SpecialOffer;
"""
ddbu.exe_sql(con, sql)

In [13]:
sql = """
select * from DimPromotion;
"""
ddbu.sql_print(con, sql)

┌──────────────┬────────────────────────────────────┬─────────────┬──────────────────────┬──────────────────────────┬─────────────────────┬─────────────────────┬────────┬────────┐
│ PromotionKey │        EnglishPromotionName        │ DiscountPct │ EnglishPromotionType │ EnglishPromotionCategory │      StartDate      │       EndDate       │ MinQty │ MaxQty │
│    int32     │              varchar               │    float    │       varchar        │         varchar          │      timestamp      │      timestamp      │ int32  │ int32  │
├──────────────┼────────────────────────────────────┼─────────────┼──────────────────────┼──────────────────────────┼─────────────────────┼─────────────────────┼────────┼────────┤
│            1 │ No Discount                        │         0.0 │ No Discount          │ No Discount              │ 2022-04-30 00:00:00 │ 2025-11-29 00:00:00 │      0 │   NULL │
│            2 │ Volume Discount 11 to 14           │        0.02 │ Volume Discount      │ Reseller 

## DimDate

In [14]:
sql = """
INSERT INTO DimDate (
    DateKey,
    DayNumberOfWeek,
    EnglishDayNameOfWeek,
    DayNumberOfMonth,
    DayNumberOfYear,
    WeekNumberOfYear,
    EnglishMonthName,
    MonthNumberOfYear,
    CalendarQuarter,
    CalendarYear,
    CalendarSemester
)
WITH RECURSIVE date_range AS (
    SELECT DATE '2000-01-01' AS date_value
    UNION ALL
    SELECT date_value + INTERVAL '1 day'
    FROM date_range
    WHERE date_value < DATE '2030-12-31'
)
SELECT
    CAST(STRFTIME(date_value, '%Y%m%d') AS INTEGER) AS DateKey,
    EXTRACT(ISODOW FROM date_value) AS DayNumberOfWeek,
    DAYNAME(date_value) AS EnglishDayNameOfWeek,
    EXTRACT(DAY FROM date_value) AS DayNumberOfMonth,
    EXTRACT(DOY FROM date_value) AS DayNumberOfYear,
    EXTRACT(WEEK FROM date_value) AS WeekNumberOfYear,
    MONTHNAME(date_value) AS EnglishMonthName,
    EXTRACT(MONTH FROM date_value) AS MonthNumberOfYear,
    EXTRACT(QUARTER FROM date_value) AS CalendarQuarter,
    EXTRACT(YEAR FROM date_value) AS CalendarYear,
    CASE
        WHEN EXTRACT(MONTH FROM date_value) <= 6 THEN 1
        ELSE 2
    END AS CalendarSemester
FROM date_range;
"""
ddbu.exe_sql(con, sql)

In [15]:
sql = """
select * from DimDate;
"""
ddbu.sql_print(con, sql)

┌──────────┬─────────────────┬──────────────────────┬──────────────────┬─────────────────┬──────────────────┬──────────────────┬───────────────────┬─────────────────┬──────────────┬──────────────────┐
│ DateKey  │ DayNumberOfWeek │ EnglishDayNameOfWeek │ DayNumberOfMonth │ DayNumberOfYear │ WeekNumberOfYear │ EnglishMonthName │ MonthNumberOfYear │ CalendarQuarter │ CalendarYear │ CalendarSemester │
│  int32   │      int32      │       varchar        │      int32       │      int32      │      int32       │     varchar      │       int32       │      int32      │    int32     │      int32       │
├──────────┼─────────────────┼──────────────────────┼──────────────────┼─────────────────┼──────────────────┼──────────────────┼───────────────────┼─────────────────┼──────────────┼──────────────────┤
│ 20000101 │               6 │ Saturday             │                1 │               1 │               52 │ January          │                 1 │               1 │         2000 │               

## FactInternetSales

In [16]:
sql = """
INSERT INTO FactInternetSales (
    ProductKey,
    OrderDateKey,
    DueDateKey,
    ShipDateKey,
    CustomerKey,
    PromotionKey,
    CurrencyKey,
    SalesOrderNumber,
    SalesOrderLineNumber,
    OrderQuantity,
    UnitPrice,
    SalesAmount
)
SELECT
    sod.ProductID AS ProductKey,
    CAST(STRFTIME(soh.OrderDate, '%Y%m%d') AS INTEGER) AS OrderDateKey,
    CAST(STRFTIME(soh.DueDate, '%Y%m%d') AS INTEGER) AS DueDateKey,
    CAST(STRFTIME(soh.ShipDate, '%Y%m%d') AS INTEGER) AS ShipDateKey,
    soh.CustomerID AS CustomerKey,
    sop.SpecialOfferID AS PromotionKey,
    COALESCE(
        (SELECT c.CurrencyKey
         FROM DimCurrency c
         INNER JOIN CurrencyRate cr ON c.CurrencyName = (
             SELECT Name FROM Currency WHERE CurrencyCode = cr.ToCurrencyCode
         )
         WHERE cr.CurrencyRateID = soh.CurrencyRateID
         LIMIT 1),
        1
    ) AS CurrencyKey,
    soh.SalesOrderNumber,
    sod.SalesOrderDetailID AS SalesOrderLineNumber,
    sod.OrderQty AS OrderQuantity,
    sod.UnitPrice,
    sod.LineTotal AS SalesAmount
FROM SalesOrderHeader soh
INNER JOIN SalesOrderDetail sod ON soh.SalesOrderID = sod.SalesOrderID
INNER JOIN SpecialOfferProduct sop ON
    sod.SpecialOfferID = sop.SpecialOfferID
    AND sod.ProductID = sop.ProductID
WHERE soh.OnlineOrderFlag::bigint = 1;
"""
ddbu.exe_sql(con, sql)

In [17]:
sql = """
select * from FactInternetSales;
"""
ddbu.sql_print(con, sql)

┌────────────┬──────────────┬────────────┬─────────────┬─────────────┬──────────────┬─────────────┬──────────────────┬──────────────────────┬───────────────┬───────────────┬───────────────┐
│ ProductKey │ OrderDateKey │ DueDateKey │ ShipDateKey │ CustomerKey │ PromotionKey │ CurrencyKey │ SalesOrderNumber │ SalesOrderLineNumber │ OrderQuantity │   UnitPrice   │  SalesAmount  │
│   int32    │    int32     │   int32    │    int32    │    int32    │    int32     │    int32    │     varchar      │        int32         │     int32     │ decimal(13,2) │ decimal(13,2) │
├────────────┼──────────────┼────────────┼─────────────┼─────────────┼──────────────┼─────────────┼──────────────────┼──────────────────────┼───────────────┼───────────────┼───────────────┤
│        873 │     20241012 │   20241024 │    20241019 │       14337 │            2 │           1 │ SO57938          │                65057 │             1 │          2.29 │          2.29 │
│        877 │     20241012 │   20241024 │    2024

In [18]:
sql = """
select count(*) from FactInternetSales;
"""
ddbu.sql_print(con, sql)

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        60398 │
└──────────────┘



## FactProductInventory

In [19]:
sql = """
INSERT INTO FactProductInventory (ProductKey, DateKey, Quantity)
SELECT
    pi.ProductID AS ProductKey,
    CAST(STRFTIME(pi.ModifiedDate, '%Y%m%d') AS INTEGER) AS DateKey,
    SUM(pi.Quantity) AS Quantity
FROM ProductInventory pi
GROUP BY pi.ProductID, CAST(STRFTIME(pi.ModifiedDate, '%Y%m%d') AS INTEGER);
"""
ddbu.exe_sql(con, sql)

In [20]:
sql = """
select * from FactProductInventory order by ProductKey;
"""
ddbu.sql_print(con, sql)

┌────────────┬──────────┬──────────┐
│ ProductKey │ DateKey  │ Quantity │
│   int32    │  int32   │  int32   │
├────────────┼──────────┼──────────┤
│          1 │ 20250807 │     1085 │
│          2 │ 20250807 │     1109 │
│          3 │ 20190331 │     1352 │
│          4 │ 20250807 │     1322 │
│        316 │ 20190331 │     1361 │
│        317 │ 20250808 │      593 │
│        318 │ 20250808 │      439 │
│        319 │ 20250808 │      797 │
│        320 │ 20250808 │     1136 │
│        321 │ 20250808 │     1750 │
│         ·  │     ·    │       ·  │
│         ·  │     ·    │       ·  │
│         ·  │     ·    │       ·  │
│        990 │ 20240429 │      153 │
│        991 │ 20240429 │      154 │
│        992 │ 20240429 │      164 │
│        993 │ 20240429 │      153 │
│        994 │ 20240429 │      814 │
│        995 │ 20240429 │      816 │
│        996 │ 20240429 │      970 │
│        997 │ 20240429 │      153 │
│        998 │ 20240429 │      155 │
│        999 │ 20240429 │      194 │
├