# Import / Config

In [1]:
%load_ext autoreload
%autoreload 2

In [71]:
from pathlib import Path
from edurel.utils.duck_utils import *

BASE_DIR = "/home/basis/work/github/edurel"
DB_DIR = f"{BASE_DIR}/databases/"
parquet_dir = f"{DB_DIR}/db-adw-oltp/tmp/parquet"
parquet_clean_dir = f"{DB_DIR}/db-adw-oltp/tmp/parquet-clean"



# Create Schema

In [None]:
# con = duckdb_mem_con()

In [None]:
# con.close()

In [76]:
on = True

## Address

In [6]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Address (
AddressID INT NOT NULL primary key, 
AddressLine1 TEXT NOT NULL, 
AddressLine2 TEXT NULL, 
City TEXT NOT NULL, 
StateProvinceID INT NOT NULL, 
PostalCode TEXT NOT NULL, 
SpatialLocation TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Address
    SELECT * FROM read_parquet('{parquet_dir}/Address.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Address LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Address;")   

if on: con.close()

┌───────────┬──────────────────────┬──────────────┬─────────┬─────────────────┬────────────┬──────────────────────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ AddressID │     AddressLine1     │ AddressLine2 │  City   │ StateProvinceID │ PostalCode │               SpatialLocation                │               rowguid                │    ModifiedDate     │
│   int32   │       varchar        │   varchar    │ varchar │      int32      │  varchar   │                   varchar                    │               varchar                │      timestamp      │
├───────────┼──────────────────────┼──────────────┼─────────┼─────────────────┼────────────┼──────────────────────────────────────────────┼──────────────────────────────────────┼─────────────────────┤
│         1 │ 1970 Napa Ct.        │ NULL         │ Bothell │              79 │ 98011      │ E6100000010CAE8BFC28BCE4474067A89189898A5EC0 │ 9AADCB0D-36CF-483F-84D8-585C2D4EC6E9 │ 2018-12-03 00:00:

## AddressType

In [7]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE AddressType (
AddressTypeID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO AddressType
    SELECT * FROM read_parquet('{parquet_dir}/AddressType.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM AddressType LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM AddressType;")   

if on: con.close()

┌───────────────┬─────────────┬──────────────────────────────────────┬─────────────────────┐
│ AddressTypeID │    Name     │               rowguid                │    ModifiedDate     │
│     int32     │   varchar   │               varchar                │      timestamp      │
├───────────────┼─────────────┼──────────────────────────────────────┼─────────────────────┤
│             1 │ Billing     │ B84F78B1-4EFE-4A0E-8CB7-70E9F112F886 │ 2019-04-30 00:00:00 │
│             2 │ Home        │ 41BC2FF6-F0FC-475F-8EB9-CEC0805AA0F2 │ 2019-04-30 00:00:00 │
│             3 │ Main Office │ 8EEEC28C-07A2-4FB9-AD0A-42D4A0BBC575 │ 2019-04-30 00:00:00 │
│             4 │ Primary     │ 24CB3088-4345-47C4-86C5-17B535133D1E │ 2019-04-30 00:00:00 │
│             5 │ Shipping    │ B29DA3F8-19A3-47DA-9DAA-15C84F4A83A5 │ 2019-04-30 00:00:00 │
└───────────────┴─────────────┴──────────────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│

## Product

In [8]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Product (
ProductID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
ProductNumber TEXT NOT NULL, 
MakeFlag BIT NOT NULL , 
FinishedGoodsFlag BIT NOT NULL, 
Color TEXT NULL, 
SafetyStockLevel SMALLINT NOT NULL, 
ReorderPoint SMALLINT NOT NULL, 
StandardCost DECIMAL(18,4) NOT NULL, 
ListPrice DECIMAL(18,4) NOT NULL, 
Size TEXT NULL, 
SizeUnitMeasureCode TEXT NULL, 
WeightUnitMeasureCode TEXT NULL, 
Weight DECIMAL(8, 2) NULL, 
DaysToManufacture INT NOT NULL, 
ProductLine TEXT NULL, 
Class TEXT NULL, 
Style TEXT NULL, 
ProductSubcategoryID INT NULL, 
ProductModelID INT NULL, 
SellStartDate TIMESTAMP NOT NULL, 
SellEndDate TIMESTAMP NULL, 
DiscontinuedDate TIMESTAMP NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Product
    SELECT *  FROM read_parquet('{parquet_dir}/Product.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Product LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Product;")   

if on: con.close()

┌───────────┬───────────────────────┬───────────────┬──────────────────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────┬─────────┬──────────────────┬──────────────┬───────────────┬───────────────┬─────────┬─────────────────────┬───────────────────────┬──────────────┬───────────────────┬─────────────┬─────────┬─────────┬──────────────────────┬────────────────┬─────────────────────┬─────────────┬──────────────────┬──────────────────────────────────────┬─────────────────────────┐
│ ProductID │         Name          │ ProductNumber │                             MakeFlag                             │                        FinishedGoodsFlag                         │  Color  │ SafetyStockLevel │ ReorderPoint │ StandardCost  │   ListPrice   │  Size   │ SizeUnitMeasureCode │ WeightUnitMeasureCode │    Weight    │ DaysToManufacture │ ProductLine │  Class  │  Style  │ ProductSubcategoryID │ ProductModelID │    SellStartDate    │ 

## ProductCategory

In [9]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductCategory (
ProductCategoryID INT NOT NULL, 
Name TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ProductCategory
    SELECT *  FROM read_parquet('{parquet_dir}/ProductCategory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductCategory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductCategory;")   

if on: con.close()

┌───────────────────┬─────────────┬──────────────────────────────────────┬─────────────────────┐
│ ProductCategoryID │    Name     │               rowguid                │    ModifiedDate     │
│       int32       │   varchar   │               varchar                │      timestamp      │
├───────────────────┼─────────────┼──────────────────────────────────────┼─────────────────────┤
│                 1 │ Bikes       │ CFBDA25C-DF71-47A7-B81B-64EE161AA37C │ 2019-04-30 00:00:00 │
│                 2 │ Components  │ C657828D-D808-4ABA-91A3-AF2CE02300E9 │ 2019-04-30 00:00:00 │
│                 3 │ Clothing    │ 10A7C342-CA82-48D4-8A38-46A2EB089B74 │ 2019-04-30 00:00:00 │
│                 4 │ Accessories │ 2BE3BE36-D9A2-4EEE-B593-ED895D97C2A6 │ 2019-04-30 00:00:00 │
└───────────────────┴─────────────┴──────────────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│            4 │
└──────────────┘



## UnitMeasure

In [10]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE UnitMeasure (
UnitMeasureCode TEXT NOT NULL, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO UnitMeasure
    SELECT *  FROM read_parquet('{parquet_dir}/UnitMeasure.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM UnitMeasure LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM UnitMeasure;")   

if on: con.close()

┌─────────────────┬──────────┬─────────────────────┐
│ UnitMeasureCode │   Name   │    ModifiedDate     │
│     varchar     │ varchar  │      timestamp      │
├─────────────────┼──────────┼─────────────────────┤
│ BOX             │ Boxes    │ 2019-04-30 00:00:00 │
│ BTL             │ Bottle   │ 2019-04-30 00:00:00 │
│ C               │ Celsius  │ 2019-04-30 00:00:00 │
│ CAN             │ Canister │ 2019-04-30 00:00:00 │
│ CAR             │ Carton   │ 2019-04-30 00:00:00 │
└─────────────────┴──────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           38 │
└──────────────┘



## BillOfMaterials

In [11]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE BillOfMaterials (
BillOfMaterialsID INT NOT NULL primary key, 
ProductAssemblyID INT NULL, 
ComponentID INT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NULL, 
UnitMeasureCode TEXT NOT NULL, 
BOMLevel SMALLINT NOT NULL, 
PerAssemblyQty DECIMAL(8, 2) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO BillOfMaterials
    SELECT * FROM read_parquet('{parquet_dir}/BillOfMaterials.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM BillOfMaterials LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM BillOfMaterials;")   

if on: con.close()

┌───────────────────┬───────────────────┬─────────────┬─────────────────────┬─────────────────────┬─────────────────┬──────────┬────────────────┬─────────────────────┐
│ BillOfMaterialsID │ ProductAssemblyID │ ComponentID │      StartDate      │       EndDate       │ UnitMeasureCode │ BOMLevel │ PerAssemblyQty │    ModifiedDate     │
│       int32       │       int32       │    int32    │      timestamp      │      timestamp      │     varchar     │  int16   │  decimal(8,2)  │      timestamp      │
├───────────────────┼───────────────────┼─────────────┼─────────────────────┼─────────────────────┼─────────────────┼──────────┼────────────────┼─────────────────────┤
│               893 │              NULL │         749 │ 2021-05-25 00:00:00 │ NULL                │ EA              │        0 │           1.00 │ 2021-05-11 00:00:00 │
│               271 │              NULL │         750 │ 2021-03-03 00:00:00 │ 2021-05-02 00:00:00 │ EA              │        0 │           1.00 │ 2021-05-02 00:

## BusinessEntity

In [12]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE BusinessEntity (
BusinessEntityID INT NOT NULL primary key, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO BusinessEntity
    SELECT * exclude(column2), replace(column2, '&|', '') FROM read_parquet('{parquet_dir}/BusinessEntity.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM BusinessEntity LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM BusinessEntity;")   

if on: con.close()

┌──────────────────┬──────────────────────────────────────┬─────────────────────────┐
│ BusinessEntityID │               rowguid                │      ModifiedDate       │
│      int32       │               varchar                │        timestamp        │
├──────────────────┼──────────────────────────────────────┼─────────────────────────┤
│                1 │ 0C7D8F81-D7B1-4CF0-9C0A-4CD8B6B50087 │ 2017-12-13 13:20:24.15  │
│                2 │ 6648747F-7843-4002-B317-65389684C398 │ 2017-12-13 13:20:24.43  │
│                3 │ 568204DA-93D7-42F4-8A7A-4446A144277D │ 2017-12-13 13:20:24.54  │
│                4 │ 0EFF57B9-4F4F-41A6-8867-658C199A5FC0 │ 2017-12-13 13:20:24.57  │
│                5 │ B82F88D1-FF79-4FD9-8C54-9D24C140F647 │ 2017-12-13 13:20:24.633 │
└──────────────────┴──────────────────────────────────────┴─────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        20777 │
└──────────────┘



## BusinessEntityAddress

In [13]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE BusinessEntityAddress (
BusinessEntityID INT NOT NULL, 
AddressID INT NOT NULL, 
AddressTypeID INT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (BusinessEntityID, AddressID, AddressTypeID)
);
"""

sql2 = f"""
INSERT INTO BusinessEntityAddress
    SELECT * exclude(column4), replace(column4, '&|', '') FROM read_parquet('{parquet_dir}/BusinessEntityAddress.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM BusinessEntityAddress LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM BusinessEntityAddress;")   

if on: con.close()

┌──────────────────┬───────────┬───────────────┬──────────────────────────────────────┬─────────────────────────┐
│ BusinessEntityID │ AddressID │ AddressTypeID │               rowguid                │      ModifiedDate       │
│      int32       │   int32   │     int32     │               varchar                │        timestamp        │
├──────────────────┼───────────┼───────────────┼──────────────────────────────────────┼─────────────────────────┤
│                1 │       249 │             2 │ 3A5D0A00-6739-4DFE-A8F7-844CD9DEE3DF │ 2025-09-11 11:15:06.967 │
│                2 │       293 │             2 │ 84AE7057-EDF4-4C51-8B8D-3AEAEFBFB4A1 │ 2025-09-11 11:15:06.967 │
│                3 │       224 │             2 │ 3C915B31-7C05-4A05-9859-0DF663677240 │ 2025-09-11 11:15:06.967 │
│                4 │     11387 │             2 │ 3DC70CC4-3AE8-424F-8B1F-481C5478E941 │ 2025-09-11 11:15:06.967 │
│                5 │       190 │             2 │ C0ED2F68-937B-4594-9459-581AC53C98E3 │ 

## BusinessEntityContact

In [14]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE BusinessEntityContact (
BusinessEntityID INT NOT NULL, 
PersonID INT NOT NULL, 
ContactTypeID INT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (BusinessEntityID, PersonID, ContactTypeID)
);
"""

sql2 = f"""
INSERT INTO BusinessEntityContact
    SELECT * exclude(column4), replace(column4, '&|', '') FROM read_parquet('{parquet_dir}/BusinessEntityContact.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM BusinessEntityContact LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM BusinessEntityContact;")   

if on: con.close()

┌──────────────────┬──────────┬───────────────┬──────────────────────────────────────┬─────────────────────────┐
│ BusinessEntityID │ PersonID │ ContactTypeID │               rowguid                │      ModifiedDate       │
│      int32       │  int32   │     int32     │               varchar                │        timestamp        │
├──────────────────┼──────────┼───────────────┼──────────────────────────────────────┼─────────────────────────┤
│              292 │      291 │            11 │ 7D4D2DBC-4A44-48F5-911D-A63ABAFD5120 │ 2017-12-13 13:21:02.243 │
│              294 │      293 │            11 │ 3EA25B65-9579-4260-977D-D6F00D7D20EE │ 2017-12-13 13:21:02.32  │
│              296 │      295 │            11 │ DADAC1FF-3351-4827-9AE0-95004885C193 │ 2017-12-13 13:21:02.383 │
│              298 │      297 │            11 │ B924F26F-6446-45D1-A92B-6F418374F075 │ 2017-12-13 13:21:02.447 │
│              300 │      299 │            11 │ 5BA4E7BE-8D29-46A2-B68D-67B1615B124A │ 2017-12-1

## ContactType

In [15]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ContactType (
ContactTypeID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ContactType
    SELECT * FROM read_parquet('{parquet_dir}/ContactType.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ContactType LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ContactType;")   

if on: con.close()

┌───────────────┬────────────────────────────────┬─────────────────────┐
│ ContactTypeID │              Name              │    ModifiedDate     │
│     int32     │            varchar             │      timestamp      │
├───────────────┼────────────────────────────────┼─────────────────────┤
│             1 │ Accounting Manager             │ 2019-04-30 00:00:00 │
│             2 │ Assistant Sales Agent          │ 2019-04-30 00:00:00 │
│             3 │ Assistant Sales Representative │ 2019-04-30 00:00:00 │
│             4 │ Coordinator Foreign Markets    │ 2019-04-30 00:00:00 │
│             5 │ Export Administrator           │ 2019-04-30 00:00:00 │
└───────────────┴────────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           20 │
└──────────────┘



## CountryRegionCurrency

In [16]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE CountryRegionCurrency (
CountryRegionCode TEXT NOT NULL, 
CurrencyCode TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (CountryRegionCode, CurrencyCode)
);
"""

sql2 = f"""
INSERT INTO CountryRegionCurrency
    SELECT * FROM read_parquet('{parquet_dir}/CountryRegionCurrency.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM CountryRegionCurrency LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM CountryRegionCurrency;")   

if on: con.close()

┌───────────────────┬──────────────┬────────────────────────┐
│ CountryRegionCode │ CurrencyCode │      ModifiedDate      │
│      varchar      │   varchar    │       timestamp        │
├───────────────────┼──────────────┼────────────────────────┤
│ AE                │ AED          │ 2025-02-07 10:17:21.51 │
│ AR                │ ARS          │ 2025-02-07 10:17:21.51 │
│ AT                │ ATS          │ 2025-02-07 10:17:21.51 │
│ AT                │ EUR          │ 2019-04-30 00:00:00    │
│ AU                │ AUD          │ 2025-02-07 10:17:21.51 │
└───────────────────┴──────────────┴────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          109 │
└──────────────┘



## CountryRegion

In [17]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE CountryRegion (
CountryRegionCode TEXT NOT NULL primary key, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO CountryRegion
    SELECT * FROM read_parquet('{parquet_dir}/CountryRegion.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM CountryRegion LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM CountryRegion;")   

if on: con.close()

┌───────────────────┬──────────────────────┬─────────────────────┐
│ CountryRegionCode │         Name         │    ModifiedDate     │
│      varchar      │       varchar        │      timestamp      │
├───────────────────┼──────────────────────┼─────────────────────┤
│ AD                │ Andorra              │ 2019-04-30 00:00:00 │
│ AE                │ United Arab Emirates │ 2019-04-30 00:00:00 │
│ AF                │ Afghanistan          │ 2019-04-30 00:00:00 │
│ AG                │ Antigua and Barbuda  │ 2019-04-30 00:00:00 │
│ AI                │ Anguilla             │ 2019-04-30 00:00:00 │
└───────────────────┴──────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          238 │
└──────────────┘



## CreditCard

In [18]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE CreditCard (
CreditCardID INT NOT NULL primary key, 
CardType TEXT NOT NULL, 
CardNumber TEXT NOT NULL, 
ExpMonth UTINYINT NOT NULL, 
ExpYear SMALLINT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO CreditCard
    SELECT * FROM read_parquet('{parquet_dir}/CreditCard.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM CreditCard LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM CreditCard;")   

if on: con.close()

┌──────────────┬───────────────┬────────────────┬──────────┬─────────┬─────────────────────┐
│ CreditCardID │   CardType    │   CardNumber   │ ExpMonth │ ExpYear │    ModifiedDate     │
│    int32     │    varchar    │    varchar     │  uint8   │  int16  │      timestamp      │
├──────────────┼───────────────┼────────────────┼──────────┼─────────┼─────────────────────┤
│            1 │ SuperiorCard  │ 33332664695310 │       11 │    2006 │ 2024-07-28 00:00:00 │
│            2 │ Distinguish   │ 55552127249722 │        8 │    2005 │ 2024-12-04 00:00:00 │
│            3 │ ColonialVoice │ 77778344838353 │        7 │    2005 │ 2025-01-13 00:00:00 │
│            4 │ ColonialVoice │ 77774915718248 │        7 │    2006 │ 2024-05-19 00:00:00 │
│            5 │ Vista         │ 11114404600042 │        4 │    2005 │ 2024-02-01 00:00:00 │
└──────────────┴───────────────┴────────────────┴──────────┴─────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│

## Culture

In [19]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Culture (
CultureID TEXT NOT NULL primary key, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Culture
    SELECT * FROM read_parquet('{parquet_dir}/Culture.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Culture LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Culture;")   

if on: con.close()

┌───────────┬────────────────────────────────────────┬─────────────────────┐
│ CultureID │                  Name                  │    ModifiedDate     │
│  varchar  │                varchar                 │      timestamp      │
├───────────┼────────────────────────────────────────┼─────────────────────┤
│           │ Invariant Language (Invariant Country) │ 2019-04-30 00:00:00 │
│ ar        │ Arabic                                 │ 2019-04-30 00:00:00 │
│ en        │ English                                │ 2019-04-30 00:00:00 │
│ es        │ Spanish                                │ 2019-04-30 00:00:00 │
│ fr        │ French                                 │ 2019-04-30 00:00:00 │
└───────────┴────────────────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│            8 │
└──────────────┘



## Currency

In [20]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Currency (
CurrencyCode TEXT NOT NULL primary key, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Currency
    SELECT * FROM read_parquet('{parquet_dir}/Currency.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Currency LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Currency;")   

if on: con.close()

┌──────────────┬───────────────────────────────┬─────────────────────┐
│ CurrencyCode │             Name              │    ModifiedDate     │
│   varchar    │            varchar            │      timestamp      │
├──────────────┼───────────────────────────────┼─────────────────────┤
│ AED          │ Emirati Dirham                │ 2019-04-30 00:00:00 │
│ AFA          │ Afghani                       │ 2019-04-30 00:00:00 │
│ ALL          │ Lek                           │ 2019-04-30 00:00:00 │
│ AMD          │ Armenian Dram                 │ 2019-04-30 00:00:00 │
│ ANG          │ Netherlands Antillian Guilder │ 2019-04-30 00:00:00 │
└──────────────┴───────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          105 │
└──────────────┘



## CurrencyRate

In [21]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE CurrencyRate (
CurrencyRateID INT NOT NULL primary key, 
CurrencyRateDate TIMESTAMP NOT NULL, 
FromCurrencyCode TEXT NOT NULL, 
ToCurrencyCode TEXT NOT NULL, 
AverageRate DECIMAL(18,4) NOT NULL, 
EndOfDayRate DECIMAL(18,4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO CurrencyRate
    SELECT * FROM read_parquet('{parquet_dir}/CurrencyRate.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM CurrencyRate LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM CurrencyRate;")   

if on: con.close()

┌────────────────┬─────────────────────┬──────────────────┬────────────────┬───────────────┬───────────────┬─────────────────────┐
│ CurrencyRateID │  CurrencyRateDate   │ FromCurrencyCode │ ToCurrencyCode │  AverageRate  │ EndOfDayRate  │    ModifiedDate     │
│     int32      │      timestamp      │     varchar      │    varchar     │ decimal(18,4) │ decimal(18,4) │      timestamp      │
├────────────────┼─────────────────────┼──────────────────┼────────────────┼───────────────┼───────────────┼─────────────────────┤
│              1 │ 2022-05-30 00:00:00 │ USD              │ ARS            │        1.0000 │        1.0002 │ 2022-05-30 00:00:00 │
│              2 │ 2022-05-30 00:00:00 │ USD              │ AUD            │        1.5491 │        1.5500 │ 2022-05-30 00:00:00 │
│              3 │ 2022-05-30 00:00:00 │ USD              │ BRL            │        1.9379 │        1.9419 │ 2022-05-30 00:00:00 │
│              4 │ 2022-05-30 00:00:00 │ USD              │ CAD            │       

## Customer

In [22]:
if on: con = duckdb_mem_con()

sql1 = """
/* A customer may either be a person, a store, or a person who works for a store */
/* If this customer represents a person, this is non-null */
/* If the customer is a store, or is associated with a store then this is non-null. */
CREATE TABLE Customer (
CustomerID INT NOT NULL primary key, 
PersonID  INT NULL, 
StoreID INT NULL, 
TerritoryID INT NULL, 
AccountNumber TEXT, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Customer
    SELECT * FROM read_parquet('{parquet_dir}/Customer.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Customer LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Customer;")   

if on: con.close()

┌────────────┬──────────┬─────────┬─────────────┬───────────────┬──────────────────────────────────────┬─────────────────────────┐
│ CustomerID │ PersonID │ StoreID │ TerritoryID │ AccountNumber │               rowguid                │      ModifiedDate       │
│   int32    │  int32   │  int32  │    int32    │    varchar    │               varchar                │        timestamp        │
├────────────┼──────────┼─────────┼─────────────┼───────────────┼──────────────────────────────────────┼─────────────────────────┤
│          1 │     NULL │     934 │           1 │ AW00000001    │ 3F5AE95E-B87D-4AED-95B4-C3797AFCB74F │ 2025-09-11 11:15:07.263 │
│          2 │     NULL │    1028 │           1 │ AW00000002    │ E552F657-A9AF-4A7D-A645-C429D6E02491 │ 2025-09-11 11:15:07.263 │
│          3 │     NULL │     642 │           4 │ AW00000003    │ 130774B1-DB21-4EF3-98C8-C104BCD6ED6D │ 2025-09-11 11:15:07.263 │
│          4 │     NULL │     932 │           4 │ AW00000004    │ FF862851-1DAA-404

## Department

In [23]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Department (
DepartmentID SMALLINT NOT NULL primary key, 
Name TEXT NOT NULL, 
GroupName TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Department
    SELECT * FROM read_parquet('{parquet_dir}/Department.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Department LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Department;")   

if on: con.close()

┌──────────────┬─────────────┬──────────────────────────┬─────────────────────┐
│ DepartmentID │    Name     │        GroupName         │    ModifiedDate     │
│    int16     │   varchar   │         varchar          │      timestamp      │
├──────────────┼─────────────┼──────────────────────────┼─────────────────────┤
│            1 │ Engineering │ Research and Development │ 2008-04-30 00:00:00 │
│            2 │ Tool Design │ Research and Development │ 2008-04-30 00:00:00 │
│            3 │ Sales       │ Sales and Marketing      │ 2008-04-30 00:00:00 │
│            4 │ Marketing   │ Sales and Marketing      │ 2008-04-30 00:00:00 │
│            5 │ Purchasing  │ Inventory Management     │ 2008-04-30 00:00:00 │
└──────────────┴─────────────┴──────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           16 │
└──────────────┘



## EmailAddress

In [24]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE EmailAddress (
BusinessEntityID INT NOT NULL, 
EmailAddressID INT NOT NULL, 
EmailAddress TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (BusinessEntityID, EmailAddressID)
);
"""

sql2 = f"""
INSERT INTO EmailAddress
    SELECT * exclude(column4), replace(column4, '&|', '') FROM read_parquet('{parquet_dir}/EmailAddress.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM EmailAddress LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM EmailAddress;")   

if on: con.close()

┌──────────────────┬────────────────┬──────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │ EmailAddressID │         EmailAddress         │               rowguid                │    ModifiedDate     │
│      int32       │     int32      │           varchar            │               varchar                │      timestamp      │
├──────────────────┼────────────────┼──────────────────────────────┼──────────────────────────────────────┼─────────────────────┤
│                1 │              1 │ ken0@adventure-works.com     │ 8A1901E4-671B-431A-871C-EADB2942E9EE │ 2020-01-07 00:00:00 │
│                2 │              2 │ terri0@adventure-works.com   │ B5FF9EFD-72A2-4F87-830B-F338FDD4D162 │ 2019-01-23 00:00:00 │
│                3 │              3 │ roberto0@adventure-works.com │ C8A51084-1C03-4C58-A8B3-55854AE7C499 │ 2018-11-03 00:00:00 │
│                4 │              4 │ rob0@adventure-works.com     │ 17703ED1-0031-4B4A-AF

## Employee

In [25]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Employee (
BusinessEntityID INT NOT NULL primary key, 
NationalIDNumber TEXT NOT NULL, 
LoginID TEXT NOT NULL, 
OrganizationNode TEXT NULL, 
OrganizationLevel INT, 
JobTitle TEXT NOT NULL, 
BirthDate DATE NOT NULL, 
MaritalStatus TEXT NOT NULL, 
Gender TEXT NOT NULL, 
HireDate DATE NOT NULL, 
SalariedFlag BIT NOT NULL , 
VacationHours SMALLINT NOT NULL, 
SickLeaveHours SMALLINT NOT NULL, 
CurrentFlag BIT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL 
);
"""

sql2 = f"""
INSERT INTO Employee
    SELECT * FROM read_parquet('{parquet_dir}/Employee.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Employee LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Employee;")   

if on: con.close()

┌──────────────────┬──────────────────┬──────────────────────────┬──────────────────┬───────────────────┬───────────────────────────────┬────────────┬───────────────┬─────────┬────────────┬──────────────────────────────────────────────────────────────────┬───────────────┬────────────────┬──────────────────────────────────────────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │ NationalIDNumber │         LoginID          │ OrganizationNode │ OrganizationLevel │           JobTitle            │ BirthDate  │ MaritalStatus │ Gender  │  HireDate  │                           SalariedFlag                           │ VacationHours │ SickLeaveHours │                           CurrentFlag                            │               rowguid                │    ModifiedDate     │
│      int32       │     varchar      │         varchar          │     varchar      │       int32       │            varchar            │    date    │    varchar   

## EmployeeDepartmentHistory

In [26]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE EmployeeDepartmentHistory (
BusinessEntityID INT NOT NULL, 
DepartmentID SMALLINT NOT NULL, 
ShiftID UTINYINT NOT NULL, 
StartDate DATE NOT NULL, 
EndDate DATE NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (BusinessEntityID, StartDate, DepartmentID, ShiftID)
);
"""

sql2 = f"""
INSERT INTO EmployeeDepartmentHistory
    SELECT * FROM read_parquet('{parquet_dir}/EmployeeDepartmentHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM EmployeeDepartmentHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM EmployeeDepartmentHistory;")   

if on: con.close()

┌──────────────────┬──────────────┬─────────┬────────────┬────────────┬─────────────────────┐
│ BusinessEntityID │ DepartmentID │ ShiftID │ StartDate  │  EndDate   │    ModifiedDate     │
│      int32       │    int16     │  uint8  │    date    │    date    │      timestamp      │
├──────────────────┼──────────────┼─────────┼────────────┼────────────┼─────────────────────┤
│                1 │           16 │       1 │ 2009-01-14 │ NULL       │ 2009-01-13 00:00:00 │
│                2 │            1 │       1 │ 2008-01-31 │ NULL       │ 2008-01-30 00:00:00 │
│                3 │            1 │       1 │ 2007-11-11 │ NULL       │ 2007-11-10 00:00:00 │
│                4 │            1 │       1 │ 2007-12-05 │ 2010-05-30 │ 2010-05-28 00:00:00 │
│                4 │            2 │       1 │ 2010-05-31 │ NULL       │ 2010-05-30 00:00:00 │
└──────────────────┴──────────────┴─────────┴────────────┴────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├───────

## EmployeePayHistory

In [27]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE EmployeePayHistory (
BusinessEntityID INT NOT NULL, 
RateChangeDate TIMESTAMP NOT NULL, 
Rate DECIMAL(18,4) NOT NULL, 
PayFrequency UTINYINT NOT NULL, /* 1 = monthly salary, 2 = biweekly salary */
ModifiedDate TIMESTAMP NOT NULL, 
primary key (BusinessEntityID, RateChangeDate)
);
"""

sql2 = f"""
INSERT INTO EmployeePayHistory
    SELECT * FROM read_parquet('{parquet_dir}/EmployeePayHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM EmployeePayHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM EmployeePayHistory;")   

if on: con.close()

┌──────────────────┬─────────────────────┬───────────────┬──────────────┬─────────────────────┐
│ BusinessEntityID │   RateChangeDate    │     Rate      │ PayFrequency │    ModifiedDate     │
│      int32       │      timestamp      │ decimal(18,4) │    uint8     │      timestamp      │
├──────────────────┼─────────────────────┼───────────────┼──────────────┼─────────────────────┤
│                1 │ 2009-01-14 00:00:00 │      125.5000 │            2 │ 2014-06-30 00:00:00 │
│                2 │ 2008-01-31 00:00:00 │       63.4615 │            2 │ 2014-06-30 00:00:00 │
│                3 │ 2007-11-11 00:00:00 │       43.2692 │            2 │ 2014-06-30 00:00:00 │
│                4 │ 2007-12-05 00:00:00 │        8.6200 │            2 │ 2007-11-21 00:00:00 │
│                4 │ 2010-05-31 00:00:00 │       23.7200 │            2 │ 2010-05-16 00:00:00 │
└──────────────────┴─────────────────────┴───────────────┴──────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    

## Location

In [28]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Location (
LocationID SMALLINT NOT NULL primary key, 
Name TEXT NOT NULL, 
CostRate Decimal(18,4) NOT NULL, 
Availability DECIMAL(8, 2) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Location
    SELECT * FROM read_parquet('{parquet_dir}/Location.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Location LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Location;")   

if on: con.close()

┌────────────┬───────────────────┬───────────────┬──────────────┬─────────────────────┐
│ LocationID │       Name        │   CostRate    │ Availability │    ModifiedDate     │
│   int16    │      varchar      │ decimal(18,4) │ decimal(8,2) │      timestamp      │
├────────────┼───────────────────┼───────────────┼──────────────┼─────────────────────┤
│          1 │ Tool Crib         │        0.0000 │         0.00 │ 2019-04-30 00:00:00 │
│          2 │ Sheet Metal Racks │        0.0000 │         0.00 │ 2019-04-30 00:00:00 │
│          3 │ Paint Shop        │        0.0000 │         0.00 │ 2019-04-30 00:00:00 │
│          4 │ Paint Storage     │        0.0000 │         0.00 │ 2019-04-30 00:00:00 │
│          5 │ Metal Storage     │        0.0000 │         0.00 │ 2019-04-30 00:00:00 │
└────────────┴───────────────────┴───────────────┴──────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           14 │
└──────────────┘



## Password

In [29]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Password (
BusinessEntityID INT NOT NULL primary key, 
PasswordHash TEXT NOT NULL, 
PasswordSalt TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Password
    SELECT  * exclude(column4), replace(column4, '&|', '') FROM read_parquet('{parquet_dir}/Password.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Password LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Password;")   

if on: con.close()

┌──────────────────┬──────────────────────────────────────────────┬──────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │                 PasswordHash                 │ PasswordSalt │               rowguid                │    ModifiedDate     │
│      int32       │                   varchar                    │   varchar    │               varchar                │      timestamp      │
├──────────────────┼──────────────────────────────────────────────┼──────────────┼──────────────────────────────────────┼─────────────────────┤
│                1 │ pbFwXWE99vobT6g+vPWFy93NtUU/orrIWafF01hccfM= │ bE3XiWw=     │ 329EACBE-C883-4F48-B8B6-17AA4627EFFF │ 2020-01-07 00:00:00 │
│                2 │ bawRVNrZQYQ05qF05Gz6VLilnviZmrqBReTTAGAudm0= │ EjJaC3U=     │ A4C82398-7466-4FE6-B9EE-CEC34D116F68 │ 2019-01-23 00:00:00 │
│                3 │ 8BUXrZfDqO1IyHCWOYzYmqN1IhTUn3CJMpdx/UCQ3iY= │ wbPZqMw=     │ AC3F4536-BB2E-41C5-B70D-454BE460C1BD │ 2018-11-03 00:

## Person

In [30]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Person (
BusinessEntityID INT NOT NULL primary key, 
PersonType TEXT NOT NULL, 
NameStyle BIT NOT NULL, 
Title TEXT NULL, 
FirstName TEXT NOT NULL, 
MiddleName TEXT NULL, 
LastName TEXT NOT NULL, 
Suffix TEXT NULL, 
EmailPromotion INT NOT NULL, 
AdditionalContactInfo TEXT NULL, 
Demographics TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO Person
    SELECT * exclude(column12), replace(column12, '&|', '') FROM read_parquet('{parquet_dir}/Person.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Person LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Person;")   

if on: con.close()

┌──────────────────┬────────────┬──────────────────────────────────────────────────────────────────┬─────────┬───────────┬────────────┬────────────┬─────────┬────────────────┬───────────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │ PersonType │                            NameStyle                             │  Title  │ FirstName │ MiddleName │  LastName  │ Suffix  │ EmailPromotion │ AdditionalContactInfo │                                                                            Demographics                                                                             │               rowguid                │    ModifiedDate     │
│      int32       │  varchar   │                               bit                                │ varchar │  varchar  │  varchar   │  varchar  

## PersonCreditCard

In [31]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE PersonCreditCard (
BusinessEntityID INT NOT NULL, 
CreditCardID INT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (BusinessEntityID, CreditCardID)
);
"""

sql2 = f"""
INSERT INTO PersonCreditCard
    SELECT * FROM read_parquet('{parquet_dir}/PersonCreditCard.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM PersonCreditCard LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM PersonCreditCard;")   

if on: con.close()

┌──────────────────┬──────────────┬─────────────────────┐
│ BusinessEntityID │ CreditCardID │    ModifiedDate     │
│      int32       │    int32     │      timestamp      │
├──────────────────┼──────────────┼─────────────────────┤
│              293 │        17038 │ 2024-07-30 00:00:00 │
│              295 │        15369 │ 2022-07-31 00:00:00 │
│              297 │         8010 │ 2022-07-31 00:00:00 │
│              299 │         5316 │ 2024-07-30 00:00:00 │
│              301 │         6653 │ 2022-05-30 00:00:00 │
└──────────────────┴──────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        19118 │
└──────────────┘



## PersonPhone

In [32]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE PersonPhone (
BusinessEntityID INT NOT NULL, 
PhoneNumber TEXT NOT NULL, 
PhoneNumberTypeID INT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (BusinessEntityID, PhoneNumber, PhoneNumberTypeID)
);
"""

sql2 = f"""
INSERT INTO PersonPhone
    SELECT * exclude(column3), replace(column3, '&|', '') FROM read_parquet('{parquet_dir}/PersonPhone.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM PersonPhone LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM PersonPhone;")   

if on: con.close()

┌──────────────────┬──────────────┬───────────────────┬─────────────────────┐
│ BusinessEntityID │ PhoneNumber  │ PhoneNumberTypeID │    ModifiedDate     │
│      int32       │   varchar    │       int32       │      timestamp      │
├──────────────────┼──────────────┼───────────────────┼─────────────────────┤
│                1 │ 697-555-0142 │                 1 │ 2020-01-07 00:00:00 │
│                2 │ 819-555-0175 │                 3 │ 2019-01-23 00:00:00 │
│                3 │ 212-555-0187 │                 1 │ 2018-11-03 00:00:00 │
│                4 │ 612-555-0100 │                 1 │ 2018-11-27 00:00:00 │
│                5 │ 849-555-0139 │                 1 │ 2018-12-29 00:00:00 │
└──────────────────┴──────────────┴───────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        19972 │
└──────────────┘



## PhoneNumberType

In [33]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE PhoneNumberType (
PhoneNumberTypeID INT NOT NULL, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO PhoneNumberType
    SELECT * exclude(column2), replace(column2, '&|', '') FROM read_parquet('{parquet_dir}/PhoneNumberType.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM PhoneNumberType LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM PhoneNumberType;")   

if on: con.close()

┌───────────────────┬─────────┬─────────────────────────┐
│ PhoneNumberTypeID │  Name   │      ModifiedDate       │
│       int32       │ varchar │        timestamp        │
├───────────────────┼─────────┼─────────────────────────┤
│                 1 │ Cell    │ 2017-12-13 13:19:22.273 │
│                 2 │ Home    │ 2017-12-13 13:19:22.273 │
│                 3 │ Work    │ 2017-12-13 13:19:22.273 │
└───────────────────┴─────────┴─────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│            3 │
└──────────────┘



## ProductCostHistory

In [34]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductCostHistory (
ProductID INT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NULL, 
StandardCost DECIMAL(18,4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (ProductID, StartDate)
);
"""

sql2 = f"""
INSERT INTO ProductCostHistory
    SELECT *  FROM read_parquet('{parquet_dir}/ProductCostHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductCostHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductCostHistory;")   

if on: con.close()

┌───────────┬─────────────────────┬─────────────────────┬───────────────┬─────────────────────┐
│ ProductID │      StartDate      │       EndDate       │ StandardCost  │    ModifiedDate     │
│   int32   │      timestamp      │      timestamp      │ decimal(18,4) │      timestamp      │
├───────────┼─────────────────────┼─────────────────────┼───────────────┼─────────────────────┤
│       707 │ 2022-05-30 00:00:00 │ 2023-05-29 00:00:00 │       12.0278 │ 2023-05-29 00:00:00 │
│       707 │ 2023-05-30 00:00:00 │ 2024-05-28 00:00:00 │       13.8782 │ 2024-05-28 00:00:00 │
│       707 │ 2024-05-29 00:00:00 │ NULL                │       13.0863 │ 2024-05-15 00:00:00 │
│       708 │ 2022-05-30 00:00:00 │ 2023-05-29 00:00:00 │       12.0278 │ 2023-05-29 00:00:00 │
│       708 │ 2023-05-30 00:00:00 │ 2024-05-28 00:00:00 │       13.8782 │ 2024-05-28 00:00:00 │
└───────────┴─────────────────────┴─────────────────────┴───────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    

## ProductDescription

In [35]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductDescription (
ProductDescriptionID INT NOT NULL, 
Description TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ProductDescription
    SELECT *  FROM read_parquet('{parquet_dir}/ProductDescription.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductDescription LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductDescription;")   

if on: con.close()

┌──────────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ ProductDescriptionID │                                                                       Description                                                                       │               rowguid                │    ModifiedDate     │
│        int32         │                                                                         varchar                                                                         │               varchar                │      timestamp      │
├──────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────┼─────────────────────┤
│                    3 │ Chromoly steel.

## ProductDocument

In [36]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductDocument (
ProductID INT NOT NULL, 
DocumentNode TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (ProductID, DocumentNode)
);
"""

sql2 = f"""
INSERT INTO ProductDocument
    SELECT *  FROM read_parquet('{parquet_dir}/ProductDocument.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductDocument LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductDocument;")   

if on: con.close()

┌───────────┬──────────────┬─────────────────────────┐
│ ProductID │ DocumentNode │      ModifiedDate       │
│   int32   │   varchar    │        timestamp        │
├───────────┼──────────────┼─────────────────────────┤
│       317 │ 6AC0         │ 2024-12-28 13:51:58.103 │
│       318 │ 6AC0         │ 2024-12-28 13:51:58.103 │
│       319 │ 6AC0         │ 2024-12-28 13:51:58.103 │
│       506 │ 7AC0         │ 2024-12-28 13:51:58.103 │
│       506 │ 7B40         │ 2024-12-28 13:51:58.103 │
└───────────┴──────────────┴─────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           32 │
└──────────────┘



## ProductInventory

In [37]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductInventory (
ProductID INT NOT NULL, 
LocationID SMALLINT NOT NULL, 
Shelf TEXT NOT NULL, 
Bin UTINYINT NOT NULL, 
Quantity SMALLINT NOT NULL , 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (ProductID, LocationID)
);
"""

sql2 = f"""
INSERT INTO ProductInventory
    SELECT *  FROM read_parquet('{parquet_dir}/ProductInventory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductInventory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductInventory;")   

if on: con.close()

┌───────────┬────────────┬─────────┬───────┬──────────┬──────────────────────────────────────┬─────────────────────┐
│ ProductID │ LocationID │  Shelf  │  Bin  │ Quantity │               rowguid                │    ModifiedDate     │
│   int32   │   int16    │ varchar │ uint8 │  int16   │               varchar                │      timestamp      │
├───────────┼────────────┼─────────┼───────┼──────────┼──────────────────────────────────────┼─────────────────────┤
│         1 │          1 │ A       │     1 │      408 │ 47A24246-6C43-48EB-968F-025738A8A410 │ 2025-08-07 00:00:00 │
│         1 │          6 │ B       │     5 │      324 │ D4544D7D-CAF5-46B3-AB22-5718DCC26B5E │ 2025-08-07 00:00:00 │
│         1 │         50 │ A       │     5 │      353 │ BFF7DC60-96A8-43CA-81A7-D6D2ED3000A8 │ 2025-08-07 00:00:00 │
│         2 │          1 │ A       │     2 │      427 │ F407C07A-CA14-4684-A02C-608BD00C2233 │ 2025-08-07 00:00:00 │
│         2 │          6 │ B       │     1 │      318 │ CA1FF2F4

## ProductListPriceHistory

In [38]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductListPriceHistory (
ProductID INT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NULL, 
ListPrice DECIMAL(18,4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (ProductID, StartDate)
);
"""

sql2 = f"""
INSERT INTO ProductListPriceHistory
    SELECT *  FROM read_parquet('{parquet_dir}/ProductListPriceHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductListPriceHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductListPriceHistory;")   

if on: con.close()

┌───────────┬─────────────────────┬─────────────────────┬───────────────┬─────────────────────┐
│ ProductID │      StartDate      │       EndDate       │   ListPrice   │    ModifiedDate     │
│   int32   │      timestamp      │      timestamp      │ decimal(18,4) │      timestamp      │
├───────────┼─────────────────────┼─────────────────────┼───────────────┼─────────────────────┤
│       707 │ 2022-05-30 00:00:00 │ 2023-05-29 00:00:00 │       33.6442 │ 2023-05-29 00:00:00 │
│       707 │ 2023-05-30 00:00:00 │ 2024-05-28 00:00:00 │       33.6442 │ 2024-05-28 00:00:00 │
│       707 │ 2024-05-29 00:00:00 │ NULL                │       34.9900 │ 2024-05-08 00:00:00 │
│       708 │ 2022-05-30 00:00:00 │ 2023-05-29 00:00:00 │       33.6442 │ 2023-05-29 00:00:00 │
│       708 │ 2023-05-30 00:00:00 │ 2024-05-28 00:00:00 │       33.6442 │ 2024-05-28 00:00:00 │
└───────────┴─────────────────────┴─────────────────────┴───────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    

## ProductModel

In [39]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductModel (
ProductModelID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
CatalogDescription TEXT NULL, 
Instructions TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ProductModel
    SELECT * exclude(column5), replace(column5, '&|', '')  FROM read_parquet('{parquet_dir}/ProductModel.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductModel LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductModel;")   

if on: con.close()

┌────────────────┬────────────────────┬────────────────────┬──────────────┬──────────────────────────────────────┬─────────────────────┐
│ ProductModelID │        Name        │ CatalogDescription │ Instructions │               rowguid                │    ModifiedDate     │
│     int32      │      varchar       │      varchar       │   varchar    │               varchar                │      timestamp      │
├────────────────┼────────────────────┼────────────────────┼──────────────┼──────────────────────────────────────┼─────────────────────┤
│              1 │ Classic Vest       │ NULL               │ NULL         │ 29321D47-1E4C-4AAC-887C-19634328C25E │ 2024-04-29 00:00:00 │
│              2 │ Cycling Cap        │ NULL               │ NULL         │ 474FB654-3C96-4CB9-82DF-2152EEFFBDB0 │ 2022-04-30 00:00:00 │
│              3 │ Full-Finger Gloves │ NULL               │ NULL         │ A75483FE-3C47-4AA4-93CF-664B51192987 │ 2023-04-30 00:00:00 │
│              4 │ Half-Finger Gloves │ N

## ProductModelIllustration

In [40]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductModelIllustration (
ProductModelID INT NOT NULL, 
IllustrationID INT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ProductModelIllustration
    SELECT *  FROM read_parquet('{parquet_dir}/ProductModelIllustration.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductModelIllustration LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductModelIllustration;")   

if on: con.close()

┌────────────────┬────────────────┬─────────────────────────┐
│ ProductModelID │ IllustrationID │      ModifiedDate       │
│     int32      │     int32      │        timestamp        │
├────────────────┼────────────────┼─────────────────────────┤
│              7 │              3 │ 2025-01-08 14:41:02.167 │
│             10 │              3 │ 2025-01-08 14:41:02.167 │
│             47 │              4 │ 2025-01-08 14:41:02.183 │
│             47 │              5 │ 2025-01-08 14:41:02.183 │
│             48 │              4 │ 2025-01-08 14:41:02.183 │
└────────────────┴────────────────┴─────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│            7 │
└──────────────┘



## ProductModelProductDescriptionCulture

In [41]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductModelProductDescriptionCulture (
ProductModelID INT NOT NULL, 
ProductDescriptionID INT NOT NULL, 
CultureID TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (ProductModelID, ProductDescriptionID, CultureID)
);
"""

sql2 = f"""
INSERT INTO ProductModelProductDescriptionCulture
    SELECT *  FROM read_parquet('{parquet_dir}/ProductModelProductDescriptionCulture.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductModelProductDescriptionCulture LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductModelProductDescriptionCulture;")   

if on: con.close()

┌────────────────┬──────────────────────┬───────────┬─────────────────────┐
│ ProductModelID │ ProductDescriptionID │ CultureID │    ModifiedDate     │
│     int32      │        int32         │  varchar  │      timestamp      │
├────────────────┼──────────────────────┼───────────┼─────────────────────┤
│              1 │                 1199 │ en        │ 2024-04-29 00:00:00 │
│              1 │                 1467 │ ar        │ 2024-04-29 00:00:00 │
│              1 │                 1589 │ fr        │ 2024-04-29 00:00:00 │
│              1 │                 1712 │ th        │ 2024-04-29 00:00:00 │
│              1 │                 1838 │ he        │ 2024-04-29 00:00:00 │
└────────────────┴──────────────────────┴───────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          762 │
└──────────────┘



## ProductSubcategory

In [42]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductSubcategory (
ProductSubcategoryID INT NOT NULL primary key, 
ProductCategoryID INT NOT NULL, 
Name TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ProductSubcategory
    SELECT *  FROM read_parquet('{parquet_dir}/ProductSubcategory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductSubcategory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductSubcategory;")   

if on: con.close()

┌──────────────────────┬───────────────────┬─────────────────┬──────────────────────────────────────┬─────────────────────┐
│ ProductSubcategoryID │ ProductCategoryID │      Name       │               rowguid                │    ModifiedDate     │
│        int32         │       int32       │     varchar     │               varchar                │      timestamp      │
├──────────────────────┼───────────────────┼─────────────────┼──────────────────────────────────────┼─────────────────────┤
│                    1 │                 1 │ Mountain Bikes  │ 2D364ADE-264A-433C-B092-4FCBF3804E01 │ 2019-04-30 00:00:00 │
│                    2 │                 1 │ Road Bikes      │ 000310C0-BCC8-42C4-B0C3-45AE611AF06B │ 2019-04-30 00:00:00 │
│                    3 │                 1 │ Touring Bikes   │ 02C5061D-ECDC-4274-B5F1-E91D76BC3F37 │ 2019-04-30 00:00:00 │
│                    4 │                 2 │ Handlebars      │ 3EF2C725-7135-4C85-9AE6-AE9A3BDD9283 │ 2019-04-30 00:00:00 │
│       

## ProductVendor

In [43]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ProductVendor (
ProductID INT NOT NULL, 
BusinessEntityID INT NOT NULL, 
AverageLeadTime INT NOT NULL, 
StandardPrice DECIMAL(18,4) NOT NULL, 
LastReceiptCost DECIMAL(18,4) NULL, 
LastReceiptDate TIMESTAMP NULL, 
MinOrderQty INT NOT NULL, 
MaxOrderQty INT NOT NULL, 
OnOrderQty INT NULL, 
UnitMeasureCode TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (ProductID, BusinessEntityID)
);
"""

sql2 = f"""
INSERT INTO ProductVendor
    SELECT *  FROM read_parquet('{parquet_dir}/ProductVendor.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ProductVendor LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ProductVendor;")   

if on: con.close()

┌───────────┬──────────────────┬─────────────────┬───────────────┬─────────────────┬─────────────────────┬─────────────┬─────────────┬────────────┬─────────────────┬─────────────────────┐
│ ProductID │ BusinessEntityID │ AverageLeadTime │ StandardPrice │ LastReceiptCost │   LastReceiptDate   │ MinOrderQty │ MaxOrderQty │ OnOrderQty │ UnitMeasureCode │    ModifiedDate     │
│   int32   │      int32       │      int32      │ decimal(18,4) │  decimal(18,4)  │      timestamp      │    int32    │    int32    │   int32    │     varchar     │      timestamp      │
├───────────┼──────────────────┼─────────────────┼───────────────┼─────────────────┼─────────────────────┼─────────────┼─────────────┼────────────┼─────────────────┼─────────────────────┤
│         1 │             1580 │              17 │       47.8700 │         50.2635 │ 2022-08-28 00:00:00 │           1 │           5 │          3 │ CS              │ 2022-08-28 00:00:00 │
│         2 │             1688 │              19 │       39.

## PurchaseOrderDetail

In [44]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE PurchaseOrderDetail (
PurchaseOrderID INT NOT NULL, 
PurchaseOrderDetailID INT NOT NULL, 
DueDate TIMESTAMP NOT NULL, 
OrderQty SMALLINT NOT NULL, 
ProductID INT NOT NULL, 
UnitPrice DECIMAL(18,4) NOT NULL, 
LineTotal DECIMAL(18,4), 
ReceivedQty DECIMAL(8,2) NOT NULL, 
RejectedQty DECIMAL(8,2) NOT NULL, 
StockedQty DECIMAL(8,2), 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (PurchaseOrderID, PurchaseOrderDetailID)
);
"""

sql2 = f"""
INSERT INTO PurchaseOrderDetail
    SELECT *  FROM read_parquet('{parquet_dir}/PurchaseOrderDetail.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM PurchaseOrderDetail LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM PurchaseOrderDetail;")   

if on: con.close()

┌─────────────────┬───────────────────────┬─────────────────────┬──────────┬───────────┬───────────────┬───────────────┬──────────────┬──────────────┬──────────────┬─────────────────────┐
│ PurchaseOrderID │ PurchaseOrderDetailID │       DueDate       │ OrderQty │ ProductID │   UnitPrice   │   LineTotal   │ ReceivedQty  │ RejectedQty  │  StockedQty  │    ModifiedDate     │
│      int32      │         int32         │      timestamp      │  int16   │   int32   │ decimal(18,4) │ decimal(18,4) │ decimal(8,2) │ decimal(8,2) │ decimal(8,2) │      timestamp      │
├─────────────────┼───────────────────────┼─────────────────────┼──────────┼───────────┼───────────────┼───────────────┼──────────────┼──────────────┼──────────────┼─────────────────────┤
│               1 │                     1 │ 2022-04-29 00:00:00 │        4 │         1 │       50.2600 │      201.0400 │         3.00 │         0.00 │         3.00 │ 2022-04-22 00:00:00 │
│               2 │                     2 │ 2022-04-29 00:00

## PurchaseOrderHeader

In [45]:
if on: con = duckdb_mem_con()

sql1 = """
/* 1 = Pending;
 2 = Approved;
 3 = Rejected;
 4 = Complete */
CREATE TABLE PurchaseOrderHeader (
PurchaseOrderID INT NOT NULL primary key, 
RevisionNumber UTINYINT NOT NULL, 
Status UTINYINT NOT NULL, 
EmployeeID INT NOT NULL, 
VendorID INT NOT NULL, 
ShipMethodID INT NOT NULL, 
OrderDate TIMESTAMP NOT NULL, 
ShipDate TIMESTAMP NULL, 
SubTotal DECIMAL(18,4) NOT NULL , 
TaxAmt DECIMAL(18,4) NOT NULL , 
Freight DECIMAL(18,4) NOT NULL , 
TotalDue DECIMAL(18,4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO PurchaseOrderHeader
    SELECT *  FROM read_parquet('{parquet_dir}/PurchaseOrderHeader.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM PurchaseOrderHeader LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM PurchaseOrderHeader;")   

if on: con.close()

┌─────────────────┬────────────────┬────────┬────────────┬──────────┬──────────────┬─────────────────────┬─────────────────────┬───────────────┬───────────────┬───────────────┬───────────────┬─────────────────────┐
│ PurchaseOrderID │ RevisionNumber │ Status │ EmployeeID │ VendorID │ ShipMethodID │      OrderDate      │      ShipDate       │   SubTotal    │    TaxAmt     │    Freight    │   TotalDue    │    ModifiedDate     │
│      int32      │     uint8      │ uint8  │   int32    │  int32   │    int32     │      timestamp      │      timestamp      │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │      timestamp      │
├─────────────────┼────────────────┼────────┼────────────┼──────────┼──────────────┼─────────────────────┼─────────────────────┼───────────────┼───────────────┼───────────────┼───────────────┼─────────────────────┤
│               1 │              5 │      4 │        258 │     1580 │            3 │ 2022-04-15 00:00:00 │ 2022-04-24 00:00:00 │      201.04

## SalesOrderDetail

In [46]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesOrderDetail (
SalesOrderID INT NOT NULL, 
SalesOrderDetailID INT NOT NULL, 
CarrierTrackingNumber TEXT NULL, 
OrderQty SMALLINT NOT NULL, 
ProductID INT NOT NULL, 
SpecialOfferID INT NOT NULL, 
UnitPrice DECIMAL(18,4) NOT NULL, 
UnitPriceDiscount DECIMAL(18,4) NOT NULL , 
LineTotal DECIMAL(18,4) NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (SalesOrderID, SalesOrderDetailID)
);
"""

sql2 = f"""
INSERT INTO SalesOrderDetail
    SELECT *  FROM read_parquet('{parquet_dir}/SalesOrderDetail.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesOrderDetail LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesOrderDetail;")   

if on: con.close()

┌──────────────┬────────────────────┬───────────────────────┬──────────┬───────────┬────────────────┬───────────────┬───────────────────┬───────────────┬──────────────────────────────────────┬─────────────────────┐
│ SalesOrderID │ SalesOrderDetailID │ CarrierTrackingNumber │ OrderQty │ ProductID │ SpecialOfferID │   UnitPrice   │ UnitPriceDiscount │   LineTotal   │               rowguid                │    ModifiedDate     │
│    int32     │       int32        │        varchar        │  int16   │   int32   │     int32      │ decimal(18,4) │   decimal(18,4)   │ decimal(18,4) │               varchar                │      timestamp      │
├──────────────┼────────────────────┼───────────────────────┼──────────┼───────────┼────────────────┼───────────────┼───────────────────┼───────────────┼──────────────────────────────────────┼─────────────────────┤
│        43659 │                  1 │ 4911-403C-98          │        1 │       776 │              1 │     2024.9940 │            0.0000 │   

## SalesOrderHeader

In [47]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesOrderHeader (
SalesOrderID INT NOT NULL primary key, 
RevisionNumber UTINYINT NOT NULL , 
OrderDate TIMESTAMP NOT NULL, 
DueDate TIMESTAMP NOT NULL, 
ShipDate TIMESTAMP NULL, 
Status UTINYINT NOT NULL , 
OnlineOrderFlag BIT NOT NULL , 
SalesOrderNumber TEXT NOT NULL, 
PurchaseOrderNumber TEXT NULL, 
AccountNumber TEXT NULL, 
CustomerID INT NOT NULL, 
SalesPersonID INT NULL, 
TerritoryID INT NULL, 
BillToAddressID INT NOT NULL, 
ShipToAddressID INT NOT NULL, 
ShipMethodID INT NOT NULL, 
CreditCardID INT NULL, 
CreditCardApprovalCode TEXT NULL, 
CurrencyRateID INT NULL, 
SubTotal DECIMAL(18,4) NOT NULL , 
TaxAmt DECIMAL(18,4) NOT NULL , 
Freight DECIMAL(18,4) NOT NULL , 
TotalDue DECIMAL(18,4) NOT NULL, 
Comment TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO SalesOrderHeader
    SELECT *  FROM read_parquet('{parquet_dir}/SalesOrderHeader.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesOrderHeader LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesOrderHeader;")   

if on: con.close()

┌──────────────┬────────────────┬─────────────────────┬─────────────────────┬─────────────────────┬────────┬──────────────────────────────────────────────────────────────────┬──────────────────┬─────────────────────┬────────────────┬────────────┬───────────────┬─────────────┬─────────────────┬─────────────────┬──────────────┬──────────────┬────────────────────────┬────────────────┬───────────────┬───────────────┬───────────────┬───────────────┬─────────┬──────────────────────────────────────┬─────────────────────┐
│ SalesOrderID │ RevisionNumber │      OrderDate      │       DueDate       │      ShipDate       │ Status │                         OnlineOrderFlag                          │ SalesOrderNumber │ PurchaseOrderNumber │ AccountNumber  │ CustomerID │ SalesPersonID │ TerritoryID │ BillToAddressID │ ShipToAddressID │ ShipMethodID │ CreditCardID │ CreditCardApprovalCode │ CurrencyRateID │   SubTotal    │    TaxAmt     │    Freight    │   TotalDue    │ Comment │               rowguid

## SalesOrderHeaderSalesReason

In [48]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesOrderHeaderSalesReason (
SalesOrderID INT NOT NULL, 
SalesReasonID INT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (SalesOrderID, SalesReasonID)
);
"""

sql2 = f"""
INSERT INTO SalesOrderHeaderSalesReason
    SELECT *  FROM read_parquet('{parquet_dir}/SalesOrderHeaderSalesReason.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesOrderHeaderSalesReason LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesOrderHeaderSalesReason;")   

if on: con.close()

┌──────────────┬───────────────┬─────────────────────┐
│ SalesOrderID │ SalesReasonID │    ModifiedDate     │
│    int32     │     int32     │      timestamp      │
├──────────────┼───────────────┼─────────────────────┤
│        43697 │             5 │ 2022-05-30 00:00:00 │
│        43697 │             9 │ 2022-05-30 00:00:00 │
│        43702 │             5 │ 2022-05-31 00:00:00 │
│        43702 │             9 │ 2022-05-31 00:00:00 │
│        43703 │             5 │ 2022-05-31 00:00:00 │
└──────────────┴───────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        27647 │
└──────────────┘



## SalesPerson

In [49]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesPerson (
BusinessEntityID INT NOT NULL primary key, 
TerritoryID INT NULL, 
SalesQuota DECIMAL(18,4) NULL, 
Bonus DECIMAL(18,4) NOT NULL , 
CommissionPct DECIMAL(18,4) NOT NULL , 
SalesYTD DECIMAL(18,4) NOT NULL , 
SalesLastYear DECIMAL(18,4) NOT NULL , 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO SalesPerson
    SELECT *  FROM read_parquet('{parquet_dir}/SalesPerson.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesPerson LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesPerson;")   

if on: con.close()

┌──────────────────┬─────────────┬───────────────┬───────────────┬───────────────┬───────────────┬───────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │ TerritoryID │  SalesQuota   │     Bonus     │ CommissionPct │   SalesYTD    │ SalesLastYear │               rowguid                │    ModifiedDate     │
│      int32       │    int32    │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │               varchar                │      timestamp      │
├──────────────────┼─────────────┼───────────────┼───────────────┼───────────────┼───────────────┼───────────────┼──────────────────────────────────────┼─────────────────────┤
│              274 │        NULL │          NULL │        0.0000 │        0.0000 │   559697.5639 │        0.0000 │ 48754992-9EE0-4C0E-8C94-9451604E3E02 │ 2021-12-27 00:00:00 │
│              275 │           2 │   300000.0000 │     4100.0000 │        0.0120 │  3763178.1787 │  1750406.4785 │ 1E0A7

## SalesPersonQuotaHistory

In [50]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesPersonQuotaHistory (
BusinessEntityID INT NOT NULL, 
QuotaDate TIMESTAMP NOT NULL, 
SalesQuota DECIMAL(18,4) NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (BusinessEntityID, QuotaDate)
);
"""

sql2 = f"""
INSERT INTO SalesPersonQuotaHistory
    SELECT *  FROM read_parquet('{parquet_dir}/SalesPersonQuotaHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesPersonQuotaHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesPersonQuotaHistory;")   

if on: con.close()

┌──────────────────┬─────────────────────┬───────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │      QuotaDate      │  SalesQuota   │               rowguid                │    ModifiedDate     │
│      int32       │      timestamp      │ decimal(18,4) │               varchar                │      timestamp      │
├──────────────────┼─────────────────────┼───────────────┼──────────────────────────────────────┼─────────────────────┤
│              274 │ 2022-05-30 00:00:00 │    28000.0000 │ 99109BBF-8693-4587-BC23-6036EC89E1BE │ 2022-04-15 00:00:00 │
│              274 │ 2022-08-30 00:00:00 │     7000.0000 │ DFD01444-8900-461C-8D6F-04598DAE01D4 │ 2022-07-16 00:00:00 │
│              274 │ 2022-11-30 00:00:00 │    91000.0000 │ 0A69F453-9689-4CCF-A08C-C644670F5668 │ 2022-10-16 00:00:00 │
│              274 │ 2023-02-28 00:00:00 │   140000.0000 │ DA8D1458-5FB9-4C3E-9EAD-8F5CE1393047 │ 2023-01-14 00:00:00 │
│              274 │ 2023-05-30 00:00:00

## SalesReason

In [51]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesReason (
SalesReasonID INT NOT NULL, 
Name TEXT NOT NULL, 
ReasonType TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO SalesReason
    SELECT *  FROM read_parquet('{parquet_dir}/SalesReason.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesReason LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesReason;")   

if on: con.close()

┌───────────────┬───────────────────────────┬────────────┬─────────────────────┐
│ SalesReasonID │           Name            │ ReasonType │    ModifiedDate     │
│     int32     │          varchar          │  varchar   │      timestamp      │
├───────────────┼───────────────────────────┼────────────┼─────────────────────┤
│             1 │ Price                     │ Other      │ 2019-04-30 00:00:00 │
│             2 │ On Promotion              │ Promotion  │ 2019-04-30 00:00:00 │
│             3 │ Magazine Advertisement    │ Marketing  │ 2019-04-30 00:00:00 │
│             4 │ Television  Advertisement │ Marketing  │ 2019-04-30 00:00:00 │
│             5 │ Manufacturer              │ Other      │ 2019-04-30 00:00:00 │
└───────────────┴───────────────────────────┴────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           10 │
└──────────────┘



## SalesTaxRate

In [52]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesTaxRate (
SalesTaxRateID INT NOT NULL primary key, 
StateProvinceID INT NOT NULL, 
TaxType UTINYINT NOT NULL, 
TaxRate DECIMAL(8, 4) NOT NULL,
Name TEXT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO SalesTaxRate
    SELECT *  FROM read_parquet('{parquet_dir}/SalesTaxRate.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesTaxRate LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesTaxRate;")   

if on: con.close()

┌────────────────┬─────────────────┬─────────┬──────────────┬───────────────────────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ SalesTaxRateID │ StateProvinceID │ TaxType │   TaxRate    │                 Name                  │               rowguid                │    ModifiedDate     │
│     int32      │      int32      │  uint8  │ decimal(8,4) │                varchar                │               varchar                │      timestamp      │
├────────────────┼─────────────────┼─────────┼──────────────┼───────────────────────────────────────┼──────────────────────────────────────┼─────────────────────┤
│              1 │               1 │       1 │      14.0000 │ Canadian GST + Alberta Provincial Tax │ 683DE5DD-521A-47D4-A573-06A3CDB1BC5D │ 2019-04-30 00:00:00 │
│              2 │              57 │       1 │      14.2500 │ Canadian GST + Ontario Provincial Tax │ 05C4FFDB-4F84-4CDF-ABE5-FDF3216EA74E │ 2019-04-30 00:00:00 │
│              3 │    

## SalesTerritory

In [53]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesTerritory (
TerritoryID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
CountryRegionCode TEXT NOT NULL, 
Grp TEXT NOT NULL, 
SalesYTD DECIMAL(18,4) NOT NULL, 
SalesLastYear DECIMAL(18,4) NOT NULL , 
CostYTD DECIMAL(18,4) NOT NULL , 
CostLastYear DECIMAL(18,4) NOT NULL , 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO SalesTerritory
    SELECT *  FROM read_parquet('{parquet_dir}/SalesTerritory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesTerritory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesTerritory;")   

if on: con.close()

┌─────────────┬───────────┬───────────────────┬───────────────┬───────────────┬───────────────┬───────────────┬───────────────┬──────────────────────────────────────┬─────────────────────┐
│ TerritoryID │   Name    │ CountryRegionCode │      Grp      │   SalesYTD    │ SalesLastYear │    CostYTD    │ CostLastYear  │               rowguid                │    ModifiedDate     │
│    int32    │  varchar  │      varchar      │    varchar    │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │ decimal(18,4) │               varchar                │      timestamp      │
├─────────────┼───────────┼───────────────────┼───────────────┼───────────────┼───────────────┼───────────────┼───────────────┼──────────────────────────────────────┼─────────────────────┤
│           1 │ Northwest │ US                │ North America │  7887186.7882 │  3298694.4938 │        0.0000 │        0.0000 │ 43689A10-E30B-497F-B0DE-11DE20267FF7 │ 2019-04-30 00:00:00 │
│           2 │ Northeast │ US                │ North A

## SalesTerritoryHistory

In [54]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SalesTerritoryHistory (
BusinessEntityID INT NOT NULL /* A sales person */, 
TerritoryID INT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (BusinessEntityID, StartDate, TerritoryID)
);
"""

sql2 = f"""
INSERT INTO SalesTerritoryHistory
    SELECT *  FROM read_parquet('{parquet_dir}/SalesTerritoryHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SalesTerritoryHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SalesTerritoryHistory;")   

if on: con.close()

┌──────────────────┬─────────────┬─────────────────────┬─────────────────────┬──────────────────────────────────────┬─────────────────────┐
│ BusinessEntityID │ TerritoryID │      StartDate      │       EndDate       │               rowguid                │    ModifiedDate     │
│      int32       │    int32    │      timestamp      │      timestamp      │               varchar                │      timestamp      │
├──────────────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────────────────┼─────────────────────┤
│              275 │           2 │ 2022-05-30 00:00:00 │ 2023-11-29 00:00:00 │ 8563CE6A-00FF-47D7-BA4D-3C3E1CDEF531 │ 2023-11-22 00:00:00 │
│              275 │           3 │ 2023-11-30 00:00:00 │ NULL                │ 2F44304C-EE87-4C72-813E-CA75C5F61F4C │ 2023-11-23 00:00:00 │
│              276 │           4 │ 2022-05-30 00:00:00 │ NULL                │ 64BCB1B3-A793-40BA-9859-D90F78C3F167 │ 2022-05-23 00:00:00 │
│              277 │

## ScrapReason

In [55]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ScrapReason (
ScrapReasonID SMALLINT NOT NULL, 
Name TEXT NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ScrapReason
    SELECT *  FROM read_parquet('{parquet_dir}/ScrapReason.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ScrapReason LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ScrapReason;")   

if on: con.close()

┌───────────────┬───────────────────────────────┬─────────────────────┐
│ ScrapReasonID │             Name              │    ModifiedDate     │
│     int16     │            varchar            │      timestamp      │
├───────────────┼───────────────────────────────┼─────────────────────┤
│             1 │ Brake assembly not as ordered │ 2019-04-30 00:00:00 │
│             2 │ Color incorrect               │ 2019-04-30 00:00:00 │
│             3 │ Gouge in metal                │ 2019-04-30 00:00:00 │
│             4 │ Drill pattern incorrect       │ 2019-04-30 00:00:00 │
│             5 │ Drill size too large          │ 2019-04-30 00:00:00 │
└───────────────┴───────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│           16 │
└──────────────┘



## Shift

In [56]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Shift (
ShiftID UTINYINT NOT NULL, 
Name TEXT NOT NULL, 
StartTime TIME NOT NULL, 
EndTime TIME NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Shift
    SELECT *  FROM read_parquet('{parquet_dir}/Shift.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Shift LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Shift;")   

if on: con.close()

┌─────────┬─────────┬───────────┬──────────┬─────────────────────┐
│ ShiftID │  Name   │ StartTime │ EndTime  │    ModifiedDate     │
│  uint8  │ varchar │   time    │   time   │      timestamp      │
├─────────┼─────────┼───────────┼──────────┼─────────────────────┤
│       1 │ Day     │ 07:00:00  │ 15:00:00 │ 2008-04-30 00:00:00 │
│       2 │ Evening │ 15:00:00  │ 23:00:00 │ 2008-04-30 00:00:00 │
│       3 │ Night   │ 23:00:00  │ 07:00:00 │ 2008-04-30 00:00:00 │
└─────────┴─────────┴───────────┴──────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│            3 │
└──────────────┘



## ShipMethod

In [57]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ShipMethod (
ShipMethodID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
ShipBase DECIMAL(18,4) NOT NULL, 
ShipRate DECIMAL(18,4) NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO ShipMethod
    SELECT *  FROM read_parquet('{parquet_dir}/ShipMethod.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ShipMethod LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ShipMethod;")   

if on: con.close()

┌──────────────┬────────────────────┬───────────────┬───────────────┬──────────────────────────────────────┬─────────────────────┐
│ ShipMethodID │        Name        │   ShipBase    │   ShipRate    │               rowguid                │    ModifiedDate     │
│    int32     │      varchar       │ decimal(18,4) │ decimal(18,4) │               varchar                │      timestamp      │
├──────────────┼────────────────────┼───────────────┼───────────────┼──────────────────────────────────────┼─────────────────────┤
│            1 │ XRQ - TRUCK GROUND │        3.9500 │        0.9900 │ 6BE756D9-D7BE-4463-8F2C-AE60C710D606 │ 2019-04-30 00:00:00 │
│            2 │ ZY - EXPRESS       │        9.9500 │        1.9900 │ 3455079B-F773-4DC6-8F1E-2A58649C4AB8 │ 2019-04-30 00:00:00 │
│            3 │ OVERSEAS - DELUXE  │       29.9500 │        2.9900 │ 22F4E461-28CF-4ACE-A980-F686CF112EC8 │ 2019-04-30 00:00:00 │
│            4 │ OVERNIGHT J-FAST   │       21.9500 │        1.2900 │ 107E8356-E7A8

## ShoppingCartItem

In [58]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE ShoppingCartItem (
ShoppingCartItemID INT NOT NULL primary key, 
ShoppingCartID TEXT NOT NULL, 
Quantity INT NOT NULL CONSTRAINT DF_ShoppingCartItem_Quantity DEFAULT (1), 
ProductID INT NOT NULL, 
DateCreated TIMESTAMP NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO ShoppingCartItem
    SELECT *  FROM read_parquet('{parquet_dir}/ShoppingCartItem.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM ShoppingCartItem LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM ShoppingCartItem;")   

if on: con.close()

┌────────────────────┬────────────────┬──────────┬───────────┬─────────────────────────┬─────────────────────────┐
│ ShoppingCartItemID │ ShoppingCartID │ Quantity │ ProductID │       DateCreated       │      ModifiedDate       │
│       int32        │    varchar     │  int32   │   int32   │        timestamp        │        timestamp        │
├────────────────────┼────────────────┼──────────┼───────────┼─────────────────────────┼─────────────────────────┤
│                  2 │ 14951          │        3 │       862 │ 2024-11-08 17:54:07.603 │ 2024-11-08 17:54:07.603 │
│                  4 │ 20621          │        4 │       881 │ 2024-11-08 17:54:07.603 │ 2024-11-08 17:54:07.603 │
│                  5 │ 20621          │        7 │       874 │ 2024-11-08 17:54:07.603 │ 2024-11-08 17:54:07.603 │
└────────────────────┴────────────────┴──────────┴───────────┴─────────────────────────┴─────────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          

## SpecialOffer

In [59]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SpecialOffer (
SpecialOfferID INT NOT NULL primary key, 
Description TEXT NOT NULL, 
DiscountPct DECIMAL(18,4) NOT NULL , 
Type TEXT NOT NULL, 
Category TEXT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NOT NULL, 
MinQty INT NOT NULL, 
MaxQty INT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO SpecialOffer
    SELECT *  FROM read_parquet('{parquet_dir}/SpecialOffer.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SpecialOffer LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SpecialOffer;")   

if on: con.close()

┌────────────────┬──────────────────────────┬───────────────┬─────────────────┬─────────────┬─────────────────────┬─────────────────────┬────────┬────────┬──────────────────────────────────────┬─────────────────────┐
│ SpecialOfferID │       Description        │  DiscountPct  │      Type       │  Category   │      StartDate      │       EndDate       │ MinQty │ MaxQty │               rowguid                │    ModifiedDate     │
│     int32      │         varchar          │ decimal(18,4) │     varchar     │   varchar   │      timestamp      │      timestamp      │ int32  │ int32  │               varchar                │      timestamp      │
├────────────────┼──────────────────────────┼───────────────┼─────────────────┼─────────────┼─────────────────────┼─────────────────────┼────────┼────────┼──────────────────────────────────────┼─────────────────────┤
│              1 │ No Discount              │        0.0000 │ No Discount     │ No Discount │ 2022-04-30 00:00:00 │ 2025-11-29 00:00

## SpecialOfferProduct

In [60]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE SpecialOfferProduct (
SpecialOfferID INT NOT NULL, 
ProductID INT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL,
primary key (SpecialOfferID, ProductID)
);
"""

sql2 = f"""
INSERT INTO SpecialOfferProduct
    SELECT *  FROM read_parquet('{parquet_dir}/SpecialOfferProduct.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM SpecialOfferProduct LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM SpecialOfferProduct;")   

if on: con.close()

┌────────────────┬───────────┬──────────────────────────────────────┬─────────────────────┐
│ SpecialOfferID │ ProductID │               rowguid                │    ModifiedDate     │
│     int32      │   int32   │               varchar                │      timestamp      │
├────────────────┼───────────┼──────────────────────────────────────┼─────────────────────┤
│              1 │       680 │ BB30B868-D86C-4557-8DB2-4B2D0A83A0FB │ 2022-03-31 00:00:00 │
│              1 │       706 │ B3C9A4B1-2AE6-4CBA-B552-1F206C9F4C1F │ 2022-03-31 00:00:00 │
│              1 │       707 │ 27B711FE-0B77-4EA4-AD1A-7C239956BEF4 │ 2022-03-31 00:00:00 │
│              1 │       708 │ 46CBB78B-246E-4D69-9BD6-E521277C1078 │ 2022-03-31 00:00:00 │
│              1 │       709 │ CF102AA0-055F-4D2B-8B98-04B161758EA8 │ 2022-03-31 00:00:00 │
└────────────────┴───────────┴──────────────────────────────────────┴─────────────────────┘

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          

## StateProvince

In [61]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE StateProvince (
StateProvinceID INT NOT NULL primary key, 
StateProvinceCode TEXT NOT NULL, 
CountryRegionCode TEXT NOT NULL, 
IsOnlyStateProvinceFlag BIT NOT NULL , 
Name TEXT NOT NULL, 
TerritoryID INT NOT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO StateProvince
    SELECT *  FROM read_parquet('{parquet_dir}/StateProvince.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM StateProvince LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM StateProvince;")   

if on: con.close()

┌─────────────────┬───────────────────┬───────────────────┬──────────────────────────────────────────────────────────────────┬────────────────┬─────────────┬──────────────────────────────────────┬─────────────────────────┐
│ StateProvinceID │ StateProvinceCode │ CountryRegionCode │                     IsOnlyStateProvinceFlag                      │      Name      │ TerritoryID │               rowguid                │      ModifiedDate       │
│      int32      │      varchar      │      varchar      │                               bit                                │    varchar     │    int32    │               varchar                │        timestamp        │
├─────────────────┼───────────────────┼───────────────────┼──────────────────────────────────────────────────────────────────┼────────────────┼─────────────┼──────────────────────────────────────┼─────────────────────────┤
│               1 │ AB                │ CA                │ 000000000000000000000000000000000000000000000000

## Store

In [62]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Store (
BusinessEntityID INT NOT NULL primary key, 
Name TEXT NOT NULL, 
SalesPersonID INT NULL, 
Demographics TEXT NULL, 
rowguid TEXT, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Store
    SELECT * exclude(column5), replace(column5, '&|', '')  FROM read_parquet('{parquet_dir}/Store.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Store LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Store;")   

if on: con.close()

┌──────────────────┬────────────────────────────────┬───────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────┬─────────────────────────┐
│ BusinessEntityID │              Name              │ SalesPersonID │                                                                                                                                                                                                              Demographics                                                                                                                                                         

## TransactionHistory

In [63]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE TransactionHistory (
TransactionID INT NOT NULL, 
ProductID INT NOT NULL, 
ReferenceOrderID INT NOT NULL, 
ReferenceOrderLineID INT NOT NULL , 
TransactionDate TIMESTAMP NOT NULL, 
TransactionType TEXT NOT NULL, 
Quantity INT NOT NULL, 
ActualCost DECIMAL(18, 4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (TransactionID)
);
"""

sql2 = f"""
INSERT INTO TransactionHistory
    SELECT *  FROM read_parquet('{parquet_dir}/TransactionHistory.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM TransactionHistory LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM TransactionHistory;")   

if on: con.close()

┌───────────────┬───────────┬──────────────────┬──────────────────────┬─────────────────────┬─────────────────┬──────────┬───────────────┬─────────────────────┐
│ TransactionID │ ProductID │ ReferenceOrderID │ ReferenceOrderLineID │   TransactionDate   │ TransactionType │ Quantity │  ActualCost   │    ModifiedDate     │
│     int32     │   int32   │      int32       │        int32         │      timestamp      │     varchar     │  int32   │ decimal(18,4) │      timestamp      │
├───────────────┼───────────┼──────────────────┼──────────────────────┼─────────────────────┼─────────────────┼──────────┼───────────────┼─────────────────────┤
│        100000 │       784 │            41590 │                    0 │ 2024-07-30 00:00:00 │ W               │        2 │        0.0000 │ 2024-07-30 00:00:00 │
│        100001 │       794 │            41591 │                    0 │ 2024-07-30 00:00:00 │ W               │        1 │        0.0000 │ 2024-07-30 00:00:00 │
│        100002 │       797 │     

## TransactionHistoryArchive

In [64]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE TransactionHistoryArchive (
TransactionID INT NOT NULL primary key, 
ProductID INT NOT NULL, 
ReferenceOrderID INT NOT NULL, 
ReferenceOrderLineID INT NOT NULL CONSTRAINT DF_TransactionHistoryArchive_ReferenceOrderLineID DEFAULT (0), 
TransactionDate TIMESTAMP NOT NULL, 
TransactionType TEXT NOT NULL, 
Quantity INT NOT NULL, 
ActualCost DECIMAL(18, 4) NOT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO TransactionHistoryArchive
    SELECT *  FROM read_parquet('{parquet_dir}/TransactionHistoryArchive.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM TransactionHistoryArchive LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM TransactionHistoryArchive;")   

if on: con.close()

┌───────────────┬───────────┬──────────────────┬──────────────────────┬─────────────────────┬─────────────────┬──────────┬───────────────┬─────────────────────┐
│ TransactionID │ ProductID │ ReferenceOrderID │ ReferenceOrderLineID │   TransactionDate   │ TransactionType │ Quantity │  ActualCost   │    ModifiedDate     │
│     int32     │   int32   │      int32       │        int32         │      timestamp      │     varchar     │  int32   │ decimal(18,4) │      timestamp      │
├───────────────┼───────────┼──────────────────┼──────────────────────┼─────────────────────┼─────────────────┼──────────┼───────────────┼─────────────────────┤
│             1 │         1 │                1 │                    1 │ 2022-04-15 00:00:00 │ P               │        4 │       50.2600 │ 2022-04-15 00:00:00 │
│             2 │       359 │                2 │                    1 │ 2022-04-15 00:00:00 │ P               │        3 │       45.1200 │ 2022-04-15 00:00:00 │
│             3 │       360 │     

## Vendor

In [65]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE Vendor (
BusinessEntityID INT NOT NULL primary key, 
AccountNumber TEXT NOT NULL, 
Name TEXT NOT NULL, 
CreditRating UTINYINT NOT NULL, 
PreferredVendorStatus BIT NOT NULL, 
ActiveFlag BIT NOT NULL , 
PurchasingWebServiceURL TEXT NULL, 
ModifiedDate TIMESTAMP NOT NULL
);
"""

sql2 = f"""
INSERT INTO Vendor
    SELECT *  FROM read_parquet('{parquet_dir}/Vendor.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM Vendor LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM Vendor;")   

if on: con.close()

┌──────────────────┬───────────────┬─────────────────────────┬──────────────┬──────────────────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────┬─────────────────────────┬─────────────────────┐
│ BusinessEntityID │ AccountNumber │          Name           │ CreditRating │                      PreferredVendorStatus                       │                            ActiveFlag                            │ PurchasingWebServiceURL │    ModifiedDate     │
│      int32       │    varchar    │         varchar         │    uint8     │                               bit                                │                               bit                                │         varchar         │      timestamp      │
├──────────────────┼───────────────┼─────────────────────────┼──────────────┼──────────────────────────────────────────────────────────────────┼──────────────────────────────────────────────────────────────────┼─────────

## WorkOrder

In [66]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE WorkOrder (
WorkOrderID INT NOT NULL primary key, 
ProductID INT NOT NULL, 
OrderQty INT NOT NULL, 
StockedQty INT NOT NULL, 
ScrappedQty SMALLINT NOT NULL, 
StartDate TIMESTAMP NOT NULL, 
EndDate TIMESTAMP NULL, 
DueDate TIMESTAMP NOT NULL, 
ScrapReasonID SMALLINT NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
);
"""

sql2 = f"""
INSERT INTO WorkOrder
    SELECT *  FROM read_parquet('{parquet_dir}/WorkOrder.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM WorkOrder LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM WorkOrder;")   

if on: con.close()

┌─────────────┬───────────┬──────────┬────────────┬─────────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────────────┬─────────────────────┐
│ WorkOrderID │ ProductID │ OrderQty │ StockedQty │ ScrappedQty │      StartDate      │       EndDate       │       DueDate       │ ScrapReasonID │    ModifiedDate     │
│    int32    │   int32   │  int32   │   int32    │    int16    │      timestamp      │      timestamp      │      timestamp      │     int16     │      timestamp      │
├─────────────┼───────────┼──────────┼────────────┼─────────────┼─────────────────────┼─────────────────────┼─────────────────────┼───────────────┼─────────────────────┤
│           1 │       722 │        8 │          8 │           0 │ 2022-06-02 00:00:00 │ 2022-06-12 00:00:00 │ 2022-06-13 00:00:00 │          NULL │ 2022-06-12 00:00:00 │
│           2 │       725 │       15 │         15 │           0 │ 2022-06-02 00:00:00 │ 2022-06-12 00:00:00 │ 2022-06-13 00:00:00 │          NULL │ 20

## WorkOrderRouting

In [67]:
if on: con = duckdb_mem_con()

sql1 = """
CREATE TABLE WorkOrderRouting (
WorkOrderID INT NOT NULL, 
ProductID INT NOT NULL, 
OperationSequence SMALLINT NOT NULL, 
LocationID SMALLINT NOT NULL, 
ScheduledStartDate TIMESTAMP NOT NULL, 
ScheduledEndDate TIMESTAMP NOT NULL, 
ActualStartDate TIMESTAMP NULL, 
ActualEndDate TIMESTAMP NULL, 
ActualResourceHrs DECIMAL(9, 4) NULL, 
PlannedCost DECIMAL(18, 4) NOT NULL, 
ActualCost DECIMAL(18, 4) NULL, 
ModifiedDate TIMESTAMP NOT NULL, 
primary key (WorkOrderID, ProductID, OperationSequence)
);
"""

sql2 = f"""
INSERT INTO WorkOrderRouting
    SELECT *  FROM read_parquet('{parquet_dir}/WorkOrderRouting.parquet');
"""

con.execute(sql1)
con.execute(sql2)
sql_print(con, "SELECT * FROM WorkOrderRouting LIMIT 5;")   
sql_print(con, "SELECT count(*) FROM WorkOrderRouting;")   

if on: con.close()

┌─────────────┬───────────┬───────────────────┬────────────┬─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────────────────┬───────────────┬───────────────┬─────────────────────┐
│ WorkOrderID │ ProductID │ OperationSequence │ LocationID │ ScheduledStartDate  │  ScheduledEndDate   │   ActualStartDate   │    ActualEndDate    │ ActualResourceHrs │  PlannedCost  │  ActualCost   │    ModifiedDate     │
│    int32    │   int32   │       int16       │   int16    │      timestamp      │      timestamp      │      timestamp      │      timestamp      │   decimal(9,4)    │ decimal(18,4) │ decimal(18,4) │      timestamp      │
├─────────────┼───────────┼───────────────────┼────────────┼─────────────────────┼─────────────────────┼─────────────────────┼─────────────────────┼───────────────────┼───────────────┼───────────────┼─────────────────────┤
│          13 │       747 │                 1 │         10 │ 2022-06-02 00:00:00 │ 2022-06-13 00:00:00 │ 202

In [68]:
print(duckdb_schema(con))

Table: Address (AddressID INTEGER NOT NULL, AddressLine1 VARCHAR NOT NULL, AddressLine2 VARCHAR NULL, City VARCHAR NOT NULL, StateProvinceID INTEGER NOT NULL, PostalCode VARCHAR NOT NULL, SpatialLocation VARCHAR NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: AddressType (AddressTypeID INTEGER NOT NULL, Name VARCHAR NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BillOfMaterials (BillOfMaterialsID INTEGER NOT NULL, ProductAssemblyID INTEGER NULL, ComponentID INTEGER NOT NULL, StartDate TIMESTAMP NOT NULL, EndDate TIMESTAMP NULL, UnitMeasureCode VARCHAR NOT NULL, BOMLevel SMALLINT NOT NULL, PerAssemblyQty DECIMAL(8,2) NOT NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BusinessEntity (BusinessEntityID INTEGER NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BusinessEntityAddress (BusinessEntityID INTEGER NOT NULL, AddressID INTEGER NOT NULL, AddressTypeID INTEGER NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT N

## Export

In [69]:
p = Path(parquet_dir)
files = sorted([f.name for f in p.iterdir() if f.is_file()])
tns = [Path(f).stem for f in files]


In [74]:
for tn in tns:
    out_f = Path(parquet_clean_dir) / f"{tn}.parquet"
    con.execute(f"COPY (SELECT * FROM {tn}) TO '{str(out_f)}' (FORMAT 'parquet');")
    print(f"exported table {tn}.")

exported table Address.
exported table AddressType.
exported table BillOfMaterials.
exported table BusinessEntity.
exported table BusinessEntityAddress.
exported table BusinessEntityContact.
exported table ContactType.
exported table CountryRegion.
exported table CountryRegionCurrency.
exported table CreditCard.
exported table Culture.
exported table Currency.
exported table CurrencyRate.
exported table Customer.
exported table Department.
exported table EmailAddress.
exported table Employee.
exported table EmployeeDepartmentHistory.
exported table EmployeePayHistory.
exported table Location.
exported table Password.
exported table Person.
exported table PersonCreditCard.
exported table PersonPhone.
exported table PhoneNumberType.
exported table Product.
exported table ProductCategory.
exported table ProductCostHistory.
exported table ProductDescription.
exported table ProductDocument.
exported table ProductInventory.
exported table ProductListPriceHistory.
exported table ProductModel.