### ETL for dim_item
SOURCES: 
1. lpms.listings 
2. lpms.items 
3. ums.sellers

<p align="center">
    <img src="./etl_dim_table_template.png" alt="ETL process for dim tables" width="1200"/>
</p>

In [1]:
# Importing libraries
import sqlite3
import pandas as pd
import numpy as np
import shutil
import os
from datetime import datetime

# Display options: wide console output, effectively unlimited rows and columns.
pd.set_option('display.width', 140)
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', None)

# Refresh all working db files: copy every regular file from the
# 'og_db_file' folder into 'raw_db', overwriting earlier copies.
og_db_dir = 'og_db_file'
raw_db_dir = 'raw_db'

# shutil.copy() treats a non-existent destination as a *file name*, so without
# this the databases would be written one over the other into a file called
# 'raw_db' and the later sqlite3.connect('raw_db/...') calls would fail.
os.makedirs(raw_db_dir, exist_ok=True)

for file_name in os.listdir(og_db_dir):
    full_file_name = os.path.join(og_db_dir, file_name)
    # Sub-directories are skipped; only plain files are copied.
    if os.path.isfile(full_file_name):
        shutil.copy(full_file_name, raw_db_dir)

#### A. Extracting from source table

In [2]:
def _preview_database(db_path, heading, label, row_limit=10):
    """Open a SQLite database and print its table list plus a sample of each table.

    Args:
        db_path: Path of the SQLite file to open.
        heading: Line printed above the list of tables.
        label: Database label used in each per-table heading.
        row_limit: Maximum number of rows shown per table.

    Returns:
        A ``(connection, tables)`` pair: the open ``sqlite3`` connection and a
        DataFrame with a single ``name`` column read from ``sqlite_master``.
    """
    conn = sqlite3.connect(db_path)
    tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn)
    print(heading)
    print(tables)

    for table in tables['name']:
        print(f"\nContent of {table} table in {label} database:")
        # Identifiers cannot be bound as parameters, so quote the name instead
        # (doubling any embedded quote) to keep unusual table names parseable.
        quoted_table = '"' + str(table).replace('"', '""') + '"'
        print(pd.read_sql_query(f"SELECT * FROM {quoted_table} LIMIT {int(row_limit)}", conn))

    return conn, tables


# Check content of the source (lpms), ums and ewip_dw databases
lpms_conn, source_tables = _preview_database('raw_db/lpms.db', "Source Database Tables:", "source")
ums_conn, ums_tables = _preview_database('raw_db/ums.db', "\nUMS Database Tables:", "ums")
ewip_dw_conn, ewip_dw_tables = _preview_database('raw_db/ewip_dw.db', "\nEWIP_DW Database Tables:", "ewip_dw")

# Cursors on the source databases (s1 -> lpms, s2 -> ums).
# Nothing has been written yet, so no commit is needed here.
s1 = lpms_conn.cursor()
s2 = ums_conn.cursor()

# Cursor on the DW database
c = ewip_dw_conn.cursor()

Source Database Tables:
       name
0  listings
1     items

Content of listings table in source database:
                             listing_id                               shop_id                              model_id        model_name  \
0  6c596216-ae0f-4bc8-a36b-cb0167e98363  f97ccc57-ce5d-4807-a025-719990823eda  905c053b-25fd-4cbe-bce7-1b48fba52e59         Shirt Pro   
1  a2939b3b-7fa7-4d8a-bf88-ec827f99d273  8f928dc5-1972-4ce3-9bd0-94486a2b3200  d5627386-528c-4241-a345-ac72eac39204    Smartphone Max   
2  bf3df0bb-f66a-4168-b4a1-ca795718ada2  96fd35d0-adf2-4806-a521-460637176e84  027c013f-3801-4399-ae6a-8e2f9c19ed34      Camera Basic   
3  dfde2281-25fb-4f3d-866d-7002091472ad  9558867f-5ba9-4faf-ba02-4204f7c1bd87  52631db9-d170-44ce-9179-7350e6256403       Camera Lite   
4  786e30ef-ce9b-4e70-b4d4-dfccb7d779cc  4d6b234f-dfa7-46ed-b2d1-f81ba636425c  4b5ca436-953c-478e-a106-7a8cd7a3283c    Smartphone Max   
5  4cea2df0-0a66-4c4e-a168-1081399f8a8f  9558867f-5ba9-4faf-ba02-4204f7

In [3]:
# Create the ETL work tables for dim_item in the DW database.
# Each table is dropped and recreated, so rows left over from an earlier run
# (including anything in M_dim_item) are discarded whenever this cell runs.

# Column definitions shared by every *_dim_item work table, in table order.
dim_item_column_defs = [
    ('sku_id', 'TEXT'),
    ('shop_id', 'TEXT'),
    ('listing_id', 'TEXT'),
    ('model_name', 'TEXT'),
    ('model_description', 'TEXT'),
    ('category_lvl_1', 'TEXT'),
    ('category_lvl_2', 'TEXT'),
    ('model_id', 'TEXT'),
    ('item_id', 'TEXT'),
    ('item_description', 'TEXT'),
    ('weight', 'REAL'),
    ('length', 'REAL'),
    ('width', 'REAL'),
    ('height', 'REAL'),
    ('item_price', 'REAL'),
    ('is_active', 'BOOLEAN'),
    ('create_time', 'DATETIME'),
    ('banned_time', 'DATETIME'),
    ('last_modified_time', 'DATETIME'),
    ('is_wh', 'BOOLEAN'),
]

# Extra trailing column carried only by the I and U tables.
latest_record_column_def = ('is_latest_record', 'BOOLEAN')

# Table prefix -> whether the table has the is_latest_record column.
# S = staging, M = master, X = extract, E = error, C = clean, T = transform.
# NOTE(review): I and U are presumably the insert/update tables — confirm.
dim_item_work_tables = {
    'S': False,
    'M': False,
    'X': False,
    'E': False,
    'C': False,
    'T': False,
    'I': True,
    'U': True,
}

for table_prefix, has_latest_flag in dim_item_work_tables.items():
    work_table = f'{table_prefix}_dim_item'
    column_defs = dim_item_column_defs + ([latest_record_column_def] if has_latest_flag else [])
    ddl_columns = ',\n    '.join(f'{col_name} {col_type}' for col_name, col_type in column_defs)
    c.execute(f'DROP TABLE IF EXISTS {work_table}')
    c.execute(f'CREATE TABLE {work_table}(\n    {ddl_columns}\n)')

# dim_item itself is not recreated here; it only gains the is_latest_record
# flag. SQLite has no "ADD COLUMN IF NOT EXISTS", so check first: an
# unconditional ALTER raises "duplicate column name" when the cell is re-run.
existing_dim_item_columns = [info[1] for info in c.execute('PRAGMA table_info(dim_item)').fetchall()]
if 'is_latest_record' not in existing_dim_item_columns:
    c.execute('''ALTER TABLE dim_item ADD COLUMN is_latest_record BOOLEAN''')
ewip_dw_conn.commit()

          

In [4]:
# Extract the data from the source databases
# listings and items are read through s1, sellers through s2.
s1.execute('SELECT * FROM listings')
listing = s1.fetchall()

s1.execute('SELECT * FROM items')
items = s1.fetchall()

s2.execute('SELECT * FROM sellers')
sellers = s2.fetchall()

# Convert each fetched data set to a pandas DataFrame.
# Column names are assigned by position, so they must stay in the same order
# as the columns returned by SELECT * on each source table.
listing_df = pd.DataFrame(listing, columns=['listing_listing_id', 'listing_shop_id', 'model_id', 'model_name', 'model_description', 'category_lvl_1', 'category_lvl_2', 'create_time_listing', 'banned_time', 'banned_by','last_modified_time'])
items_df = pd.DataFrame(items, columns=['sku_id', 'items_shop_id','listing_id', 'model_id', 'item_id','item_description', 'stock_qty', 'weight', 'length', 'width', 'height', 'item_price', 'is_active', 'create_time', 'last_modified_time'])
sellers_df = pd.DataFrame(sellers, columns=['user_id', 'shop_id', 'shop_name', 'shop_category', 'shop_create_time', 'is_active_shop', 'last_modified_time', 'is_wh'])

# Merging the 3 DataFrames: listings -> items on model_id, then -> sellers on shop id.
staging = pd.merge(listing_df, items_df, on='model_id', how='inner')
staging = pd.merge(staging, sellers_df, left_on='listing_shop_id', right_on='shop_id', how='inner')

# NOTE(review): listings and items both have a 'last_modified_time' column, so
# the first merge renames them to 'last_modified_time_x' / 'last_modified_time_y'.
# The un-suffixed 'last_modified_time' loaded below is therefore the sellers'
# value — TODO confirm that this is the intended source for dim_item.

# Columns loaded into S_dim_item, in table order.
s_dim_item_columns = ['sku_id', 'shop_id', 'listing_id', 'model_name', 'model_description', 'category_lvl_1', 'category_lvl_2', 'model_id', 'item_id', 'item_description', 'weight', 'length', 'width', 'height', 'item_price', 'is_active', 'create_time', 'banned_time', 'last_modified_time', 'is_wh']

# Insert into the Staging table
# Remove existing data inside the staging table for items
c.execute('DELETE FROM S_dim_item')

# Bulk insert: one prepared statement for all rows instead of one execute()
# call per row produced by iterrows().
insert_s_sql = (
    f'INSERT INTO S_dim_item ({", ".join(s_dim_item_columns)}) '
    f'VALUES ({", ".join(["?"] * len(s_dim_item_columns))})'
)
c.executemany(insert_s_sql, staging[s_dim_item_columns].itertuples(index=False, name=None))
ewip_dw_conn.commit()

# Select all data from S_dim_item to verify insertion
c.execute('SELECT * FROM S_dim_item')
rows = c.fetchall()
df_s_dim_item = pd.DataFrame(rows, columns=[desc[0] for desc in c.description])
df_s_dim_item.head()

Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### B. Compare New and Changed Data

In [5]:
# Get New and Changed Data from the Staging Table compared with the Master Data

# New data: staging rows whose sku_id has no counterpart in M_dim_item.
# NOT EXISTS is used instead of NOT IN because "x NOT IN (subquery)" evaluates
# to NULL for every row as soon as the subquery returns a single NULL, which
# would silently report no new data at all.
s_table_new_data_df = pd.read_sql("""
    SELECT s.*
    FROM S_dim_item s
    WHERE NOT EXISTS (
        SELECT 1 FROM M_dim_item m WHERE m.sku_id = s.sku_id
    )
    """, ewip_dw_conn)

# Changed data: same sku_id in both tables but at least one tracked attribute
# differs. "IS NOT" is SQLite's null-safe inequality; plain "!=" yields NULL
# when either side is NULL, so a value appearing or disappearing (for example
# banned_time being set for the first time) would never count as a change.
s_table_changed_data_df = pd.read_sql("""
    SELECT s.*
    FROM S_dim_item s
    INNER JOIN M_dim_item m
    ON s.sku_id = m.sku_id
    WHERE s.model_name IS NOT m.model_name
        OR s.model_description IS NOT m.model_description
        OR s.category_lvl_1 IS NOT m.category_lvl_1
        OR s.category_lvl_2 IS NOT m.category_lvl_2
        OR s.item_description IS NOT m.item_description
        OR s.weight IS NOT m.weight
        OR s.length IS NOT m.length
        OR s.width IS NOT m.width
        OR s.height IS NOT m.height
        OR s.item_price IS NOT m.item_price
        OR s.is_active IS NOT m.is_active
        OR s.banned_time IS NOT m.banned_time
        OR s.is_wh IS NOT m.is_wh
    """, ewip_dw_conn)

# Combine new and changed data. Empty frames are left out of the concat:
# pandas deprecates letting empty entries take part in dtype resolution.
non_empty_extracts = [
    frame for frame in (s_table_new_data_df, s_table_changed_data_df) if not frame.empty
]
if non_empty_extracts:
    s_table_extract_df = pd.concat(non_empty_extracts, ignore_index=True)
else:
    # Nothing new and nothing changed: keep an empty frame with the right columns.
    s_table_extract_df = s_table_new_data_df.copy()

# Display the extracted data
s_table_extract_df.head()

  s_table_extract_df = pd.concat([s_table_new_data_df, s_table_changed_data_df], ignore_index=True)


Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### C. Insert into Extract Tables

In [6]:
# X tables are the extract tables.
# Empty X_dim_item before it is reloaded and persist the deletion.
delete_x_data = c.execute('DELETE FROM X_dim_item')
ewip_dw_conn.commit()

# Read the table back so the cell output shows what is left in it.
c.execute('SELECT * FROM X_dim_item').fetchall()

[]

In [7]:
# INSERT INTO X_dim_item from the staging extract (new + changed rows)

# Column list and matching placeholders, taken from the extract DataFrame
x_insert_columns = s_table_extract_df.columns.tolist()
cols = '","'.join(str(col_name) for col_name in x_insert_columns)
sql = f'INSERT INTO X_dim_item ("{cols}") VALUES ({",".join(["?"] * len(x_insert_columns))})'

# Bulk insert: one prepared statement for all rows instead of one execute()
# call per iterrows() row. An empty extract simply inserts nothing.
c.executemany(sql, s_table_extract_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
c.execute("SELECT * FROM X_dim_item")
rows = c.fetchall()
df_x_dim_item = pd.DataFrame(rows, columns=[desc[0] for desc in c.description])
df_x_dim_item.head()

Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### D. Clean X Table and Insert Into Error Table

In [8]:
# CLEAN X TABLE and collect rows for the ERROR TABLE (E Table)

# Rows with a NULL in any column treated as non-nullable.
# category_lvl_2, the dimensions/weight, banned_time and is_wh are not checked.
x_table_null_violations = pd.read_sql("""
SELECT * FROM X_dim_item
WHERE sku_id IS NULL
    OR shop_id IS NULL
    OR listing_id IS NULL
    OR model_name IS NULL
    OR model_description IS NULL
    OR category_lvl_1 IS NULL
    OR model_id IS NULL
    OR item_id IS NULL
    OR item_description IS NULL
    OR item_price IS NULL
    OR is_active IS NULL
    OR create_time IS NULL
    OR last_modified_time IS NULL
""", ewip_dw_conn)
x_table_null_violations['ErrorType'] = 'Null values in non-nullable columns'

# Every row whose sku_id occurs more than once in the extract.
# COUNT(sku_id) ignores NULLs, so NULL sku_ids are only caught by the check above.
x_table_duplicate_sku_df = pd.read_sql("""
SELECT * FROM X_dim_item
WHERE sku_id IN (
    SELECT sku_id FROM X_dim_item
    GROUP BY sku_id
    HAVING COUNT(sku_id) > 1
)""", ewip_dw_conn)
# The label names the column that is actually checked (sku_id).
x_table_duplicate_sku_df['ErrorType'] = 'Duplicate sku_id'

# Combine errors into one dataframe; a row violating both rules appears twice.
x_table_errors_df = pd.concat([x_table_null_violations, x_table_duplicate_sku_df])

# Cleaning
# Set unknown (NULL) is_wh to 0
update_xitems = c.execute("UPDATE X_dim_item SET is_wh = 0 WHERE is_wh IS NULL")
# Other cleaning operations depending on the actual data

# Delete data inside E_dim_item first
delete_eitems = c.execute('DELETE FROM E_dim_item')
ewip_dw_conn.commit()

# Cell output: E_dim_item is expected to be empty at this point.
c.execute("SELECT * FROM E_dim_item")
c.fetchall()

[]

In [9]:
# INSERT the collected error rows INTO E_dim_item

# The error frame holds every X_dim_item column plus 'ErrorType'. Add that
# column to E_dim_item when the table does not have it yet, so the insert
# below can store the reason next to each rejected row.
e_table_columns = {info[1] for info in c.execute('PRAGMA table_info(E_dim_item)').fetchall()}
if 'ErrorType' in x_table_errors_df.columns and 'ErrorType' not in e_table_columns:
    c.execute('ALTER TABLE E_dim_item ADD COLUMN ErrorType TEXT')

# Column list and placeholders come from the frame itself, so the number of
# values always matches the number of named columns.
error_insert_columns = x_table_errors_df.columns.tolist()
cols = '","'.join(str(col_name) for col_name in error_insert_columns)
sql = f'INSERT INTO E_dim_item ("{cols}") VALUES ({",".join(["?"] * len(error_insert_columns))})'

# Bulk insert; an empty error frame inserts nothing.
c.executemany(sql, x_table_errors_df.itertuples(index=False, name=None))

ewip_dw_conn.commit()

# Check if inserted
c.execute("SELECT * FROM E_dim_item")
c.fetchall()


[]

#### E. Process Clean Data and Insert Into C Table


In [10]:
# Process Clean Data
# Select Clean Data: extract rows whose sku_id was not logged in the error table.
# "IS" is SQLite's null-safe equality, so an extract row with a NULL sku_id is
# matched against (and excluded by) its NULL-keyed error row. The previous
# "sku_id NOT IN (SELECT sku_id FROM E_dim_item)" form evaluates to NULL for
# every row once E_dim_item holds one NULL sku_id, leaving no clean rows at all.
x_table_clean_data_df = pd.read_sql("""
SELECT x.*
FROM X_dim_item x
WHERE NOT EXISTS (
    SELECT 1 FROM E_dim_item e WHERE e.sku_id IS x.sku_id
)
""", ewip_dw_conn)

# DELETE existing data in C table
delete_citems = c.execute('DELETE FROM C_dim_item')

# Cell output: C_dim_item is expected to be empty at this point.
c.execute("SELECT * FROM C_dim_item")
c.fetchall()

[]

In [11]:
# Actual INSERT INTO C Table (clean rows selected from the X table)

# Column list and matching placeholders, taken from the clean-data DataFrame
c_insert_columns = x_table_clean_data_df.columns.tolist()
cols = '","'.join(str(col_name) for col_name in c_insert_columns)
sql = f'INSERT INTO C_dim_item ("{cols}") VALUES ({",".join(["?"] * len(c_insert_columns))})'

# Bulk insert: one prepared statement for all rows instead of one execute()
# call per iterrows() row. An empty frame inserts nothing.
c.executemany(sql, x_table_clean_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
c.execute("SELECT * FROM C_dim_item")
rows = c.fetchall()
df_c_dim_item = pd.DataFrame(rows, columns=[desc[0] for desc in c.description])
df_c_dim_item.head()

Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### F. Update Master Table (M Table)


In [12]:
	# INSERT Clean data into M Table

	# Select all NEW rows from the C table: sku_ids not yet present in M_dim_item
	c_table_new_date_df = pd.read_sql("""
	SELECT * FROM C_dim_item c
	WHERE c.sku_id NOT IN
	(SELECT m.sku_id FROM M_dim_item m)
	""", ewip_dw_conn)

	# Column list and matching placeholders, taken from the DataFrame
	m_insert_columns = c_table_new_date_df.columns.tolist()
	cols = '","'.join(str(col_name) for col_name in m_insert_columns)
	sql = f'INSERT INTO M_dim_item ("{cols}") VALUES ({",".join(["?"] * len(m_insert_columns))})'

	# Bulk insert: one prepared statement for all rows instead of one
	# execute() call per iterrows() row. An empty frame inserts nothing.
	c.executemany(sql, c_table_new_date_df.itertuples(index=False, name=None))

	# The connection is not autocommitted by default, so we must commit to save our changes
	ewip_dw_conn.commit()

	# Check if inserted
	c.execute("SELECT * FROM M_dim_item")
	rows = c.fetchall()
	df_m_dim_item = pd.DataFrame(rows, columns=[desc[0] for desc in c.description])
	df_m_dim_item.head()


Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


In [13]:
import pandas as pd
import sqlite3

# Processing Changed Data and Update the master Data.

# Attributes compared between the clean (c) and master (m) row of a sku_id.
tracked_columns = [
    'model_name', 'model_description', 'category_lvl_1', 'category_lvl_2',
    'item_description', 'weight', 'length', 'width', 'height',
    'item_price', 'is_active', 'banned_time', 'is_wh',
]

# One shared "row differs" predicate for the three statements below.
# "IS NOT" is SQLite's null-safe inequality; plain "!=" yields NULL when either
# side is NULL, so a value appearing or disappearing (for example banned_time
# being set for the first time) would never be detected as a change.
changed_predicate = '\n        OR '.join(
    f'c.{col_name} IS NOT m.{col_name}' for col_name in tracked_columns
)

# sku_ids present in both tables whose tracked attributes differ.
changed_sku_subquery = f"""
    SELECT c.sku_id
    FROM C_dim_item AS c
    JOIN M_dim_item AS m ON c.sku_id = m.sku_id
    WHERE {changed_predicate}
"""

# Select ALL Changed from C Table (must run before the DELETE below: once the
# master rows are gone the join no longer finds them).
c_table_changed_data_df = pd.read_sql(f"""
SELECT c.*
FROM C_dim_item AS c
JOIN M_dim_item AS m ON c.sku_id = m.sku_id
WHERE {changed_predicate}
""", ewip_dw_conn)

# Delete from M_dim_item where data has changed
delete_updated_data = c.execute(f"""
DELETE FROM M_dim_item
WHERE sku_id IN ({changed_sku_subquery})
""")
print("Deleted from M_dim_item which are updated:")
print(c_table_changed_data_df)
print(f"\n")

# Verify deletion: no changed sku_id should remain in M_dim_item
result = c.execute(f"""
SELECT * FROM M_dim_item
WHERE sku_id IN ({changed_sku_subquery})
""").fetchall()
ewip_dw_conn.commit()
df_m_dim_item = pd.DataFrame(result, columns=[desc[0] for desc in c.description])
df_m_dim_item.head()



Deleted from M_dim_item which are updated:
Empty DataFrame
Columns: [sku_id, shop_id, listing_id, model_name, model_description, category_lvl_1, category_lvl_2, model_id, item_id, item_description, weight, length, width, height, item_price, is_active, create_time, banned_time, last_modified_time, is_wh]
Index: []




Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh


In [14]:
# INSERT Clean Data INTO M Table with changed data
# (re-adds the changed rows held in c_table_changed_data_df)

# Column list and matching placeholders, taken from the DataFrame
changed_insert_columns = c_table_changed_data_df.columns.tolist()
cols = '","'.join(str(col_name) for col_name in changed_insert_columns)
sql = 'INSERT INTO M_dim_item ("' + cols + '") VALUES (' + ','.join(['?'] * len(changed_insert_columns)) + ')'

# Bulk insert: one prepared statement for all rows instead of one execute()
# call per iterrows() row. An empty frame inserts nothing.
c.executemany(sql, c_table_changed_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
c.execute("SELECT * FROM M_dim_item")
rows = c.fetchall()
df_m_dim_item = pd.DataFrame(rows, columns=[desc[0] for desc in c.description])
df_m_dim_item.head()


Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### G. Initiate Transform Processes

In [15]:
# Transform stage: pull every row of the C table so it can be shaped into the
# DW format. dim_item needs no transformation, so the rows are used as read.
c_table_data_df = pd.read_sql(
    sql="""
    SELECT * FROM C_dim_item
""",
    con=ewip_dw_conn,
)


In [16]:
# Prepare the T table for loading: remove whatever rows it currently holds.
delete_titems = c.execute('DELETE FROM T_dim_item')

# Read the table back as the cell output; an empty list means the purge worked.
c.execute("SELECT * FROM T_dim_item").fetchall()

[]

In [17]:
# Actual INSERT of C Table data into T Table
# The column list and the placeholder list are identical for every row, so the
# INSERT statement is built once up front instead of once per row.
cols = '","'.join(str(col) for col in c_table_data_df.columns)
placeholders = ','.join(['?'] * len(c_table_data_df.columns))
sql = f'INSERT INTO T_dim_item ("{cols}") VALUES ({placeholders})'

# Bind every row in one executemany call. itertuples(index=False, name=None)
# yields plain tuples in column order, matching the placeholder order above.
c.executemany(sql, c_table_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
pd.read_sql("SELECT * FROM T_dim_item", ewip_dw_conn).head()

Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0


#### H. Select Data from T Table and Insert into I and U Tables

In [18]:
# New rows are the T rows whose sku_id has no match in dim_item (anti-join via
# LEFT JOIN ... IS NULL). Each of them is flagged with is_latest_record = 1.
t_table_new_data_df = pd.read_sql("""
SELECT t.*
FROM t_dim_item t
LEFT JOIN dim_item d ON t.sku_id = d.sku_id
WHERE d.sku_id IS NULL
""", ewip_dw_conn).assign(is_latest_record=1)



In [19]:
# INSERT New Data INTO I Table

# DELETE existing data in I table so it only holds this run's new rows
delete_i_dim_item = c.execute('DELETE FROM I_dim_item')

# The column list and the placeholder list are identical for every row, so the
# INSERT statement is built once up front instead of once per row.
cols = '","'.join(str(col) for col in t_table_new_data_df.columns)
placeholders = ','.join(['?'] * len(t_table_new_data_df.columns))
sql = f'INSERT INTO I_dim_item ("{cols}") VALUES ({placeholders})'

# Bind every row in one executemany call. itertuples(index=False, name=None)
# yields plain tuples in column order, matching the placeholder order above.
c.executemany(sql, t_table_new_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
# (this also commits the DELETE issued above)
ewip_dw_conn.commit()

# Check if inserted
pd.read_sql("SELECT * FROM I_dim_item", ewip_dw_conn).head()

Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record
0,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0,1
1,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0,1
2,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-02-21 00:00:00,0,1
3,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-02-21 00:00:00,0,1
4,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-02-21 00:00:00,0,1


In [20]:
# INSERT Changed Data INTO U Table
# Select the T rows whose tracked attributes differ from the current
# (is_latest_record = 1) version of the same sku_id in dim_item.
#
# The comparison uses IS NOT rather than != because != evaluates to NULL when
# either operand is NULL, which would silently hide changes such as
# banned_time going from NULL to a timestamp. IS NOT treats NULL as an
# ordinary comparable value.
t_table_changed_data_df = pd.read_sql('''
SELECT t.*
FROM t_dim_item t
INNER JOIN dim_item d
ON t.sku_id = d.sku_id
WHERE (t.model_name IS NOT d.model_name
        OR t.model_description IS NOT d.model_description
        OR t.category_lvl_1 IS NOT d.category_lvl_1
        OR t.category_lvl_2 IS NOT d.category_lvl_2
        OR t.item_description IS NOT d.item_description
        OR t.weight IS NOT d.weight
        OR t.length IS NOT d.length
        OR t.width IS NOT d.width
        OR t.height IS NOT d.height
        OR t.item_price IS NOT d.item_price
        OR t.is_active IS NOT d.is_active
        OR t.banned_time IS NOT d.banned_time
        OR t.is_wh IS NOT d.is_wh)
    AND d.is_latest_record = 1
''', ewip_dw_conn)
# Changed rows become the new current version of their sku_id
t_table_changed_data_df['is_latest_record'] = 1

# Delete existing data from the U Table first
delete_uitems = c.execute('DELETE FROM U_dim_item')
# Read the table back as the cell output; an empty list means the purge worked
c.execute("SELECT * FROM U_dim_item")
c.fetchall()

[]

In [21]:
# Actual INSERT of Changed data into U table
# Column identifiers are wrapped in double quotes, the SQL identifier quoting
# used by the other insert cells. Single quotes denote string literals and are
# only tolerated as identifiers by SQLite for compatibility.
# The statement is identical for every row, so it is built once up front.
cols = '","'.join(str(col) for col in t_table_changed_data_df.columns)
placeholders = ','.join(['?'] * len(t_table_changed_data_df.columns))
sql = f'INSERT INTO U_dim_item ("{cols}") VALUES ({placeholders})'

# Bind every row in one executemany call. itertuples(index=False, name=None)
# yields plain tuples in column order, matching the placeholder order above.
c.executemany(sql, t_table_changed_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
pd.read_sql("SELECT * FROM U_dim_item", ewip_dw_conn)


Unnamed: 0,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record


#### I. Insert I Table into D Table

In [22]:
# INSERT I INTO D Table
# Get Max Warehouse Key (COALESCE makes this 0 while dim_item is still empty)
maxkey = pd.read_sql('''SELECT COALESCE(MAX(sku_key), 0) as MAX FROM dim_item''', ewip_dw_conn)

# Select Data to be INSERTED from I Table
i_table_data_df = pd.read_sql("SELECT * FROM I_dim_item", ewip_dw_conn)

# Identify the next set of sku_key values to be assigned to the New Data from I Table.
# The start value is reduced to a plain int: passing one-element arrays to
# np.arange triggers a NumPy warning. The column is assigned even when there
# are no new rows (an empty range) so the column reorder below cannot fail
# with a KeyError on a run that has nothing new to load.
start_value = int(pd.to_numeric(maxkey['MAX'].iloc[0])) + 1
i_table_data_df['sku_key'] = np.arange(start_value, start_value + len(i_table_data_df), dtype='int64')
if i_table_data_df.empty:
    print("Either the data table or maxkey is empty. No operation performed.")

# Rearrange according to the D table format of columns, then stamp
# last_modified_time with the current time. assign() works on the reordered
# copy, so no value is written through a slice of the original frame.
i_table_data_df = i_table_data_df[['sku_key', 'sku_id','shop_id', 'listing_id', 'model_name', 'model_description', 'category_lvl_1', 'category_lvl_2','model_id', 'item_id', 'item_description',
                                   'weight', 'length', 'width', 'height','item_price', 'is_active', 'create_time', 'banned_time', 'last_modified_time','is_wh', 'is_latest_record']].assign(
    last_modified_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
)
i_table_data_df.head()

  i_table_data_df['sku_key'] = np.arange(start_value, start_value + len(i_table_data_df))


Unnamed: 0,sku_key,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record
0,1,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
1,2,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
2,3,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
3,4,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-12-22 20:56:02,0,1
4,5,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-12-22 20:56:02,0,1


In [23]:
# Now INSERT into D Table
# The column list and the placeholder list are identical for every row, so the
# INSERT statement is built once up front instead of once per row.
cols = '","'.join(str(col) for col in i_table_data_df.columns)
placeholders = ','.join(['?'] * len(i_table_data_df.columns))
sql = f'INSERT INTO dim_item ("{cols}") VALUES ({placeholders})'

# Bind every row in one executemany call. itertuples(index=False, name=None)
# yields plain tuples in column order, matching the placeholder order above.
c.executemany(sql, i_table_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
pd.read_sql("SELECT * FROM dim_item", ewip_dw_conn).head()

Unnamed: 0,sku_key,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record
0,1,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
1,2,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
2,3,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
3,4,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-12-22 20:56:02,0,1
4,5,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-12-22 20:56:02,0,1


#### J. Insert U Table Data (Type 2) into D Table

- Type 1 SCD: Overwrites the existing data with new data.
- Type 2 SCD: Creates a new record for each change, preserving the history of changes.
- Type 3 SCD: Adds new columns to the table to store the previous values of the changed attributes.




In [24]:
#%% Select Type 2 from U table and then update the D table

# Get Max Warehouse Key (COALESCE makes this 0 if dim_item is empty, matching
# the lookup used when keying the I table rows)
maxkey = pd.read_sql('''SELECT COALESCE(MAX(sku_key), 0) as MAX FROM dim_item''', ewip_dw_conn)

# Select Data to be INSERTED from U Table: U rows that differ from the current
# (is_latest_record = 1) dim_item row of the same sku_id.
# IS NOT is used instead of != because != evaluates to NULL when either
# operand is NULL, which would hide changes to or from NULL (e.g. banned_time).
u_table_type2_data_df = pd.read_sql('''
    SELECT u.*
    FROM u_dim_item u
    INNER JOIN dim_item d
    ON u.sku_id = d.sku_id
    WHERE (u.model_name IS NOT d.model_name
            OR u.model_description IS NOT d.model_description
            OR u.category_lvl_1 IS NOT d.category_lvl_1
            OR u.category_lvl_2 IS NOT d.category_lvl_2
            OR u.item_description IS NOT d.item_description
            OR u.weight IS NOT d.weight
            OR u.length IS NOT d.length
            OR u.width IS NOT d.width
            OR u.height IS NOT d.height
            OR u.item_price IS NOT d.item_price
            OR u.is_active IS NOT d.is_active
            OR u.banned_time IS NOT d.banned_time
            OR u.is_wh IS NOT d.is_wh)
        AND d.is_latest_record = 1
''', ewip_dw_conn)

# Identify the next set of sku_key values to be assigned to the New Data from U Table.
# The start value is reduced to a plain int: passing one-element arrays to
# np.arange triggers a NumPy warning.
start_value = int(pd.to_numeric(maxkey['MAX'].iloc[0])) + 1
u_table_type2_data_df['sku_key'] = np.arange(start_value, start_value + len(u_table_type2_data_df), dtype='int64')

# Rearrange according to the D table format of columns, then stamp
# last_modified_time with the current time. assign() works on the reordered
# copy, so no value is written through a slice of the original frame.
u_table_type2_data_df = u_table_type2_data_df[['sku_key', 'sku_id','shop_id', 'listing_id', 'model_name', 'model_description', 'category_lvl_1', 'category_lvl_2','model_id', 'item_id', 'item_description',
                                   'weight', 'length', 'width', 'height','item_price', 'is_active', 'create_time', 'banned_time', 'last_modified_time','is_wh', 'is_latest_record']].assign(
    last_modified_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
)
u_table_type2_data_df.head()


  u_table_type2_data_df['sku_key'] = np.arange(pd.to_numeric(maxkey.iloc[0].values) + 1,


Unnamed: 0,sku_key,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record


In [25]:
# Now INSERT CHANGED data (Type 2) from U Table into D Table

# The column list and the placeholder list are identical for every row, so the
# INSERT statement is built once up front instead of once per row.
cols = '","'.join(str(col) for col in u_table_type2_data_df.columns)
placeholders = ','.join(['?'] * len(u_table_type2_data_df.columns))
sql = f'INSERT INTO dim_item ("{cols}") VALUES ({placeholders})'

# Bind every row in one executemany call. itertuples(index=False, name=None)
# yields plain tuples in column order, matching the placeholder order above.
# When there are no changed rows, nothing is inserted.
c.executemany(sql, u_table_type2_data_df.itertuples(index=False, name=None))

# The connection is not autocommitted by default, so we must commit to save our changes
ewip_dw_conn.commit()

# Check if inserted
pd.read_sql("SELECT * FROM dim_item", ewip_dw_conn).head()

Unnamed: 0,sku_key,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record
0,1,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
1,2,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
2,3,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
3,4,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-12-22 20:56:02,0,1
4,5,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-12-22 20:56:02,0,1


#### K. Update Indicators to Current in D Table

In [26]:
# Update is_latest_record indicator in D Table
# For every sku_id, only the row with the most recent last_modified_time stays
# flagged as latest; all older versions of that sku_id are set to 0.
# The grouping must be on sku_id, not sku_key: a fresh sku_key is assigned to
# every inserted row, so grouping by sku_key puts each row in its own group
# and no older version would ever be expired.
c.execute('''
UPDATE dim_item
SET is_latest_record = 0
WHERE (sku_id, last_modified_time) NOT IN (
    SELECT sku_id, MAX(last_modified_time)
    FROM dim_item
    GROUP BY sku_id
)
''')
ewip_dw_conn.commit()

# Show the dimension after the indicator refresh
pd.read_sql("SELECT * FROM dim_item", ewip_dw_conn).head()


Unnamed: 0,sku_key,sku_id,shop_id,listing_id,model_name,model_description,category_lvl_1,category_lvl_2,model_id,item_id,item_description,weight,length,width,height,item_price,is_active,create_time,banned_time,last_modified_time,is_wh,is_latest_record
0,1,bb5f15c1-b044-4ebb-8445-17ef1c769e1f,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,15db4487-6038-43f3-ab2b-cfda9d4319b5,"Shirt Pro in White color, made from Wood.",1.61,17.52,23.78,6.25,299.68,0,2024-12-26 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
1,2,ef89c2e7-aa17-4666-9b6e-83a96f2af3b2,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2449213a-c2fa-4184-a232-4672f64e8f4a,"Shirt Pro in White color, made from Plastic.",1.72,24.53,7.97,16.6,1347.02,0,2024-01-06 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
2,3,ff9e16ef-3a0b-4373-967f-fb07cbc96874,f97ccc57-ce5d-4807-a025-719990823eda,6c596216-ae0f-4bc8-a36b-cb0167e98363,Shirt Pro,High-quality shirt pro designed for performance.,Books,Non-Fiction,905c053b-25fd-4cbe-bce7-1b48fba52e59,2513be03-7e8f-4d3d-bde6-0dd454b3f5c1,"Shirt Pro in White color, made from Glass.",4.61,47.8,39.84,27.82,4000.58,1,2024-04-05 00:00:00,2024-07-14 00:00:00,2024-12-22 20:56:02,0,1
3,4,52c1b3c7-f33c-4233-8833-b07eb4d56ffc,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,039d60ba-9c99-4735-95a6-52a708fce7af,"Chair Max in Blue color, made from Glass.",4.91,29.32,45.85,21.11,468.68,1,2024-06-13 00:00:00,,2024-12-22 20:56:02,0,1
4,5,6bc1e83d-96c9-454e-8224-6f10ce65b211,f97ccc57-ce5d-4807-a025-719990823eda,53125ffd-f655-460b-9d32-e231eb561699,Chair Max,High-quality chair max designed for performance.,Electronics,Mobile Phones,7f22cd12-07b6-408e-aac1-ca75afb918c8,3bd4258c-9a3b-47be-b26b-f956f871af48,"Chair Max in Black color, made from Glass.",2.15,44.08,8.06,3.7,2854.42,1,2024-01-03 00:00:00,,2024-12-22 20:56:02,0,1
