In [26]:
import os
import pyodbc
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv

In [27]:
load_dotenv()

True

In [28]:
def get_db_connection(DB_NAME:str):
    conn = pyodbc.connect(
        f"DRIVER={os.getenv('DB_DRIVER')};"
        f"SERVER={os.getenv('DB_SERVER')};"
        f"DATABASE={DB_NAME};"
        f"UID={os.getenv('DB_UID')};"
        f"PWD={os.getenv('DB_PWD')};"
        f"TrustServerCertificate={os.getenv('TRUST_SERVER_CERTIFICATE')};"
    )
    
    return conn


Save all tables relate to products to SQL Server

In [29]:
conn = get_db_connection(os.getenv('DB_NAME'))
cursor = conn.cursor()

In [30]:
def nan_to_none(df: pd.DataFrame):
    return df.where(pd.notnull(df), None)

In [31]:
category_df = pd.read_csv('./transformedData/category.csv', encoding='utf-8')
product_df = pd.read_csv('./transformedData/product.csv', encoding='utf-8')
attribute_df = pd.read_csv('./transformedData/attribute.csv', encoding='utf-8')
attribute_value_df = pd.read_csv('./transformedData/attribute_value.csv', encoding='utf-8')
product_variant_df = pd.read_csv('./transformedData/product_variant.csv', encoding='utf-8')
attribute_variant_df = pd.read_csv('./transformedData/attribute_variant.csv', encoding='utf-8')
feedback_df = pd.read_csv('./transformedData/feedback.csv', encoding='utf-8')
feedback_response_df = pd.read_csv('./transformedData/feedback_response.csv', encoding='utf-8')
discount_df = pd.read_csv('./transformedData/discount.csv', encoding='utf-8')
order_df = pd.read_csv('./transformedData/order.csv', encoding='utf-8')
order_item_df = pd.read_csv('./transformedData/order_item.csv', encoding='utf-8')
order_history_df = pd.read_csv('./transformedData/order_history.csv', encoding='utf-8')

In [32]:
conn.execute("""
    IF OBJECT_ID('order_history', 'U') IS NOT NULL DROP TABLE order_history;

    IF OBJECT_ID('order_item', 'U') IS NOT NULL DROP TABLE order_item;

    IF OBJECT_ID('order', 'U') IS NOT NULL DROP TABLE [order];   

    IF OBJECT_ID('discount', 'U') IS NOT NULL DROP TABLE discount;

    IF OBJECT_ID('feedback_response', 'U') IS NOT NULL DROP TABLE feedback_response;

    IF OBJECT_ID('feedback', 'U') IS NOT NULL DROP TABLE feedback;

    IF OBJECT_ID('attribute_variant', 'U') IS NOT NULL DROP TABLE attribute_variant;

    IF OBJECT_ID('product_variant', 'U') IS NOT NULL DROP TABLE product_variant;

    IF OBJECT_ID('attribute_value', 'U') IS NOT NULL DROP TABLE attribute_value;

    IF OBJECT_ID('attribute', 'U') IS NOT NULL DROP TABLE attribute;

    IF OBJECT_ID('product', 'U') IS NOT NULL DROP TABLE product;

    IF OBJECT_ID('category', 'U') IS NOT NULL DROP TABLE category; 
""")

<pyodbc.Cursor at 0x2b230ff5f30>

In [33]:
conn.execute("""
    CREATE TABLE category (
        id INT PRIMARY KEY IDENTITY(1,1),
        name NVARCHAR(255) NOT NULL,
    )
""")

conn.execute("""
    CREATE TABLE product (
        id INT PRIMARY KEY IDENTITY(1,1),
        category_id INT NOT NULL,
        name NVARCHAR(255) NOT NULL,
        description NVARCHAR(MAX),
        specification NVARCHAR(MAX),
        image_url NVARCHAR(MAX),
        brand NVARCHAR(255),
        CONSTRAINT fk_product_category FOREIGN KEY (category_id) REFERENCES category(id)
    )
""")

conn.execute("""
    CREATE TABLE attribute (
        id INT PRIMARY KEY IDENTITY(1,1),
        name NVARCHAR(255) NOT NULL
    )
""")

conn.execute("""
    CREATE TABLE attribute_value (
        id INT PRIMARY KEY IDENTITY(1,1),
        attribute_id INT NOT NULL,
        value NVARCHAR(255) NOT NULL,
        CONSTRAINT fk_attribute_value_attribute FOREIGN KEY (attribute_id) REFERENCES attribute(id)
    )
""")

conn.execute("""
    CREATE TABLE product_variant (
        id INT PRIMARY KEY IDENTITY(1,1),
        product_id INT NOT NULL,
        price DECIMAL(19,3) NOT NULL,
        original_price DECIMAL(19,3) NOT NULL,
        sku NVARCHAR(100),
        stock_quantity INT DEFAULT 0,
        sold_quantity INT DEFAULT 0,
        CONSTRAINT fk_product_variant_product FOREIGN KEY (product_id) REFERENCES product(id)
    )
""")

conn.execute("""
    CREATE TABLE attribute_variant (
        attribute_value_id INT NOT NULL,
        attribute_id INT NOT NULL,
        product_variant_id INT NOT NULL,
        CONSTRAINT pk_variant_attribute PRIMARY KEY (attribute_value_id, attribute_id, product_variant_id),
        CONSTRAINT fk_variant_attribute_attribute_value FOREIGN KEY (attribute_value_id) REFERENCES attribute_value(id),
        CONSTRAINT fk_variant_attribute_attribute FOREIGN KEY (attribute_id) REFERENCES attribute(id),
        CONSTRAINT fk_variant_attribute_product_variant FOREIGN KEY (product_variant_id) REFERENCES product_variant(id)
    )
""")

conn.execute("""
    CREATE TABLE feedback (
        id INT PRIMARY KEY IDENTITY(1,1),
        customer_id INT NOT NULL,
        product_id INT NOT NULL,
        product_variant_id INT NULL,
        rating INT CHECK (rating BETWEEN 1 AND 5),
        comment NVARCHAR(MAX) NULL,
        created_at DATETIME DEFAULT GETDATE(),
        CONSTRAINT fk_feedback_product FOREIGN KEY (product_id) REFERENCES product(id),
        CONSTRAINT fk_feedback_product_variant FOREIGN KEY (product_variant_id) REFERENCES product_variant(id)
    )
""")

conn.execute("""
    CREATE TABLE feedback_response (
        id INT PRIMARY KEY IDENTITY(1,1),
        manager_id INT NOT NULL,
        feedback_id INT NOT NULL,
        content NVARCHAR(MAX),
        created_at DATETIME DEFAULT GETDATE(),
        CONSTRAINT fk_feedback_response_manager FOREIGN KEY (manager_id) REFERENCES manager(id),
        CONSTRAINT fk_feedback_response_feedback FOREIGN KEY (feedback_id) REFERENCES feedback(id)
    )
""")

conn.execute("""
    CREATE TABLE discount (
        id INT PRIMARY KEY IDENTITY(1,1),
        product_variant_id INT NOT NULL,
        code NVARCHAR(100) UNIQUE,
        name NVARCHAR(255),
        type NVARCHAR(50), 
        value DECIMAL(19,3),
        status NVARCHAR(50) DEFAULT 'Active',
        start_date DATETIME,
        end_date DATETIME,
        CONSTRAINT fk_discount_product_variant FOREIGN KEY (product_variant_id) REFERENCES product_variant(id)
    )
""")

conn.execute("""
    CREATE TABLE [order] (
        id INT PRIMARY KEY IDENTITY(1,1),
        customer_id INT NOT NULL,
        order_date DATETIME DEFAULT GETDATE(),
        shipping_address NVARCHAR(500),
        status NVARCHAR(50) DEFAULT 'Processing', 
        payment_method NVARCHAR(255),
        payment_date DATETIME,
        payment_status NVARCHAR(50) DEFAULT 'Pending',
        payment_amount DECIMAL(19,3),
        CONSTRAINT fk_order_customer FOREIGN KEY (customer_id) REFERENCES customer(id)
    )
""")

conn.execute("""
    CREATE TABLE order_item (
        id INT PRIMARY KEY IDENTITY(1,1),
        product_variant_id INT NOT NULL,
        order_id INT NOT NULL,
        quantity INT NOT NULL DEFAULT 1,
        unit_price DECIMAL(10,3) NOT NULL,
        note NVARCHAR(500),
        CONSTRAINT fk_order_item_product_variant FOREIGN KEY (product_variant_id) REFERENCES product_variant(id),
        CONSTRAINT fk_order_item_order FOREIGN KEY (order_id) REFERENCES [order](id)
    )
""")

conn.execute("""
    CREATE TABLE order_history (
        id INT PRIMARY KEY IDENTITY(1,1),
        manager_id INT NOT NULL,
        order_id INT NOT NULL,
        processing_time DATETIME DEFAULT GETDATE(),
        previous_status NVARCHAR(50), 
        new_status NVARCHAR(50), 
        CONSTRAINT fk_order_history_manager FOREIGN KEY (manager_id) REFERENCES manager(id),
        CONSTRAINT fk_order_history_order FOREIGN KEY (order_id) REFERENCES [order](id)
    )
""")

<pyodbc.Cursor at 0x2b230ff6230>

In [34]:
category_tuples = [
    (row['id'], row['name']) 
    for _, row in tqdm(category_df.iterrows(), total=category_df.shape[0], desc="Create category tuples", unit="row", colour="green")
]

product_tuples = [
    (row['id'], row['category_id'], row['name'], row['description'], row['specification'], row['image_url'], row['brand'])
    for _, row in tqdm(product_df.iterrows(), total=product_df.shape[0], desc="Create product tuples", unit="row", colour="green")
]

attribute_tuples = [
    (row['id'], row['name']) 
    for _, row in tqdm(attribute_df.iterrows(), total=attribute_df.shape[0], desc="Create attribute tuples", unit="row", colour="green")
]

attribute_value_tuples = [
    (row['id'], row['attribute_id'], row['value']) 
    for _, row in tqdm(attribute_value_df.iterrows(), total=attribute_value_df.shape[0], desc="Create attribute_value tuples", unit="row", colour="green")
]

product_variant_tuples = [
    (row['id'], row['product_id'], row['price'], row['original_price'], row['sku'], row['stock_quantity'], row['sold_quantity'])
    for _, row in tqdm(product_variant_df.iterrows(), total=product_variant_df.shape[0], desc="Create product_variant tuples", unit="row", colour="green")
]

attribute_variant_tuples = [
    (int(row['attribute_value_id']), int(row['attribute_id']), int(row['product_variant_id'])) 
    for _, row in tqdm(attribute_variant_df.iterrows(), total=attribute_variant_df.shape[0], desc="Create attribute_variant tuples", unit="row", colour="green")
]

feedback_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(feedback_df.iterrows(), total=feedback_df.shape[0], desc="Create feedback tuples", unit="row", colour="green")
]

feedback_response_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(feedback_response_df.iterrows(), total=feedback_response_df.shape[0], desc="Create feedback_response tuples", unit="row", colour="green")
]

discount_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(discount_df.iterrows(), total=discount_df.shape[0], desc="Create discount tuples", unit="row", colour="green")
]

order_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(order_df.iterrows(), total=order_df.shape[0], desc="Create order tuples", unit="row", colour="green")
]

order_item_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(order_item_df.iterrows(), total=order_item_df.shape[0], desc="Create order_item tuples", unit="row", colour="green")
]

order_history_tuples = [
    tuple(None if pd.isna(x) else x for x in row)
    for _, row in tqdm(order_history_df.iterrows(), total=order_history_df.shape[0], desc="Create order_history tuples", unit="row", colour="green")
]

Create category tuples: 100%|[32m██████████[0m| 46/46 [00:00<00:00, 2642.55row/s]
Create product tuples: 100%|[32m██████████[0m| 2634/2634 [00:00<00:00, 11107.54row/s]
Create attribute tuples: 100%|[32m██████████[0m| 12/12 [00:00<00:00, 6740.54row/s]
Create attribute_value tuples: 100%|[32m██████████[0m| 715/715 [00:00<00:00, 15153.98row/s]
Create product_variant tuples: 100%|[32m██████████[0m| 3702/3702 [00:00<00:00, 12362.51row/s]
Create attribute_variant tuples: 100%|[32m██████████[0m| 4129/4129 [00:00<00:00, 20615.31row/s]
Create feedback tuples: 100%|[32m██████████[0m| 1934/1934 [00:00<00:00, 15386.48row/s]
Create feedback_response tuples: 100%|[32m██████████[0m| 549/549 [00:00<00:00, 15530.90row/s]
Create discount tuples: 100%|[32m██████████[0m| 1000/1000 [00:00<00:00, 15150.15row/s]
Create order tuples: 100%|[32m██████████[0m| 300000/300000 [00:19<00:00, 15228.23row/s]
Create order_item tuples: 100%|[32m██████████[0m| 300000/300000 [00:13<00:00, 21696.19row

In [35]:
cursor.execute("SET IDENTITY_INSERT category ON;")
cursor.executemany("INSERT INTO category (id, name) VALUES (?, ?)", category_tuples)
cursor.execute("SET IDENTITY_INSERT category OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [36]:
cursor.execute("SET IDENTITY_INSERT product ON;")
cursor.executemany("INSERT INTO product (id, category_id, name, description, specification, image_url, brand) VALUES (?, ?, ?, ?, ?, ?, ?)", product_tuples)
cursor.execute("SET IDENTITY_INSERT product OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [37]:
cursor.execute("SET IDENTITY_INSERT attribute ON;")
cursor.executemany("INSERT INTO attribute (id, name) VALUES (?, ?)", attribute_tuples)
cursor.execute("SET IDENTITY_INSERT attribute OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [38]:
cursor.execute("SET IDENTITY_INSERT attribute_value ON;")
cursor.executemany("INSERT INTO attribute_value (id, attribute_id, value) VALUES (?, ?, ?)", attribute_value_tuples)
cursor.execute("SET IDENTITY_INSERT attribute_value OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [39]:
cursor.execute("SET IDENTITY_INSERT product_variant ON;")
cursor.executemany("INSERT INTO product_variant (id, product_id, price, original_price, sku, stock_quantity, sold_quantity) VALUES (?, ?, ?, ?, ?, ?, ?)", product_variant_tuples)
cursor.execute("SET IDENTITY_INSERT product_variant OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [40]:
cursor.executemany("INSERT INTO attribute_variant (attribute_value_id, attribute_id, product_variant_id) VALUES (?, ?, ?)", attribute_variant_tuples)

In [41]:
cursor.execute("SET IDENTITY_INSERT feedback ON;")
cursor.executemany("INSERT INTO feedback (id, customer_id, product_id, product_variant_id, rating, comment, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)", feedback_tuples)
cursor.execute("SET IDENTITY_INSERT feedback OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [42]:
cursor.execute("SET IDENTITY_INSERT feedback_response ON;")
cursor.executemany("INSERT INTO feedback_response (id, manager_id, feedback_id, content, created_at) VALUES (?, ?, ?, ?, ?)", feedback_response_tuples)
cursor.execute("SET IDENTITY_INSERT feedback_response OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [43]:
cursor.execute("SET IDENTITY_INSERT discount ON;")
cursor.executemany("INSERT INTO discount (id, product_variant_id, code, name, type, value, status, start_date, end_date) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", discount_tuples)
cursor.execute("SET IDENTITY_INSERT discount OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [44]:
cursor.execute("SET IDENTITY_INSERT [order] ON;")
cursor.executemany("INSERT INTO [order] (id, customer_id, order_date, shipping_address, status, payment_method, payment_date, payment_status, payment_amount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", order_tuples)
cursor.execute("SET IDENTITY_INSERT [order] OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [45]:
cursor.execute("SET IDENTITY_INSERT order_item ON;")
cursor.executemany("INSERT INTO order_item (id, product_variant_id, order_id, quantity, unit_price, note) VALUES (?, ?, ?, ?, ?, ?)", order_item_tuples)
cursor.execute("SET IDENTITY_INSERT order_item OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [46]:
cursor.execute("SET IDENTITY_INSERT order_history ON;")
cursor.executemany("INSERT INTO order_history (id, manager_id, order_id, processing_time, previous_status, new_status) VALUES (?, ?, ?, ?, ?, ?)", order_history_tuples)
cursor.execute("SET IDENTITY_INSERT order_history OFF;")

<pyodbc.Cursor at 0x2b247098430>

In [47]:
# close
conn.commit()
conn.close()