In [1]:
import os
import pyodbc
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv

In [2]:
# Load settings from a .env file so the os.getenv() lookups in the cells below
# (DB_DRIVER, DB_SERVER, DB_NAME, DB_UID, DB_PWD, TRUST_SERVER_CERTIFICATE) can resolve.
load_dotenv()

True

In [3]:
def get_db_connection(DB_NAME:str) -> "pyodbc.Connection":
    """Open a pyodbc connection to ``DB_NAME`` using settings from the environment.

    The driver, server and credentials are read from the ``DB_DRIVER``,
    ``DB_SERVER``, ``DB_UID`` and ``DB_PWD`` environment variables.
    ``TRUST_SERVER_CERTIFICATE`` is optional and is only added to the
    connection string when it is set.

    Args:
        DB_NAME: Name of the database to connect to.

    Returns:
        The connection object returned by ``pyodbc.connect``.

    Raises:
        ValueError: If ``DB_NAME`` is ``None`` or a required environment
            variable is not set.
    """
    # Fail early with a readable message: otherwise a missing value would be
    # interpolated as the literal text "None" and surface as a driver error.
    if DB_NAME is None:
        raise ValueError("DB_NAME must not be None")

    required_env = ("DB_DRIVER", "DB_SERVER", "DB_UID", "DB_PWD")
    missing = [name for name in required_env if os.getenv(name) is None]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    parts = [
        f"DRIVER={os.getenv('DB_DRIVER')}",
        f"SERVER={os.getenv('DB_SERVER')}",
        f"DATABASE={DB_NAME}",
        f"UID={os.getenv('DB_UID')}",
        f"PWD={os.getenv('DB_PWD')}",
    ]

    trust_server_certificate = os.getenv('TRUST_SERVER_CERTIFICATE')
    if trust_server_certificate is not None:
        parts.append(f"TrustServerCertificate={trust_server_certificate}")

    # Every keyword is terminated by ';', matching the original string layout.
    return pyodbc.connect(";".join(parts) + ";")


Save all product-related tables to SQL Server

In [4]:
# Connect to the database named by the DB_NAME environment variable and open
# the cursor that the insert cells below reuse.
target_database = os.getenv('DB_NAME')
conn = get_db_connection(target_database)
cursor = conn.cursor()

In [5]:
# Read the transformed CSV exports, one DataFrame per target table, in the
# same order as the names on the left-hand side.
(
    category_df,
    product_df,
    attribute_df,
    attribute_value_df,
    product_variant_df,
    attribute_variant_df,
) = [
    pd.read_csv(csv_path, encoding='utf-8')
    for csv_path in (
        './transformedData/category.csv',
        './transformedData/product.csv',
        './transformedData/attribute.csv',
        './transformedData/attribute_value.csv',
        './transformedData/product_variant.csv',
        './transformedData/attribute_variant.csv',
    )
]

In [12]:
# Drop any existing tables so the notebook can be re-run from scratch.
# Tables are dropped children-first: SQL Server refuses to drop a table that is
# still referenced by a FOREIGN KEY, so every referencing table must go before
# the table it points to (attribute_variant -> product_variant/attribute_value
# -> attribute/product -> category).
conn.execute("""
    IF OBJECT_ID('attribute_variant', 'U') IS NOT NULL DROP TABLE attribute_variant;

    IF OBJECT_ID('product_variant', 'U') IS NOT NULL DROP TABLE product_variant;

    IF OBJECT_ID('attribute_value', 'U') IS NOT NULL DROP TABLE attribute_value;

    IF OBJECT_ID('attribute', 'U') IS NOT NULL DROP TABLE attribute;

    IF OBJECT_ID('product', 'U') IS NOT NULL DROP TABLE product;

    IF OBJECT_ID('category', 'U') IS NOT NULL DROP TABLE category;
""")

<pyodbc.Cursor at 0x1bb3fd61530>

In [13]:
# CREATE TABLE statements, listed parents-first so that every FOREIGN KEY
# target already exists when the referencing table is created.
create_table_statements = (
    """
    CREATE TABLE category (
        id INT PRIMARY KEY IDENTITY(1,1),
        name NVARCHAR(255) NOT NULL,
    )
""",
    """
    CREATE TABLE product (
        id INT PRIMARY KEY IDENTITY(1,1),
        category_id INT NOT NULL,
        name NVARCHAR(255) NOT NULL,
        description NVARCHAR(MAX),
        specification NVARCHAR(MAX),
        image_url NVARCHAR(MAX),
        brand NVARCHAR(255),
        CONSTRAINT fk_product_category FOREIGN KEY (category_id) REFERENCES category(id)
    )
""",
    """
    CREATE TABLE attribute (
        id INT PRIMARY KEY IDENTITY(1,1),
        name NVARCHAR(255) NOT NULL
    )
""",
    """
    CREATE TABLE attribute_value (
        id INT PRIMARY KEY IDENTITY(1,1),
        attribute_id INT NOT NULL,
        value NVARCHAR(255) NOT NULL,
        CONSTRAINT fk_attribute_value_attribute FOREIGN KEY (attribute_id) REFERENCES attribute(id)
    )
""",
    """
    CREATE TABLE product_variant (
        id INT PRIMARY KEY IDENTITY(1,1),
        product_id INT NOT NULL,
        price DECIMAL(19,3) NOT NULL,
        original_price DECIMAL(19,3) NOT NULL,
        sku NVARCHAR(100),
        stock_quantity INT DEFAULT 0,
        sold_quantity INT DEFAULT 0,
        CONSTRAINT fk_product_variant_product FOREIGN KEY (product_id) REFERENCES product(id)
    )
""",
    """
    CREATE TABLE attribute_variant (
        attribute_value_id INT NOT NULL,
        attribute_id INT NOT NULL,
        product_variant_id INT NOT NULL,
        CONSTRAINT pk_variant_attribute PRIMARY KEY (attribute_value_id, attribute_id, product_variant_id),
        CONSTRAINT fk_variant_attribute_attribute_value FOREIGN KEY (attribute_value_id) REFERENCES attribute_value(id),
        CONSTRAINT fk_variant_attribute_attribute FOREIGN KEY (attribute_id) REFERENCES attribute(id),
        CONSTRAINT fk_variant_attribute_product_variant FOREIGN KEY (product_variant_id) REFERENCES product_variant(id)
    )
""",
)

# Run the statements one by one; the cursor from the final statement is left
# as the cell's value, as it was when the calls were written out individually.
ddl_cursor = None
for create_sql in create_table_statements:
    ddl_cursor = conn.execute(create_sql)
ddl_cursor

<pyodbc.Cursor at 0x1bb4071b9b0>

In [20]:
def _rows_as_sql_tuples(frame, columns, desc, cast=None):
    """Turn the selected columns of ``frame`` into a list of parameter tuples.

    Missing values (NaN/NA) become ``None`` so they are bound as SQL NULL
    instead of being sent as a float NaN, and numpy scalars are converted to
    native Python values so the database driver can bind them.

    Args:
        frame: DataFrame holding the rows to convert.
        columns: Column names, in the order of the INSERT placeholders.
        desc: Label shown on the tqdm progress bar.
        cast: Optional callable applied to every non-missing value.

    Returns:
        A list with one tuple per row of ``frame``.
    """
    def _to_sql_value(value):
        if pd.isna(value):
            return None
        if cast is not None:
            return cast(value)
        # numpy scalars expose .item(), which yields the native Python value.
        if hasattr(value, "item"):
            return value.item()
        return value

    selected = frame[list(columns)]
    return [
        tuple(_to_sql_value(value) for value in record)
        for record in tqdm(
            selected.itertuples(index=False, name=None),
            total=selected.shape[0],
            desc=desc,
            unit="row",
            colour="green",
        )
    ]


category_tuples = _rows_as_sql_tuples(
    category_df,
    ['id', 'name'],
    "Create category tuples",
)

product_tuples = _rows_as_sql_tuples(
    product_df,
    ['id', 'category_id', 'name', 'description', 'specification', 'image_url', 'brand'],
    "Create product tuples",
)

attribute_tuples = _rows_as_sql_tuples(
    attribute_df,
    ['id', 'name'],
    "Create attribute tuples",
)

attribute_value_tuples = _rows_as_sql_tuples(
    attribute_value_df,
    ['id', 'attribute_id', 'value'],
    "Create attribute_value tuples",
)

product_variant_tuples = _rows_as_sql_tuples(
    product_variant_df,
    ['id', 'product_id', 'price', 'original_price', 'sku', 'stock_quantity', 'sold_quantity'],
    "Create product_variant tuples",
)

# All three columns are integer keys, so every value is cast to a plain int.
attribute_variant_tuples = _rows_as_sql_tuples(
    attribute_variant_df,
    ['attribute_value_id', 'attribute_id', 'product_variant_id'],
    "Create attribute_variant tuples",
    cast=int,
)

Create category tuples:   0%|[32m          [0m| 0/46 [00:00<?, ?row/s]

Create category tuples: 100%|[32m██████████[0m| 46/46 [00:00<00:00, 1139.50row/s]
Create product tuples: 100%|[32m██████████[0m| 2504/2504 [00:00<00:00, 8151.16row/s]
Create attribute tuples: 100%|[32m██████████[0m| 12/12 [00:00<00:00, 4400.39row/s]
Create attribute_value tuples: 100%|[32m██████████[0m| 633/633 [00:00<00:00, 6858.45row/s]
Create product_variant tuples: 100%|[32m██████████[0m| 3488/3488 [00:00<00:00, 6742.00row/s]
Create attribute_variant tuples: 100%|[32m██████████[0m| 3896/3896 [00:00<00:00, 15839.48row/s]


In [14]:
# The ids come from the CSV, so IDENTITY_INSERT has to be ON while inserting.
# SQL Server allows it on only one table per session, so it is switched OFF in
# `finally`: a failed insert must not block the SET ... ON of the next table.
cursor.execute("SET IDENTITY_INSERT category ON;")
try:
    cursor.executemany("INSERT INTO category (id, name) VALUES (?, ?)", category_tuples)
finally:
    cursor.execute("SET IDENTITY_INSERT category OFF;")

<pyodbc.Cursor at 0x1bb3fd4ae30>

In [15]:
# The ids come from the CSV, so IDENTITY_INSERT has to be ON while inserting.
# SQL Server allows it on only one table per session, so it is switched OFF in
# `finally`: a failed insert must not block the SET ... ON of the next table.
cursor.execute("SET IDENTITY_INSERT product ON;")
try:
    cursor.executemany("INSERT INTO product (id, category_id, name, description, specification, image_url, brand) VALUES (?, ?, ?, ?, ?, ?, ?)", product_tuples)
finally:
    cursor.execute("SET IDENTITY_INSERT product OFF;")

<pyodbc.Cursor at 0x1bb3fd4ae30>

In [16]:
# The ids come from the CSV, so IDENTITY_INSERT has to be ON while inserting.
# SQL Server allows it on only one table per session, so it is switched OFF in
# `finally`: a failed insert must not block the SET ... ON of the next table.
cursor.execute("SET IDENTITY_INSERT attribute ON;")
try:
    cursor.executemany("INSERT INTO attribute (id, name) VALUES (?, ?)", attribute_tuples)
finally:
    cursor.execute("SET IDENTITY_INSERT attribute OFF;")

<pyodbc.Cursor at 0x1bb3fd4ae30>

In [17]:
# The ids come from the CSV, so IDENTITY_INSERT has to be ON while inserting.
# SQL Server allows it on only one table per session, so it is switched OFF in
# `finally`: a failed insert must not block the SET ... ON of the next table.
cursor.execute("SET IDENTITY_INSERT attribute_value ON;")
try:
    cursor.executemany("INSERT INTO attribute_value (id, attribute_id, value) VALUES (?, ?, ?)", attribute_value_tuples)
finally:
    cursor.execute("SET IDENTITY_INSERT attribute_value OFF;")

<pyodbc.Cursor at 0x1bb3fd4ae30>

In [18]:
# The ids come from the CSV, so IDENTITY_INSERT has to be ON while inserting.
# SQL Server allows it on only one table per session, so it is switched OFF in
# `finally`: a failed insert must not leave it ON for the rest of the session.
cursor.execute("SET IDENTITY_INSERT product_variant ON;")
try:
    cursor.executemany("INSERT INTO product_variant (id, product_id, price, original_price, sku, stock_quantity, sold_quantity) VALUES (?, ?, ?, ?, ?, ?, ?)", product_variant_tuples)
finally:
    cursor.execute("SET IDENTITY_INSERT product_variant OFF;")

<pyodbc.Cursor at 0x1bb3fd4ae30>

In [21]:
# Bulk-insert the attribute/variant link rows; the three placeholders follow
# the column order of the tuples in attribute_variant_tuples.
attribute_variant_insert_sql = "INSERT INTO attribute_variant (attribute_value_id, attribute_id, product_variant_id) VALUES (?, ?, ?)"
cursor.executemany(attribute_variant_insert_sql, attribute_variant_tuples)

In [22]:
# Commit the work done on this connection, then close it.
# The close sits in `finally` so the connection is released even if the
# commit itself raises.
try:
    conn.commit()
finally:
    conn.close()