# Sku Generation on SQLite DB

In [1]:
#import necessary libraries
import os
import sys
import time
import pandas as pd
import sqlalchemy
import sqlite3

In [2]:
# connect to the database
conn = sqlite3.connect('sku_catalog.db')

In [3]:
# preview product list
query = "SELECT * FROM products LIMIT 10"
df_preview = pd.read_sql_query(query, conn)
df_preview

Unnamed: 0,id,product_name,category_id,description,base_price,status,created_at,updated_at
0,1,Bandit Banquet Tee,1,Raise a cold one to automotive history with th...,20,active,2025-12-02 20:47:57.632166,2025-12-02 20:47:57.632170
1,2,Bandit Truck Wreath Tee,1,Embrace the spirit of Legendary with our Bandi...,20,active,2025-12-02 20:47:57.635340,2025-12-02 20:47:57.635344
2,3,Bandit Truck Logo Tee,1,"Show your passion for powerful, custom muscle ...",20,active,2025-12-02 20:47:57.639244,2025-12-02 20:47:57.639246
3,4,Legendary Tee,1,Experience the pride of driving Legendary with...,20,active,2025-12-02 20:47:57.642034,2025-12-02 20:47:57.642037
4,5,Legendary Long Sleeve Tee,2,Experience the epitome of style and protection...,20,active,2025-12-02 20:47:57.644838,2025-12-02 20:47:57.644841
5,6,Legendary Polo,3,"Elevate your style with the Legendary Polo, a ...",40,active,2025-12-02 20:47:57.646922,2025-12-02 20:47:57.646924
6,7,Legendary Sweater,4,Wrap yourself in the power of Legendary Concep...,22,active,2025-12-02 20:47:57.648896,2025-12-02 20:47:57.648898
7,8,Bandit Truck Wreath Sweater,4,Introducing the Bandit Wreath Sweater – a bold...,22,active,2025-12-02 20:47:57.650957,2025-12-02 20:47:57.650959
8,9,Legendary Hoody,5,Unleash your inner muscle truck enthusiast wit...,25,active,2025-12-02 20:47:57.653287,2025-12-02 20:47:57.653289
9,10,Bandit Truck Jacket,6,"Introducing the Bandit Truck Jacket, a must-ha...",65,active,2025-12-02 20:47:57.655177,2025-12-02 20:47:57.655179


In [4]:
# preview a single product and its features
product_id = 1
query = f"""SELECT 
    p.product_name,
    c.category,
    c.subcategory,
    ft.feature_type,
    f.name AS feature_name,
    f.code AS feature_code
FROM products p
JOIN categories c ON p.category_id = c.id
JOIN product_features pf ON p.id = pf.product_id
JOIN features f ON pf.feature_id = f.id
JOIN feature_types ft ON f.feature_type_id = ft.id
WHERE p.product_name = 'Bandit Truck Logo Tee'
ORDER BY ft.display_order, f.sort_order;"""
df_product = pd.read_sql_query(query, conn)
df_product

Unnamed: 0,product_name,category,subcategory,feature_type,feature_name,feature_code
0,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Youth Small,YS
1,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Youth Medium,YM
2,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Youth Large,YL
3,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Extra Small,XS
4,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Small,S
5,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Medium,M
6,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Large,L
7,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Extra Large,XL
8,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Double Extra Large,2XL
9,Bandit Truck Logo Tee,Shirt,Tee Shirt,Size,Triple Extra Large,3XL


# Permutations on a single product

In [None]:
# function for, given a single product, generate all possible perutations of its features
def generate_permutations(product_name, debug=False):
    query = f"""SELECT 
        p.product_name,
        c.category,
        c.subcategory,
        ft.feature_type,
        f.name AS feature_name,
        f.code AS feature_code
    FROM products p
    JOIN categories c ON p.category_id = c.id
    JOIN product_features pf ON p.id = pf.product_id
    JOIN features f ON pf.feature_id = f.id
    JOIN feature_types ft ON f.feature_type_id = ft.id
    WHERE p.product_name = '{product_name}'
    ORDER BY ft.display_order, f.sort_order;"""
    
    df_product = pd.read_sql_query(query, conn)
    
    if debug:
        print(f"\nQuery returned {len(df_product)} rows")
        if len(df_product) == 0:
            print("WARNING: No data returned from query!")
            return []
    
    # Group features by feature type
    feature_groups = df_product.groupby('feature_type')['feature_code'].apply(list).to_dict()

    # debug print values
    if debug:
        print(f"\n{len(feature_groups)} feature groups before culling:")
        for key in feature_groups:
            print(f"  Feature Type: {key}, Features: {feature_groups[key]}")

    # cull NAN values from feature groups (check if last 3 characters are 'NAN')
    for key in feature_groups:
        feature_groups[key] = [code for code in feature_groups[key] if pd.notna(code) and not str(code).endswith('NAN')]
    
    # Remove empty feature groups
    feature_groups = {k: v for k, v in feature_groups.items() if v}
    
    if debug:
        print(f"\n{len(feature_groups)} feature groups after culling:")
        for key in feature_groups:
            print(f"  Feature Type: {key}, Features: {feature_groups[key]}")
    
    if not feature_groups:
        print("WARNING: No valid feature groups found after culling!")
        return []

    # Generate all combinations of features
    from itertools import product
    permutations = [list(combo) for combo in product(*feature_groups.values())]

    # check for duplicates (convert lists to tuples for set comparison)
    unique_permutations = set(tuple(p) for p in permutations)
    if len(unique_permutations) < len(permutations):
        print("Warning: Duplicate permutations found!")
    if debug:
        print(f"\nTotal permutations generated: {len(permutations)}")
        print(f"Unique permutations count: {len(unique_permutations)}")

    if debug:
        # Print first few permutations as preview
        print(f"\nSample permutations (first 5):")
        for i, perm in enumerate(permutations[:5], 1):
            print(f"  {i}. ('{product_name}', {perm})")
        if len(permutations) > 5:
            print(f"  ... and {len(permutations) - 5} more")
    
    # Return list of tuples: (product_name, feature_codes)
    return [(product_name, perm) for perm in permutations]

# test the function
test_permutations = generate_permutations('Bandit Truck Logo Tee',True) 


Query returned 16 rows

5 feature groups before culling:
  Feature Type: Color, Features: ['BLK']
  Feature Type: Fit, Features: ['USX']
  Feature Type: Material, Features: ['CTN']
  Feature Type: Scent, Features: ['STNAN']
  Feature Type: Size, Features: ['YS', 'YM', 'YL', 'XS', 'S', 'M', 'L', 'XL', '2XL', '3XL', '4XL', '5XL']

4 feature groups after culling:
  Feature Type: Color, Features: ['BLK']
  Feature Type: Fit, Features: ['USX']
  Feature Type: Material, Features: ['CTN']
  Feature Type: Size, Features: ['YS', 'YM', 'YL', 'XS', 'S', 'M', 'L', 'XL', '2XL', '3XL', '4XL', '5XL']

Total permutations generated: 12
Unique permutations count: 12

Sample permutations (first 5):
  1. ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YS'])
  2. ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YM'])
  3. ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YL'])
  4. ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'XS'])
  5. ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'S'])
  ... and 7

In [17]:
test_permutations[:3]  # preview first 3 permutations

[('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YS']),
 ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YM']),
 ('Bandit Truck Logo Tee', ['BLK', 'USX', 'CTN', 'YL'])]

# Generate SKUs from product permutations

In [None]:
from typing import List, Tuple

# function to generate a SKU from a permutation tuple (product_name, feature_codes)

def generate_sku(permutation: Tuple[str, List[str]], debug=True):
    product_name, items = permutation
    sku = ""

    # query category ID (which contains both main category and subcategory info)
    query = f"""SELECT 
        c.id as category_id
    FROM products p
    JOIN categories c ON p.category_id = c.id
    WHERE p.product_name = '{product_name}'
    LIMIT 1;"""
    
    df_category = pd.read_sql_query(query, conn)
    
    if len(df_category) == 0:
        print(f"WARNING: Product '{product_name}' not found!")
        return None
    
    category_id = df_category['category_id'].iloc[0]
    
    # In your schema, the category table has a single ID
    # If you need separate main_category_id and subcategory_id, you'll need to add those columns
    # For now, using the category ID as the prefix
    prefix = str(category_id)
    
    if debug:
        print(f"Category ID: {category_id}")
        print(f"Prefix: {prefix}")

    # join items separated by a dash
    sku = "-".join(items)

    # append category ID (first item)
    # sku += items[0]

    # append subcategory ID (second item)
    # sku += items[1]

    if debug:
        print(f"Generated SKU: {sku}")

    return sku

# test the function
test_sku = generate_sku(test_permutations[0], True)

Category ID: category_id    1
category_id    1
Name: 0, dtype: int64
Prefix: category_id    1
category_id    1
Name: 0, dtype: int64
Generated SKU: BLK-USX-CTN-YS
