# Sku Generator Beta Testing Playground

Read Apple Numbers file with multiple sheets into Pandas Data frames

In [1]:
import pandas as pd
from numbers_parser import Document

## Data Preparation and Validation

In [2]:
def load_catalog_data(file_path="",debug=False):
    """
    Load Apple Numbers file with multiple sheets into Pandas DataFrames.
    
    Parameters:
    - file_path: path to the Numbers file
    
    Returns:
    - tables: dictionary of DataFrames, one for each sheet/table in the Numbers file
    """
    # Read the Numbers file
    doc = Document(file_path)
    sheets = doc.sheets
    tables = {}

    # Extract each sheet as a separate dataframe
    for sheet in sheets:
        for table in sheet.tables:
            table_name = table.name.replace(" ", "_")
            data = table.rows(values_only=True)
            tables[table_name] = pd.DataFrame(data[1:], columns=data[0])

    # Access individual dataframes
    if debug:
        for name, df in tables.items():
            print(f"\n{name}:")
            print(df.head())

    # For Product Catalog sheet replace spaces with underscores in column names
    tables['Product_Catalog'].columns = tables['Product_Catalog'].columns.str.replace(' ', '_')
    
    return tables

# Load the data
file_path = "CHARTS/catalog.numbers"
tables = load_catalog_data(file_path,True)



Product_Catalog:
   Index Main Category           Sub Category                       Name  \
0    0.0         Shirt              Tee Shirt         Bandit Banquet Tee   
1    1.0         Shirt              Tee Shirt    Bandit Truck Wreath Tee   
2    2.0         Shirt              Tee Shirt      Bandit Truck Logo Tee   
3    3.0         Shirt              Tee Shirt              Legendary Tee   
4    4.0         Shirt  Long Sleeve Tee Shirt  Legendary Long Sleeve Tee   

                       Size     Fit                     Color  \
0  S,M,L,XL,2XL,3XL,4XL,5XL  Unisex                    Yellow   
1  S,M,L,XL,2XL,3XL,4XL,5XL  Unisex                     Black   
2  S,M,L,XL,2XL,3XL,4XL,5XL  Unisex                     Black   
3  S,M,L,XL,2XL,3XL,4XL,5XL  Unisex  White, Blue, Gray, Black   
4  S,M,L,XL,2XL,3XL,4XL,5XL  Unisex                      Blue   

                                    Design Material           Scent  
0                           Bandit Banquet   Cotton  Not Applica

In [3]:
def validate_catalog_features(data="",debug=False):
    """
    Validate that code sheet names match catalog features and extract the feature list.
    
    Parameters:
    - tables: dictionary of DataFrames from load_catalog_data()
    
    Returns:
    - tuple: (code_sheet_list, catalog_feature_list)
    
    Raises:
    - AssertionError if sheet names don't match catalog features
    """
    # get lists of code sheets and catalog features
    code_sheet_list = list(tables.keys())[2:]  # List of sheet names
    catalog_feature_list = list(tables['Product_Catalog'].columns[1:])  # list of features from Product Catalog sheet excluding first column

    # remove "Name" from catalog_feature_list if present
    if "Name" in catalog_feature_list:
        catalog_feature_list.remove("Name")

    # sanity check: make sure both lists match exactly
    if set(code_sheet_list) == set(catalog_feature_list):
        print("Sheet names and catalog features MATCH exactly!")
    else:
        raise AssertionError("Sheet names and catalog features DO NOT match!")

    # for visual reference, create a DF to show matching sheets and features
    matching_df = pd.DataFrame({
        'Sheet_Names': code_sheet_list,
        'Catalog_Features': catalog_feature_list
    })
    if debug:
        print(matching_df)
    
    return code_sheet_list, catalog_feature_list

# Validate and extract feature lists
code_sheet_list, catalog_feature_list = validate_catalog_features(tables,True)

Sheet names and catalog features MATCH exactly!
     Sheet_Names Catalog_Features
0  Main_Category    Main_Category
1   Sub_Category     Sub_Category
2           Size             Size
3            Fit              Fit
4          Color            Color
5         Design           Design
6       Material         Material
7          Scent            Scent


## Prepare Data

In [4]:
# create a list of batch names from the set of unique Name field values in the product catalog

def prepare_batches(data):
    """
    Get unique product names from the Product Catalog and check for duplicates.
    
    Parameters:
    - data: DataFrame (Product_Catalog)
    
    Returns:
    - list of unique product names (batch names)
    """
    # Get unique names from the Name column
    name_field_list = data['Name'].tolist()
    name_field_set = set(name_field_list)
    
    # Check for duplicates by comparing list length to set length
    has_duplicates = len(name_field_list) != len(name_field_set)
    
    if has_duplicates:
        duplicate_list = []
        print(f"WARNING: Found {len(name_field_list) - len(name_field_set)} duplicate name(s)")
        # Find and print duplicates
        seen = {}
        for idx, name in enumerate(name_field_list):
            if name in seen:
                duplicate_list.append((name, seen[name], idx))
            else:
                seen[name] = idx
        
        # Print the duplicates
        print("\nDuplicate Names Found:")
        for name, first_idx, second_idx in duplicate_list:
            print(f"  '{name}' at indexes {first_idx} and {second_idx}")
    else:
        print(f"All product names are unique, total batches to process: {len(name_field_list)}")
    
    return list(name_field_set)

batch_name_list = prepare_batches(tables['Product_Catalog'])    

All product names are unique, total batches to process: 18


### Permutation logic
Generate dictionaries all possible permutations of a given prodcut model, extracting the feature, prefix, and index

In [5]:
from itertools import product
import re

def generate_permutations(batch_name="",debug=False):
    """
    For a given product model (ie batch_name), generate all possible permutations of feature combinations.
    Returns a list of dictionaries, where each dictionary represents one permutation.
    Each value in the dict is [value_name, prefix, index] (as strings).
    Prefix and index are from the corresponding code sheet.
    For AND cases (semicolon), indexes are concatenated and value names are simplified.
    Features with index '0' or prefix ending in 'NAN' are excluded (not applicable values).

    Parameters:
    - batch_name: String representing a row from Product_Catalog DataFrame
    - debug: Boolean flag to enable debug printing
    
    Returns:
    - List of dictionaries, each representing a permutation
    """

    # Size abbreviation to full name mapping
    size_mapping = {
        'CS': 'Child Small',
        'CM': 'Child Medium',
        'CL': 'Child Large',
        'XS': 'Extra Small',
        'S': 'Small',
        'M': 'Medium',
        'L': 'Large',
        'XL': 'Extra Large',
        '2XL': 'Double Extra Large',
        '3XL': 'Triple Extra Large',
        '4XL': 'Quadruple Extra Large',
        '5XL': 'Quintuple Extra Large',
        'S/M': 'Small to Medium',
        'L/XL': 'Large to Extra Large',
        'NA': 'Not Applicable'
    }
    
    # Dictionary to hold all feature values for this product model
    feature_options = {}
    
    for feature in catalog_feature_list:
        feature_values = batch_name[feature]
        
        # Check if feature_values contains comma or semicolon (is a list)
        if isinstance(feature_values, str):
            if ',' in feature_values:
                # Comma-separated = "OR" - these create separate permutations
                values = [v.strip() for v in feature_values.split(',')]
                
                # If this is Size feature, map abbreviations to full names
                if feature == 'Size':
                    values = [size_mapping.get(v, v) for v in values]
                
                feature_options[feature] = {
                    'values': values,
                    'type': 'OR'
                }
            elif ';' in feature_values:
                # Semicolon-separated = "AND" - these must all appear together
                values = [v.strip() for v in feature_values.split(';')]
                
                # If this is Size feature, map abbreviations to full names
                if feature == 'Size':
                    values = [size_mapping.get(v, v) for v in values]
                
                feature_options[feature] = {
                    'values': values,
                    'type': 'AND'
                }
            else:
                # Single value
                value = feature_values
                
                # If this is Size feature, map abbreviation to full name
                if feature == 'Size':
                    value = size_mapping.get(value.strip(), value)
                
                feature_options[feature] = {
                    'values': [value],
                    'type': 'SINGLE'
                }
        else:
            # Non-string value (e.g., NaN, number) - treat as single value
            feature_options[feature] = {
                'values': [feature_values] if not pd.isna(feature_values) else [''],
                'type': 'SINGLE'
            }
    
    # Helper function to get prefix and index for a feature value
    def get_value_info(feature, value_name):
        """Returns [value_name, prefix, index] for a given feature value."""
        code_sheet = tables[feature]
        matching_rows = code_sheet.loc[code_sheet["Name"] == value_name]
        
        if not matching_rows.empty:
            code_index = matching_rows.index[0]
            prefix = code_sheet.at[code_index, "Prefix"]
            # Convert index to string (no padding)
            index_str = str(code_index)
            return [value_name, prefix, index_str]
        else:
            # If not found, return empty prefix and empty index
            return [value_name, "", ""]
    
    # Helper function to check if a feature should be excluded
    def is_not_applicable(value_info):
        """Returns True if the feature is 'Not Applicable' (index 0 or prefix ends with NAN)"""
        index = value_info[2]
        prefix = value_info[1]
        return index == '0' or prefix.endswith('NAN')
    
    # Helper function to simplify value names for AND cases
    def simplify_and_values(value_names):
        """
        Simplifies multiple value names by finding common parts.
        E.g., ['Bandit Truck Icon', 'Bandit Truck Wordmark'] -> 'Bandit Truck Icon AND Wordmark'
        """
        if len(value_names) == 1:
            return value_names[0]
        
        # Find common prefix among all names
        common_prefix = ""
        min_len = min(len(name) for name in value_names)
        
        for i in range(min_len):
            if all(name[i] == value_names[0][i] for name in value_names):
                common_prefix += value_names[0][i]
            else:
                break
        
        # Clean up common prefix (remove trailing spaces/incomplete words)
        common_prefix = common_prefix.rstrip()
        
        # Extract unique parts from each name
        unique_parts = []
        for name in value_names:
            unique_part = name[len(common_prefix):].strip()
            if unique_part:
                unique_parts.append(unique_part)
        
        # Construct simplified name
        if common_prefix and unique_parts:
            return f"{common_prefix} {' AND '.join(unique_parts)}"
        else:
            return ' AND '.join(value_names)
    
    # Separate features by type
    or_features = {k: v['values'] for k, v in feature_options.items() if v['type'] == 'OR'}
    and_features = {k: v['values'] for k, v in feature_options.items() if v['type'] == 'AND'}
    single_features = {k: v['values'] for k, v in feature_options.items() if v['type'] == 'SINGLE'}
    
    # Build permutation components
    permutation_dict = {}
    
    # Add single features (these don't multiply permutations)
    for feature, values in single_features.items():
        permutation_dict[feature] = values
    
    # Add OR features (these DO multiply permutations)
    for feature, values in or_features.items():
        permutation_dict[feature] = values
    
    # Generate all permutations using cartesian product
    feature_names = list(permutation_dict.keys())
    feature_value_lists = [permutation_dict[f] for f in feature_names]
    
    permutations = []
    for combo in product(*feature_value_lists):
        perm = {}
        
        # For each feature in this combination, get [value, prefix, index]
        for feature, value_name in zip(feature_names, combo):
            value_info = get_value_info(feature, value_name)
            
            # Skip if this is a "Not Applicable" value
            if not is_not_applicable(value_info):
                perm[feature] = value_info
        
        # Add AND features (combine into single [simplified_name, prefix, concatenated_indexes])
        for feature, values in and_features.items():
            # Get info for all values
            value_infos = [get_value_info(feature, val) for val in values]
            
            # Check if any of the AND values are "Not Applicable"
            if any(is_not_applicable(info) for info in value_infos):
                continue  # Skip this entire AND feature
            
            # Simplify the value name
            simplified_name = simplify_and_values([info[0] for info in value_infos])
            
            # Use first prefix (assuming they're all the same)
            prefix = value_infos[0][1] if value_infos else ""
            
            # Concatenate indexes (no padding)
            concatenated_index = "".join([info[2] for info in value_infos])
            
            perm[feature] = [simplified_name, prefix, concatenated_index]
        
        # Add the product name
        perm['Name'] = batch_name['Name']
        
        permutations.append(perm)

    if debug:
        print(f"Product: {batch_name['Name']}")
        print(f"Number of permutations: {len(permutations)}\n")
        print(f"Data imput type {type(batch_name)}")
        print(f"Data output type: {type(permutations)}\n")
    
    return permutations

# Test permutation generation with arbitrary row
test_row = tables['Product_Catalog'].iloc[2]
permutations = generate_permutations(test_row,True)

Product: Bandit Truck Logo Tee
Number of permutations: 8

Data imput type <class 'pandas.core.series.Series'>
Data output type: <class 'list'>



### SKU number generation
Generate unique SKUs based on the feature prefixes and indices. Use a timestamp with millisecond accuracy as a contingincy in the case of duplicate SKUs

In [6]:
# generate a unique sku based on prefixes and indexes
from datetime import datetime
from multiprocessing.util import debug
import base36

def generate_sku(dict, add_timestamp=False, debug=False):
    """
    Generate a SKU from a permutation dictionary.
    
    Parameters:
    - dict: permutation dictionary with feature info
    - add_timestamp: if True, adds a compact timestamp suffix for guaranteed uniqueness

    Returns:
    - Tuple: model_name, sku
    """

    model_name = dict['Name']
    part_list = []
    sku_prefix = ""

    # Create sku prefix for main prefix, and sub prefix and index
    sku_prefix += dict['Main_Category'][1]
    sku_prefix += dict['Sub_Category'][1]
    sku_prefix += dict['Sub_Category'][2]
    part_list.append(sku_prefix)


    # iterate the rest of the dict items
    for i, key in enumerate(dict.keys()):
        if i >= 2 and key != "Name":  # only the third and proceeding items, exclude "Name"
            # print(f"key: {key}, value: {dict[key]}")
            part = dict[key][1] + dict[key][2]
            part_list.append(part)

    # Base SKU
    sku = "-".join(part_list)
    
    # Add timestamp suffix if requested
    if add_timestamp:
        now = datetime.now()
        unix_ms = int(now.timestamp() * 1000)
        compact_time = base36.dumps(unix_ms)[-6:].upper()
        
        sku = f"{sku}-{compact_time}"
    
    if debug:
        print(f"Sku generated for\n Product Model: {model_name}\n variant: {sku}\n SKU length: {len(sku)}")

    return (model_name, sku)

# test on an arbitrary permutation
model, sku = generate_sku(permutations[0],False,True)


Sku generated for
 Product Model: Bandit Truck Logo Tee
 variant: MAUS1-FT1-CL2-MAT1-SZ5-D45
 SKU length: 26


## SKU number persistance
Generate and write generate SKUs to a csv file with other product details. 
Prevent duplicate skus by checking the existing CSV and regenerate skus with timestamp based unique identifiers in this case.

#### Initialize an output CSV file

In [7]:
import os
from datetime import datetime

def initialize_output_csv(output_dir='', batch_name='', overwrite=False, debug=False):
    """
    Initialize a CSV file for SKU batch output using a predefined template structure.
    
    Creates a timestamped CSV file with columns matching the sku_batch_template.csv format.
    The filename includes the batch name and date for organization and tracking.
    
    Parameters
    ----------
    output_dir : str
        Directory path where the CSV should be created (e.g., 'Results/').
        Directory will be created if it doesn't exist.
    batch_name : str
        Name of the product model/batch being processed. Used in the filename
        for identification (e.g., 'Basic Tee' -> 'Basic Tee_skus_20251121.csv').
    overwrite : bool, optional
        Controls behavior when file already exists:
        - False (default): Skips file creation and returns None
        - True: Replaces existing file with new empty template
    debug : bool, optional
        If True, prints detailed diagnostic information including file paths,
        directory status, and overwrite mode. Default is False.
    
    Returns
    -------
    str or None
        Absolute path to the created CSV file if successful, or None if file
        already exists and overwrite=False.
    
    Notes
    -----
    - File naming format: '{batch_name}_skus_{YYYYMMDD}.csv'
    - Date-based naming allows multiple generations per batch on different days
    - Template loaded from 'CHARTS/sku_batch_template.csv'
    - All SKU columns from template are preserved in the output file
    
    Examples
    --------
    Create new batch file:
    >>> path = initialize_output_csv('Results/', 'Basic Tee', overwrite=False)
    >>> print(path)
    'Results/Basic Tee_skus_20251121.csv'
    
    Overwrite existing batch file:
    >>> path = initialize_output_csv('Results/', 'Basic Tee', overwrite=True)
    Overwriting existing CSV at: Results/Basic Tee_skus_20251121.csv
    
    Debug mode for troubleshooting:
    >>> path = initialize_output_csv('Results/', 'Basic Tee', debug=True)
    Debug: Attempting to create file at: Results/Basic Tee_skus_20251121.csv
    Debug: Directory exists: True
    Debug: File exists: False
    Debug: Overwrite mode: False
    """

    # timestamp the file creation in the filepath
    timestamp = datetime.now().strftime('%Y%m%d')

    file_path = output_dir + batch_name + '_skus_' + timestamp + '.csv'
    
    if debug:
        print(f"Debug: Attempting to create file at: {file_path}")
        print(f"Debug: Directory exists: {os.path.exists(output_dir)}")
        print(f"Debug: File exists: {os.path.exists(file_path)}")
        print(f"Debug: Overwrite mode: {overwrite}")

    # Check if FILE already exists (not directory)
    if os.path.exists(file_path):
        if not overwrite:
            print(f"CSV already exists at: {file_path}")
            print(f"Set overwrite=True to replace existing file")
            return None
        else:
            print(f"Overwriting existing CSV at: {file_path}")
    
    # Load the template CSV from CHARTS directory
    template_path = 'CHARTS/sku_batch_template.csv'
    template_df = pd.read_csv(template_path)
    template_columns = template_df.columns.tolist()
    
    # Create an empty DataFrame with these columns
    output_df = pd.DataFrame(columns=template_columns)
    
    # Ensure the directory exists
    os.makedirs(output_dir, exist_ok=True)
    
    # Save to CSV
    output_df.to_csv(file_path, index=False)
    
    print(f"Created new CSV template with columns: {template_columns}")
    print(f"Saved to: {file_path}")
    
    return file_path

# Test the function
output_dir = 'Results/'
result = initialize_output_csv(output_dir, model, overwrite=True, debug=True)
print(f"\nReturned: {result}")


Debug: Attempting to create file at: Results/Bandit Truck Logo Tee_skus_20251121.csv
Debug: Directory exists: True
Debug: File exists: True
Debug: Overwrite mode: True
Overwriting existing CSV at: Results/Bandit Truck Logo Tee_skus_20251121.csv
Created new CSV template with columns: ['Index', 'SKU', 'Main_Category', 'Sub_Category', 'Name', 'Size', 'Fit', 'Color', 'Design', 'Material', 'Scent']
Saved to: Results/Bandit Truck Logo Tee_skus_20251121.csv

Returned: Results/Bandit Truck Logo Tee_skus_20251121.csv


#### Parse batches into dataframes

### Sequential Permutation and Procedural SKU Generation

In [8]:

def generate_batch_dict(data=list,debug=False):
    """
    Generate a dictionary capturing all generated SKU permutations for each batch.
    Parameters:
    - data: list of batch names (product models)
    - debug: boolean flag to enable debug printing
    Returns:
    - batch_dict: dictionary where keys are batch names and values are lists of permutation dictionaries
    """
    
    batch_dict = {} #capture updated batch data with SKUs

    for batch_name in data:
        # Get the row from Product_Catalog that matches this batch_name
        batch_row = tables['Product_Catalog'][tables['Product_Catalog']['Name'] == batch_name].iloc[0]
        
        # Generate permutations for this batch (returns list of dictionaries)
        permed_batch = generate_permutations(batch_row)
        batch_dict[batch_name] = permed_batch
        
        # Add SKU to each permutation dictionary
        for perm in permed_batch:
            model, sku = generate_sku(perm)
            perm['SKU'] = sku  # Add SKU key directly to the permutation dictionary
        
        if debug:
            print(f"Batch '{batch_name}': Generated {len(permed_batch)} SKUs")

    return batch_dict

batch_dict = generate_batch_dict(batch_name_list,True)

Batch 'Legendary Polo': Generated 16 SKUs
Batch 'Bandit Truck Wreath Sweater': Generated 8 SKUs
Batch 'Legendary Trucker Hat': Generated 2 SKUs
Batch 'Bandit Truck Trucker Hat': Generated 1 SKUs
Batch 'Bandit Truck Flex Fit Hat': Generated 2 SKUs
Batch 'Bandit Truck Jacket': Generated 8 SKUs
Batch 'Bandit Banquet Tee': Generated 8 SKUs
Batch 'Bandit Truck Wreath Tee': Generated 8 SKUs
Batch 'Collectible Gold Keychain': Generated 2 SKUs
Batch 'Legendary Hoody': Generated 8 SKUs
Batch 'Legendary Long Sleeve Tee': Generated 8 SKUs
Batch 'Legendary Tee': Generated 32 SKUs
Batch 'Legendary Flex Fit Hat': Generated 2 SKUs
Batch 'Legendary Jacket': Generated 8 SKUs
Batch 'Legendary Sweater': Generated 8 SKUs
Batch 'Bandit Truck Logo Tee': Generated 8 SKUs
Batch 'Leather Air Freshener': Generated 6 SKUs
Batch 'Legendary Distressed Hat': Generated 2 SKUs


In [12]:
def batch_to_dataframe(batch_name='',data_source=dict, batch_name_list=batch_name_list,debug=False):
    """
    Convert a batch's list of permutation dictionaries to a Pandas DataFrame.
    Parameters:
    - batch_name: String name of the batch to convert
    - data_source: Dictionary of batches
    - batch_name_list: List of valid batch names (default: batch_name_list)
    - debug: Boolean flag to enable debug printing
    Returns:
    - batch_df: Pandas DataFrame for the specified batch
    """
    for batch in data_source:
        if batch == batch_name:
            batch_df = pd.DataFrame(data_source[batch])
            batch_df.name = f"{batch_name}_df"
            if debug:
                print(f"Converted batch '{batch_name}' to DataFrame\n name: {batch_name}_df\n Shape: {batch_df.shape}")
    return batch_df



In [13]:

batch_name = batch_name_list[0]

batch_to_dataframe(batch_name,batch_dict,debug=True) 

Converted batch 'Legendary Polo' to DataFrame
 name: Legendary Polo_df
 Shape: (16, 9)


Unnamed: 0,Main_Category,Sub_Category,Color,Material,Size,Fit,Design,Name,SKU
0,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Small, SZ, 5]","[Male, FT, 2]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ5-FT2-D235
1,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Small, SZ, 5]","[Female, FT, 3]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ5-FT3-D235
2,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Medium, SZ, 6]","[Male, FT, 2]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ6-FT2-D235
3,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Medium, SZ, 6]","[Female, FT, 3]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ6-FT3-D235
4,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Large, SZ, 7]","[Male, FT, 2]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ7-FT2-D235
5,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Large, SZ, 7]","[Female, FT, 3]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ7-FT3-D235
6,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Extra Large, SZ, 8]","[Male, FT, 2]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ8-FT2-D235
7,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Extra Large, SZ, 8]","[Female, FT, 3]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ8-FT3-D235
8,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Double Extra Large, SZ, 9]","[Male, FT, 2]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ9-FT2-D235
9,"[Shirt, MA, 1]","[Polo, US, 3]","[Black, CL, 2]","[Nylon, MAT, 2]","[Double Extra Large, SZ, 9]","[Female, FT, 3]",[Legendary Icon AND Legendary Wordmark AND Ban...,Legendary Polo,MAUS3-CL2-MAT2-SZ9-FT3-D235


## SKU number capture test

run the above functions to generate SKUs and write them into the initialized csv file.

### Prepare Data For Parsing

In [290]:
# Load the data
tables = load_catalog_data("CHARTS/catalog.numbers")

# Validate and extract feature lists
code_sheet_list, catalog_feature_list = validate_catalog_features(tables)

Sheet names and catalog features MATCH exactly!


### Write generated SKUS to CSV

Start with a Pandas dataframe