In [15]:
import pandas as pd
import json, re, time, logging, os, requests
from dotenv import load_dotenv
from requests.exceptions import HTTPError

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load environment variables from .env file
load_dotenv()

# Set up your API credentials
client_id = os.getenv('BIGCOMMERCE_CLIENT_ID')
access_token = os.getenv('BIGCOMMERCE_ACCESS_TOKEN')
store_hash = os.getenv('BIGCOMMERCE_STORE_HASH')

# Define the base URL for the BigCommerce API
base_url = f'https://api.bigcommerce.com/stores/{store_hash}/v3/'

# Define common headers
headers = {
    'X-Auth-Client': client_id,
    'X-Auth-Token': access_token,
    'Content-Type': 'application/json',
}

def build_url(endpoint, page=1, limit=250, params=None):
    url = f'{base_url}{endpoint}?limit={limit}&page={page}'
    if params:
        url += f'&{params}'
    return url

def handle_request(url):
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        return response.json()
    except HTTPError as http_err:
        logging.error(f'HTTP error occurred: {http_err}')
        return None
    except Exception as err:
        logging.error(f'An error occurred: {err}')
        return None

def fetch_data(endpoint, params=None):
    """Fetch data from the BigCommerce API with pagination."""
    page = 1
    limit = 250
    data = []

    while True:
        url = build_url(endpoint, page, limit, params)
        response_data = handle_request(url)
        if response_data is None:
            break
        
        data.extend(response_data['data'])
        next_page_link = response_data['meta']['pagination']['links'].get('next')
        if not next_page_link:
            break
        
        page += 1
        time.sleep(.2)  # Respect rate limits
    
    return data


def get_all_brands():
    """Fetch all brands."""
    return fetch_data('catalog/brands')


def get_all_products():
    """Fetch all products and their variants."""
    return fetch_data('catalog/products', 'include=variants')

def filter_keywords(name):
    """Check if a product name contains any of the specified keywords."""
    keywords = ["rebajado", "oferta", "antes"]
    return any(re.search(keyword, name, re.IGNORECASE) for keyword in keywords)

def filter_supplier_data():
    with open('brands.json', 'r') as file:
        brand_info = json.load(file)

    brand_abbreviations = {brand['abbreviation'] for brand in brand_info}
    supplier_data = pd.read_csv('supplier.csv')

    # Filter out products with specified keywords in the name
    keyword_filtered_data = supplier_data[~supplier_data['Nombre'].apply(filter_keywords)]

    # Use apply to utilize the extract_brand_abbr function
    filtered_supplier_data = keyword_filtered_data[keyword_filtered_data['Nombre'].apply(extract_brand_abbr).isin(brand_abbreviations)]

    return filtered_supplier_data


def compare_data(filtered_supplier_data, all_products, brands_dict):
    comparison_results = []
    brand_mapping = {brand['abbreviation']: brand['full_name'] for brand in json.load(open('brands.json', 'r'))}

    supplier_dict = {row['Nombre']: row for index, row in filtered_supplier_data.iterrows()}
    fetched_dict = {product['name']: product for product in all_products}

    for supplier_product_name, supplier_row in supplier_dict.items():
        supplier_price = supplier_row['Precio']
        supplier_brand_abbr = supplier_product_name.split()[0]
        brand_name = brand_mapping.get(supplier_brand_abbr)
        
        if not brand_name:
            logging.warning(f'No matching brand found for abbreviation: {supplier_brand_abbr}')
            continue
        
        fetched_product = fetched_dict.get(supplier_product_name)
        if fetched_product:
            status = 'Updated' if float(supplier_price) != float(fetched_product['cost_price']) else 'Existing'
            visibility = True  # Product is visible as it's present in supplier data
            availability = 'available'  # Product is available for purchase as it's present in supplier data
            comparison_results.append(generate_comparison_result(fetched_product, supplier_row, brand_name, status, visibility, availability))
            
            for variant in fetched_product.get('variants', []):
                status = 'Updated' if float(supplier_price) != float(variant['cost_price']) else 'Existing'
                comparison_results.append(generate_comparison_result(variant, supplier_row, brand_name, status, visibility, availability))
        else:
            status = 'New'
            visibility = True  # New products are visible and available by default
            availability = 'available'
            new_product_dict = {
                'id': None,
                'name': supplier_product_name,
                'sku': None,
                'cost_price': None,
                # Additional fields can be added as needed to map to BigCommerce attributes
            }
            comparison_results.append(generate_comparison_result(new_product_dict, supplier_row, brand_name, status, visibility, availability))

    for fetched_product_name, fetched_product in fetched_dict.items():
        if fetched_product_name not in supplier_dict:
            status = 'Removed'
            brand_name = brands_dict.get(fetched_product['brand_id'], 'Unknown Brand')
            visibility = False  # Product is not visible as it's not present in supplier data
            availability = 'disabled'  # Product is not available for purchase as it's not present in supplier data
            comparison_results.append(generate_comparison_result(fetched_product, None, brand_name, status, visibility, availability))

            for variant in fetched_product.get('variants', []):
                comparison_results.append(generate_comparison_result(variant, None, brand_name, status, visibility, availability))

    return pd.DataFrame(comparison_results)



def calculate_selling_price(cost, tax_rate=0.16, stripe_rate=0.029, stripe_fixed_fee=0.30, profit_margin=0.20):
    """Calculate selling price based on cost, tax rate, stripe rate, stripe fixed fee, and desired profit margin."""
    total_cost = cost * (1 + tax_rate) + stripe_fixed_fee  # Including tax and stripe fixed fee
    selling_price = total_cost / (1 - profit_margin)  # Adjusting for desired profit margin
    return round(selling_price, 2)  # Rounding to 2 decimal places for currency format

def extract_weight_in_kg(product_name):
    weight_regex = re.compile(
        r'(\d{1,3}(?:,\d{3})*(\.\d+)?)(?:\s)*(lbs|sticks|LB|tabs|softgels|packs|pack|grs|ct|serv|ml|caps|oz|g|kg|tablets)', re.I)
    match = weight_regex.search(product_name)
    try:
        if match:
            weight = float(match.group(1).replace(',', ''))
            unit = match.group(3).lower()
            if unit in ['kg', 'g']:
                return weight if unit == 'kg' else weight / 1000
            elif unit in ['lbs', 'LB']:
                return weight * 0.453592  # converting lbs to kg
            # Add conversions for other units if necessary
    except Exception as e:
        logging.error(f"Error in extract_weight_in_kg: {e}")
    return None

def generate_comparison_result(product_or_variant, supplier_row, brand_name, status, visibility, availability, price_update=None):
    weight = extract_weight_in_kg(supplier_row['Nombre']) if supplier_row is not None else None
    result_dict = {
        'ID': product_or_variant.get('id', None),
        'Product Type': 'variant' if 'product_id' in product_or_variant else 'product',
        'Name': supplier_row['Nombre'] if supplier_row is not None else product_or_variant.get('name', None),
        'SKU': product_or_variant.get('sku', None),
        'Price': supplier_row['Precio'] if supplier_row is not None else product_or_variant.get('calculated_price', None),
        'Cost': product_or_variant.get('cost_price', None),
        'Brand': brand_name,
        'Status': status,
        'Visibility': visibility,
        'Availability': availability,
        'Price Update': price_update,
        'Weight': weight
    }
    return result_dict

def prepare_data(products, brands_dict):
    data = []
    columns = ['ID', 'Product Type', 'Name', 'SKU', 'Price', 'Cost', 'Brand', 'Selling Price', 'Weight']
    
    for product in products:
        cost = float(product['cost_price']) if product['cost_price'] else 0.0
        selling_price = calculate_selling_price(cost)
        weight = extract_weight_in_kg(product['name'])
        product_row = {
            'ID': product['id'],
            'Product Type': 'product',
            'Name': product['name'],
            'SKU': product['sku'],
            'Price': product['calculated_price'],
            'Cost': product['cost_price'],
            'Brand': brands_dict.get(product['brand_id'], 'Unknown Brand'),
            'Selling Price': selling_price,
            'Weight': weight,
            'Category':114
        }
        data.append(product_row)
        
        for variant in product.get('variants', []):
            variant_row = {
                'ID': variant['id'],
                'Product Type': 'variant',
                'Name': product['name'],
                'SKU': variant['sku'],
                'Price': variant['calculated_price'],
                'Cost': variant['cost_price'],
                'Brand': brands_dict.get(product['brand_id'], 'Unknown Brand'),
                'Selling Price': selling_price,  # Assuming same selling price for variants
                'Weight': weight,  # Assuming same weight for variants
                'Category':114
            }
            data.append(variant_row)
    
    return pd.DataFrame(data, columns=columns)


def main():
    logging.info('Starting data processing...')
    
    logging.info('Filtering supplier data...')
    filtered_supplier_data = filter_supplier_data()
    
    logging.info('Fetching all products...')
    all_products = get_all_products()
    
    logging.info('Fetching all brands...')
    brands_dict = {brand['id']: brand['name'] for brand in get_all_brands()}
    
    logging.info('Comparing data...')
    comparison_df = compare_data(filtered_supplier_data, all_products, brands_dict)
    
    comparison_file = 'comparison_results.csv'
    logging.info(f'Saving comparison results to {comparison_file}...')
    comparison_df.to_csv(comparison_file, index=False)

    logging.info('Data processing completed.')

if __name__ == '__main__':
    main()


INFO:root:Starting data processing...
INFO:root:Filtering supplier data...
INFO:root:Fetching all products...
INFO:root:Fetching all brands...
INFO:root:Comparing data...
INFO:root:Saving comparison results to comparison_results.csv...
INFO:root:Data processing completed.


In [14]:
import pandas as pd
import re
import json
from collections import defaultdict

def read_csv(file_path):
    """Read a CSV file into a Pandas DataFrame."""
    return pd.read_csv(file_path)

def extract_weight_in_kg(product_name):
    weight_regex = re.compile(
        r'(\d{1,3}(?:,\d{3})*(\.\d+)?)(?:\s)*(lbs|sticks|LB|tabs|softgels|packs|pack|grs|ct|serv|ml|caps|oz|g|kg|tablets)', re.I)
    match = weight_regex.search(product_name)
    try:
        if match:
            weight = float(match.group(1).replace(',', ''))
            unit = match.group(3).lower()
            return weight, unit
    except Exception as e:
        print(f"Error in extract_weight_in_kg: {e}")
    return None, None

def separate_product_and_variant(row):
    brand = ""
    product_name = ""
    measurement = ""
    variant = None

    parts = row.split(" ")

    brand = parts[0]
    remaining_parts = parts[1:]
    
    remaining_str = " ".join(remaining_parts)
    
    weight, unit = extract_weight_in_kg(remaining_str)
    if weight and unit:
        measurement = f"{weight} {unit}"
        remaining_str = remaining_str.replace(f"{weight} {unit.upper()}", "").strip()

    remaining_parts = remaining_str.split(" ")

    if remaining_parts:
        is_unit = any(remaining_parts[-1].lower().endswith(u) for u in ['kg', 'lbs', 'g', 'ml'])
        if is_unit:
            variant = remaining_parts[-2]
            product_name = " ".join(remaining_parts[:-2]).strip()
        else:
            variant = remaining_parts[-1]
            product_name = " ".join(remaining_parts[:-1]).strip()

    return brand, product_name, measurement, variant

def main():
    file_path = 'supplier.csv'  # Update with your actual file path
    df = read_csv(file_path)
    
    product_dict = defaultdict(lambda: defaultdict(list))

    for index, row in df.iterrows():
        brand, product_name, measurement, variant = separate_product_and_variant(row['Nombre'])
        full_product_name = f"{brand} {product_name} {measurement}".strip()
        
        if variant:
            product_dict[full_product_name]['Variants'].append(variant)
        
        if measurement:
            product_dict[full_product_name]['Measurement'] = measurement

        product_dict[full_product_name]['Price'] = row.get('Precio', None)

    product_json = json.dumps(product_dict, indent=4)
    
    with open('product_variants.json', 'w') as f:
        f.write(product_json)

    print("JSON file generated successfully.")

if __name__ == '__main__':
    main()


JSON file generated successfully.


In [20]:
import pandas as pd
import re
import json
from collections import defaultdict

def read_csv(file_path):
    return pd.read_csv(file_path)

def extract_weight_in_kg(product_name):
    weight_regex = re.compile(
        r'(\d{1,3}(?:,\d{3})*(\.\d+)?)(?:\s)*(lbs|sticks|LB|tabs|softgels|packs|pack|grs|ct|serv|ml|caps|oz|g|kg|tablets)', re.I)
    match = weight_regex.search(product_name)
    try:
        if match:
            weight = float(match.group(1).replace(',', ''))
            unit = match.group(3).lower()
            return weight, unit
    except Exception as e:
        print(f"Error in extract_weight_in_kg: {e}")
    return None, None

known_units = ['grs', 'lbs', 'kg', 'ml', 'serv', 'tabs', 'softgels', 'packs', 'pack', 'g', 'ct', 'caps', 'oz']

def generate_training_data(sample_data):
    training_data = []
    for product in sample_data:
        entities = []
        parts = product.split(" ")
        brand_end = len(parts[0])
        entities.append((0, brand_end, "BRAND"))
        
        product_name_start = brand_end + 1
        product_name_end = product_name_start + len(parts[1])
        entities.append((product_name_start, product_name_end, "PRODUCT_NAME"))
        
        weight, unit = extract_weight_in_kg(product)
        if weight and unit:
            measurement_str = f"{weight} {unit.upper()}"
            measurement_start = product.find(measurement_str)
            measurement_end = measurement_start + len(measurement_str)
            entities.append((measurement_start, measurement_end, "MEASUREMENT"))
            
        last_word = parts[-1]
        if last_word.lower() not in known_units:
            variant_start = len(product) - len(last_word)
            variant_end = len(product)
            entities.append((variant_start, variant_end, "VARIANT"))
        
        training_data.append((product, {"entities": entities}))
    
    return training_data

df = read_csv('supplier.csv')  # Replace with the actual path to your CSV file

sample_data = df['Nombre'].sample(n=10, random_state=1)  # Adjust the sample size as needed
training_data_sample = generate_training_data(sample_data)

print(json.dumps(training_data_sample, indent=4))


[
    [
        "OUTLET ON 100% WHEY GOLD STD 5 LBS DOUBLE CHOCOLATE SIN SELLO EXT-TAPA ROTA CAD 06/24",
        {
            "entities": [
                [
                    0,
                    6,
                    "BRAND"
                ],
                [
                    7,
                    9,
                    "PRODUCT_NAME"
                ],
                [
                    -1,
                    6,
                    "MEASUREMENT"
                ],
                [
                    80,
                    85,
                    "VARIANT"
                ]
            ]
        }
    ],
    [
        "GA NITRAFLEX ADVANCED FORMULA 30 SERV GREEN APPLE",
        {
            "entities": [
                [
                    0,
                    2,
                    "BRAND"
                ],
                [
                    3,
                    12,
                    "PRODUCT_NAME"
                ],
                [
                

In [17]:
!pip install spacy scikit-learn pandas
!python -m spacy download en_core_web_sm



Collecting en-core-web-sm==3.5.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [18]:
import requests
import os
import pandas as pd
import json
from dotenv import load_dotenv
from collections import defaultdict
import re
import threading
from ratelimiter import RateLimiter
import time
import logging
from requests.adapters import HTTPAdapter
from concurrent.futures import ThreadPoolExecutor, as_completed
from logging.handlers import RotatingFileHandler  # Import RotatingFileHandler


logging.basicConfig(level=logging.INFO)

# Load environment variables from .env file
load_dotenv()

# Set up your API credentials
client_id = os.getenv('BIGCOMMERCE_CLIENT_ID_SA')
access_token = os.getenv('BIGCOMMERCE_ACCESS_TOKEN_SA')
store_hash = os.getenv('BIGCOMMERCE_STORE_HASH_SA')

# Define the base URL for the BigCommerce API
base_url = f'https://api.bigcommerce.com/stores/{store_hash}/v3/'



class BigCommerceManager:
    def __init__(self, client_id, access_token, base_url):
        self.client_id = client_id
        self.access_token = access_token
        self.base_url = base_url
        self.brand_cache = {}
        self.session = requests.Session()
        adapter = HTTPAdapter(pool_connections=50, pool_maxsize=50)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        self.logger = logging.getLogger('BigCommerceManager')
        self.logger.setLevel(logging.INFO)
        handler = RotatingFileHandler('bigcommerce_manager.log', maxBytes=1e6, backupCount=3)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)


    def send_request(self, method, endpoint, json=None):
        url = f'{self.base_url}{endpoint}'
        headers = {
            'X-Auth-Client': self.client_id,
            'X-Auth-Token': self.access_token,
            'Content-Type': 'application/json',
        }
        try:
            with self.session.request(method, url, headers=headers, json=json) as response:
                response.raise_for_status()
                return response.json()['data']
        except requests.exceptions.RequestException as err:
            self.logger.error(f'Error: {err}, URL: {url}, Method: {method}')
            if response:
                self.logger.error(f'Response text: {response.text}')
                # Optionally print the response text to get more error details
                self.logger.debug(response.text)  # Remove this line after debugging
            raise



    def get_all_brands(self):
        try:
            with open('brands.json', 'r') as f:
                brand_mapping = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Failed to decode JSON: {e}")
            return {}
        brand_dict = {brand['abbreviation']: brand['full_name']
                      for brand in brand_mapping}
        return brand_dict

    def create_brand(self, brand_name):
        endpoint = 'catalog/brands'
        payload = {"name": brand_name}
        try:
            return self.send_request('POST', endpoint, json=payload)
        except requests.exceptions.HTTPError as err:
            response = err.response
            if response.status_code == 409 and 'The resource with that value already exists' in response.text:
                print(f"Brand {brand_name} already exists.")
                # Optionally, fetch and return the existing brand info
            else:
                print(f"Failed to create brand: {err}")
            return None

    def get_brand(self, brand_name):
        endpoint = 'catalog/brands'
        brands = self.send_request('GET', endpoint)
        return next((brand for brand in brands if brand['name'] == brand_name), None)

    def get_or_create_brand_id(self, brand_abbreviation):
        # First, check the cache
        #self.logger.info(f'Processing brand abbreviation: {brand_abbreviation}')  # Add logging

        if brand_abbreviation in self.brand_cache:
            brand_id = self.brand_cache[brand_abbreviation]
            return brand_id, None  # Ensure a tuple is returned

        # If not in cache, check the local JSON file
        local_brands = self.get_all_brands()
        if brand_abbreviation not in local_brands:
            print(
                f"Brand abbreviation {brand_abbreviation} not found in local brands.")
            return None  # Exit early if the brand abbreviation is not recognized

        # Get the full brand name
        full_brand_name = local_brands[brand_abbreviation]

        # Now search for the brand in BigCommerce using the full brand name
        existing_brand = self.get_brand(full_brand_name)
        if existing_brand:
            brand_id = existing_brand['id']
            #self.logger.info(f'Found existing brand_id {brand_id} for brand_name {full_brand_name}')  # Add logging
        else:
            print(f"No brand found for name: {full_brand_name}, attempting to create...")
            created_brand = self.create_brand(full_brand_name)
            if created_brand:
                brand_id = created_brand['id']
                #self.logger.info(f'Successfully created brand {full_brand_name} with brand_id {brand_id}')  # Add logging
            else:
                #self.logger.error(f'Failed to create brand for name: {full_brand_name}')  # Add logging
                return (None, None)  # Modified line to return a tuple
        if brand_id is None:
            return None, None  # Ensure a tuple is returned


        # Cache the brand_id for future reference
        self.brand_cache[brand_abbreviation] = brand_id
        #self.logger.info(f"Cached brand_id {brand_id} for brand_name {full_brand_name}")

        return brand_id, full_brand_name

    def create_product(self, product_data, options=None):
        endpoint = 'catalog/products'
        payload = product_data
        if options:
            payload['options'] = options
        
        # Ensure price and weight are numerical values
        if payload.get('price'):
            payload['price'] = float(payload['price'])
        if payload.get('weight'):
            payload['weight'] = float(payload['weight'])

        self.logger.debug(f'Sending product data: {json.dumps(payload, indent=2)}')
        try:
            return self.send_request('POST', endpoint, json=payload)
        except requests.exceptions.HTTPError as err:
            print(f"Failed to create product: {err}")
            return None
    
    def create_variant(self, variant_data):
            product_id = variant_data['product_id']
            endpoint = f'catalog/products/{product_id}/variants'
            try:
                return self.send_request('POST', endpoint, json=variant_data)
            except requests.exceptions.HTTPError as err:
                response = err.response
                self.logger.error(
                    f"Failed to create variant for product ID {product_id}: {err}",
                    exc_info=True  # This will log the traceback as well
                )
                self.logger.error(f"Response body: {response.text}")
                return None

    def create_option(self, product_id, option_data):
        endpoint = f'catalog/products/{product_id}/options'
        try:
            return self.send_request('POST', endpoint, json=option_data)
        except requests.exceptions.HTTPError as err:
            response = err.response
            print(f"Failed to create option: {response.text}")
            return None

    def update_product(self, product_id, updates):
        endpoint = f'catalog/products/{product_id}'
        try:
            return self.send_request('PUT', endpoint, json=updates)
        except requests.exceptions.HTTPError as err:
            print(f"Failed to update product {product_id}: {err}")
            return None

    def get_all_products(self):
        products = []
        page = 1
        # Set a limit to the number of products per page (max is 250)
        limit = 250

        while True:
            # The URL for the current page of products without brand_ids
            endpoint = f'catalog/products?limit={limit}&page={page}&include=variants'
            products_page = self.send_request('GET', endpoint)
            products.extend(products_page)

            # Check for a next page of products
            if len(products_page) < limit:
                break

            # Increment the page number for the next iteration
            page += 1

        return products


class SupplierCSVProcessor:
    def __init__(self, bc_manager):
        self.bc_manager = bc_manager
        self.brand_abbreviations = set(self.bc_manager.get_all_brands().keys())
        self.logger = logging.getLogger('SupplierCSVProcessor')  # Initialize logger
        self.logger.setLevel(logging.INFO)
        handler = RotatingFileHandler('supplier_processor.log', maxBytes=1e6, backupCount=3)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

    @staticmethod   
    def parse_product_name(product_name):
        brand_match = re.match(r'^([A-Z]+)', product_name)
        brand = brand_match.group(1) if brand_match else None

        offer_info = None
        offer_match = re.search(r'\*.*\*$', product_name)
        if offer_match:
            offer_info = offer_match.group(0)
            product_name = re.sub(re.escape(offer_info), '',
                                product_name)  # remove offer info

        quantity_info = None
        quantity_match = re.search(r'\(.*\)', product_name)
        if quantity_match:
            quantity_info = quantity_match.group(0)
            product_name = re.sub(re.escape(quantity_info),
                                '', product_name)  # remove quantity info

        remaining_parts = product_name.strip().split()

        flavor_variant = remaining_parts.pop() if len(remaining_parts) > 1 else None

        product_common_name = ' '.join(remaining_parts)

        words = product_name.split()
        parent_name = None
        variant_descriptor = None
        for i in range(len(words)):
            candidate_parent_name = ' '.join(words[:i])
            candidate_variant_descriptor = ' '.join(words[i:])
            if candidate_parent_name and candidate_variant_descriptor:
                parent_name = candidate_parent_name
                variant_descriptor = candidate_variant_descriptor
                break  # Exit the loop once we've found a candidate split
        
        sku = product_name.replace(' ', '_')  # Replace spaces with underscores to generate SKU
        return {
            'brand': brand,
            'product_common_name': product_common_name,
            'quantity_info': quantity_info,
            'flavor_variant': flavor_variant,
            'offer_info': offer_info,
            'parent_name': parent_name,
            'variant_descriptor': variant_descriptor,
            'sku': sku  # Include SKU in the returned dictionary
        }
    
    @staticmethod
    def create_product_variant_mapping(supplier_df):
        product_variant_mapping = defaultdict(lambda: {'variants': [], 'brand': None})
        for index, row in supplier_df.iterrows():
            parsed_info = SupplierCSVProcessor.parse_product_name(row['Nombre'])
            common_name = parsed_info['parent_name']
            variant_descriptor = parsed_info['variant_descriptor']
            brand = parsed_info['brand']
            product_variant_mapping[common_name]['variants'].append(variant_descriptor)
            product_variant_mapping[common_name]['brand'] = brand
        return product_variant_mapping
        
        

    @staticmethod
    def categorize_products(supplier_df):
        product_categories = defaultdict(lambda: {'variants': [], 'brand': None})
        for index, row in supplier_df.iterrows():
            parsed_info = SupplierCSVProcessor.parse_product_name(row['Nombre'])
            parent_name = parsed_info['parent_name']
            
            variant_descriptor = parsed_info['variant_descriptor']
            brand = parsed_info['brand']
            if parent_name and variant_descriptor:
                product_categories[parent_name]['variants'].append(variant_descriptor)
                product_categories[parent_name]['brand'] = brand
        return product_categories
    


    @staticmethod
    def extract_weight_in_kg(product_name):
            weight_regex = re.compile(
                r'(\d{1,3}(?:,\d{3})*(\.\d+)?)(?:\s)*(lbs|sticks|LB|tabs|softgels|packs|pack|grs|ct|serv|ml|caps|oz|g|kg|tablets)', re.I)
            match = weight_regex.search(product_name)
            try:
                if match:
                    weight = float(match.group(1).replace(',', ''))
                    unit = match.group(3).lower()

                    conversion_factors = {
                        'oz': 0.0283495,
                        'lbs': 0.453592,
                        'lb': 0.453592,
                        'grs': 0.001,
                        'g': 0.001,
                        'ct': 0.0005,
                        'caps': 0.0005,
                        'tablets': 0.0005,
                        'tabs': 0.0005,
                        'softgels': 0.0005,
                        'stick': 0.0005,
                        'packs': 0.008,
                        'pack': 0.008,
                        'ml': 0.001,
                        'serv': 0.015,
                        'kg': 1,
                    }

                    # Default to 1 if unit is not recognized
                    weight *= conversion_factors.get(unit, 1)
                    return round(weight, 2)
            except Exception as e:
                logging.error(f"Error in extract_weight_in_kg: {e}")

            return None  # Default None if no match

    def process_supplier_csv(self):
                supplier_df = pd.read_csv('supplier.csv')
                products_dict = defaultdict(lambda: {
                    'variants': [],
                    'brand_id': None,
                    'name': None,
                })

                for index, row in supplier_df.iterrows():
                    parsed_info = SupplierCSVProcessor.parse_product_name(row['Nombre'])
                    brand_name = parsed_info['brand']
                    if brand_name not in self.brand_abbreviations:
                        continue  # Skip this product if the brand abbreviation is not recognized
                    common_name = parsed_info['product_common_name']
                    variant_descriptor = parsed_info['flavor_variant']
                    brand_id, brand_name = self.bc_manager.get_or_create_brand_id(brand_name)
                    #self.logger.info(f'Processed brand abbreviation: {brand_name}, obtained brand_id: {brand_id}')

                    if brand_id is None or brand_name is None:
                        #self.logger.warning(f"Failed to get or create brand for abbreviation: {brand_name}")
                        continue  # Skip this iteration and proceed with the next row
                    weight_kg = self.extract_weight_in_kg(row['Nombre'])
                    if weight_kg is not None:
                        weight_kg = float(weight_kg)
                    variant = {
                        'descriptor': variant_descriptor,
                        'price': row['Precio'],
                        'weight': weight_kg,  # This will either be a float or None
                    }
                    products_dict[common_name]['variants'].append(variant)
                    # Assume the brand_id is the same for all variants
                    products_dict[common_name]['brand_id'] = brand_id
                    products_dict[common_name]['brand_name'] = brand_name  # Add this line
                    products_dict[common_name]['name'] = common_name

                return products_dict


rate_limiter = RateLimiter(max_calls=50, period=1)  # Adjust as per the API rate limits

def process_product(bc_manager, product_name, product_info):
    try:
        with rate_limiter:
            brand_id, brand_name = product_info['brand_id'], product_info['brand_name']  # Adjusted line
            if not brand_id:
                print(f"Missing brand_id for product: {product_name}")
                return  # Exit early if brand_id is missing

            sku = product_name  # Assuming SKU is the product_name
            if sku in existing_skus:
                # Existing product, update if necessary
                existing_product = existing_skus[sku]
                updates = {
                    'is_visible': True,
                    'brand_id': brand_id,
                }
                bc_manager.update_product(existing_product['id'], updates)
            else:
                # New product, create it
                new_product_data = {
                    'name': product_name,
                    'type': 'physical',
                    'brand_id': brand_id,
                    'sku': sku,
                    'price': product_info['variants'][0]['price'],  # Assuming price is the same for all variants
                    'weight': product_info['variants'][0]['weight'],  # Assuming weight is the same for all variants
                     }
                logging.debug(new_product_data)  # Debugging line
                created_product = bc_manager.create_product(new_product_data)
                if created_product is None:
                    print(f"Failed to create product for SKU: {sku}")
                    return

                option_data = {
                    'name': 'Variant',
                    'display_name': 'Variant',
                    'type': 'dropdown',
                    'option_values': [{'label': str(variant['descriptor'])} for variant in product_info['variants']]
                }
                
                created_option = bc_manager.create_option(created_product['id'], option_data)
                if created_option is None:
                    logging.debug(f"Failed to create option for product ID: {created_product['id']}")
                    return

                for variant in product_info['variants']:
                    variant_descriptor = str(variant['descriptor']) if variant['descriptor'] else 'Unknown'

                    variant_data = {
                        'product_id': created_product['id'],
                        'sku': f"{sku}-{variant_descriptor}",
                        'option_values': [
                            {
                                'option_display_name': 'Variant',
                                'label': variant,
                            }
                        ],
                    }
                    bc_manager.create_variant(variant_data)
    except Exception as e:
        logging.error(f"Failed to process product {product_name}: {e}", exc_info=True)
        

def main():
    try:
        bc_manager = BigCommerceManager(client_id, access_token, base_url)
        supplier_processor = SupplierCSVProcessor(bc_manager)

        global existing_skus
        all_products = bc_manager.get_all_products()
        supplier_products = supplier_processor.process_supplier_csv()  # Corrected line
        existing_skus = {product['sku']: product for product in all_products}

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(process_product, bc_manager, product_name, product_info)
                       for product_name, product_info in supplier_products.items()]

            for future in as_completed(futures):  # Import as_completed from concurrent.futures
                # Optionally, log the result or check for exceptions
                if future.exception() is not None:
                    logging.error(f"Exception: {future.exception()}")

    except Exception as e:
        logging.error(f"Exception in main: {e}",exc_info=True)

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        handlers=[RotatingFileHandler('application.log', maxBytes=1e6, backupCount=3)])
    main()

ERROR:BigCommerceManager:Error: 422 Client Error: Unprocessable Entity for url: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products, URL: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products, Method: POST
ERROR:BigCommerceManager:Error: 422 Client Error: Unprocessable Entity for url: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products, URL: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products, Method: POST


Failed to create product: 422 Client Error: Unprocessable Entity for url: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products
Failed to create product for SKU: MUT AIR FACE MASK CARBON 2 LAYER
Failed to create product: 422 Client Error: Unprocessable Entity for url: https://api.bigcommerce.com/stores/p9c875wszc/v3/catalog/products
Failed to create product for SKU: SYN AEROBAG DRAWSTRING BLACK
