In [6]:
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials
import logging
# ../utils/sku_to_qtt_map_generator.py
from utils.sku_to_qtt_map_generator import sku_to_qtt_map_generator

# Logger for this notebook's code, named after the running module via __name__.
# NOTE(review): no cell visible here logs through it yet — confirm it is used further down.
logger = logging.getLogger(__name__)



In [7]:
# Listing mapping export: every column is read as text so SKU/ASIN values are
# never coerced to numbers.
all_listings_mapping_NA = pd.read_csv(
    'preparing/data/all_listings_mapping_NA.csv',
    dtype=str,
)

# Stock-status export: tab-separated ASCII text; the first two lines are
# skipped before the header row is parsed.
BS_export_df = pd.read_csv(
    'preparing/data/STOCK-STATUS202407098.952568.TXT',
    sep='\t',
    encoding='ascii',
    skiprows=2,
    dtype=str,
)

# Show the column index of both frames, one per line, as a quick schema check.
print(all_listings_mapping_NA.columns, BS_export_df.columns, sep='\n')

Index(['seller_sku', 'ASIN_US', 'status_US', 'fulfillment_US', 'ASIN_CA',
       'status_CA', 'fulfillment_CA', 'ASIN_MX', 'status_MX', 'fulfillment_MX',
       'BS_SKU', 'B_SKU', 'B_NZ_SKU'],
      dtype='object')
Index(['CODE', 'NAME', 'SIZE', 'Unnamed: 3', 'Unnamed: 4', 'O-H', 'COM', 'B-O',
       'P-O', 'AVAIL', 'PROD TYPE'],
      dtype='object')


In [19]:


def retrieve_sku_mapping(
    region,
    use_local=True,
    statuses_allowed=('Active', 'Inactive', 'Incomplete'),
    local_csv_path='preparing/data/all_listings_mapping_NA.csv',
):
    """Build a ``{seller_sku: BS_SKU}`` dictionary for one marketplace region.

    The mapping table is loaded either from a local CSV or from the
    ``final_NA_mapping`` worksheet of a Google Sheet. Rows are kept only when
    the region's status column holds one of ``statuses_allowed`` and the
    ``BS_SKU`` column is neither missing nor empty.

    Args:
        region: Marketplace code; one of ``'US'``, ``'CA'`` or ``'MX'``. It
            selects the ``status_<region>`` column used for filtering.
        use_local: When true, read ``local_csv_path``; otherwise fetch the
            worksheet through gspread using the service-account credentials
            file ``credentials/sheets_api_cred.json``.
        statuses_allowed: List-like of status values a row must have in the
            region's status column to be included.
        local_csv_path: Path of the CSV read when ``use_local`` is true.

    Returns:
        dict mapping each remaining ``seller_sku`` to its ``BS_SKU``. If a
        ``seller_sku`` occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If ``region`` is not supported, or the expected worksheet
            does not exist in the Google Sheet.
        KeyError: If the loaded table lacks ``seller_sku``, ``BS_SKU`` or the
            region's status column.
    """
    allowed_values = ['US', 'CA', 'MX']
    if region not in allowed_values:
        raise ValueError(f"Invalid region '{region}'. Allowed values are {allowed_values}.")

    status_column = f'status_{region}'

    if use_local:
        source_df = pd.read_csv(local_csv_path, dtype=str)
    else:
        scopes = ["https://www.googleapis.com/auth/spreadsheets"]
        creds = Credentials.from_service_account_file("credentials/sheets_api_cred.json", scopes=scopes)
        client = gspread.authorize(creds)

        sheet_id = "1ZMzIMn7CzV_tUJSfXguHYLh3fkkgHVh_0u2NBWCzEAQ"
        workbook = client.open_by_key(sheet_id)

        source_worksheet_name = "final_NA_mapping"
        # Materialise the titles: a lazy map() object is exhausted by the
        # first membership test and would silently fail any later check.
        worksheet_titles = {worksheet.title for worksheet in workbook.worksheets()}
        if source_worksheet_name not in worksheet_titles:
            raise ValueError(f"Worksheet {source_worksheet_name} not found in the google sheet")

        records = workbook.worksheet(source_worksheet_name).get_all_records()
        source_df = pd.DataFrame(records, dtype=str)

    # Fail with an explicit message instead of a bare pandas KeyError when the
    # source (e.g. an empty worksheet) does not have the expected columns.
    required_columns = ['seller_sku', status_column, 'BS_SKU']
    missing_columns = [name for name in required_columns if name not in source_df.columns]
    if missing_columns:
        raise KeyError(f"Mapping source is missing required column(s): {missing_columns}")

    status = source_df[status_column]
    bs_sku = source_df['BS_SKU']
    # One combined mask instead of five successive frame copies. The explicit
    # NaN / '' exclusions are kept so those rows are dropped even if a caller
    # lists '' or NaN in statuses_allowed.
    keep = (
        status.notna()
        & status.ne('')
        & status.isin(statuses_allowed)
        & bs_sku.notna()
        & bs_sku.ne('')
    )
    kept_rows = source_df.loc[keep]

    return dict(zip(kept_rows['seller_sku'], kept_rows['BS_SKU']))



# Call the function defined in this cell. The previous name,
# get_amazon_NA_sku_to_BS_sku_map, is not defined or imported anywhere in the
# notebook, so it only resolved through stale kernel state and raises
# NameError after Restart & Run All.
result = retrieve_sku_mapping('US')
print(result)


{'001BF_5_Var': '001BF', '001BF_6_': '001BF', '001BF_Sample_': '001BFS', '001CY_5_Var': '001CY', '001GF_5_Var': '001GF', '001GF_6': '001GF', '001GF_6_X': '001GF', '001GF_Sample_': '001GFS', '001OB_5_Var': '001OB', '001OB_6_': '001OB', '001OB_Sample_': '001OBS', '001OB_Sample_XX': '001OBS', '001PB_1': '001PB', '001PB_5': '001PB', '001PB_5_': '001PB', '001PB_S': '001PBS', '002BS_5_': '002BS', '002BS_5_Var': '002BS', '002BS_6_': '002BS', '002BS_Sample_': '002BS', '002BS_Sample_XX': '002BS', '003BS_5_Var': '003BS', '009CB_6': '009CB', '009CB_Sample': '009CBS', '018OG_5_': '018OG', '018OG_5_Var': '018OG', '018OG_6_': '018OG', '018OG_Sample': '018OGS', '018OG_Sample_XX': '018OGS', '077GM_5_': '077GM', '077GM_5_Var': '077GM', '077GM_6_': '077GM', '077GM_Sample_': '077GMS', '149TO_5_': '149TO', '149TO_5_Var': '149TO', '149TO_6_': '149TO', '149TO_Sample_': '149TOS', '186FB_6_': '186FB', '186FB_6_ XX': '186FB', '186FB_Sample': '186FBS', '1D-6MG9-K1RS': '703531', '237SR_5_': '237SR', '237SR_6_': 