In [None]:
import numpy as np
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import os



In [273]:
raw_files = os.listdir('data/rawpo/xlsx')
display(raw_files)

def convert_xlsx_to_csv(directory):
    """
    Convert all .xlsx files in the specified directory to .csv format.
    Skips files that already have a corresponding .csv file.
    
    Args:
        directory (str): Path to the directory containing .xlsx files
    """
    # Convert to Path object for easier handling
    dir_path = Path(directory)
    
    # Find all .xlsx files in the directory
    xlsx_files = list(dir_path.glob('*.xlsx'))
    
    if not xlsx_files:
        print(f"No .xlsx files found in {directory}")
        return
    
    print(f"Found {len(xlsx_files)} .xlsx files to process...")
    
    for xlsx_file in xlsx_files:
        # Create output filename with .csv extension
        csv_file = xlsx_file.with_suffix('.csv')
        
        # Skip if CSV already exists
        if csv_file.exists():
            print(f"Skipping {xlsx_file.name} - {csv_file.name} already exists")
            continue
            
        try:
            # Read the Excel file
            print(f"\n===========================================Converting {xlsx_file.name} to {csv_file.name}...")
            df = pd.read_excel(xlsx_file)
            
            # Write to CSV
            df.to_csv(csv_file, index=False, encoding='utf-8')
            print(f"Successfully created {csv_file.name}")
            
        except Exception as e:
            print(f"Error processing {xlsx_file.name}: {str(e)}")
    
    print("Conversion complete!")

# Example usage:
convert_xlsx_to_csv('data/rawpo/xlsx')

['12 Miss Glam Bangka.xlsx',
 '21 Miss Glam Sutomo.xlsx',
 '32 Miss Glam Soeta.xlsx',
 '17 Miss Glam Dumai.xlsx',
 '27 Miss Glam Padang Sidimpuan.xlsx',
 '02 Miss Glam Pekanbaru.xlsx',
 '22 Miss Glam Pasaman Barat.xlsx',
 '11 Miss Glam Damar.xlsx',
 '05 Miss Glam Panam.xlsx',
 '09 Miss Glam Medan.xlsx',
 '26 Miss Glam Mansyur.xlsx',
 '19 Miss Glam Rantau Prapat.xlsx',
 '14 Miss Glam Solok.xlsx',
 '03 Miss Glam Jambi.csv',
 '13 Miss Glam Payakumbuh.xlsx',
 '03 Miss Glam Jambi.xlsx',
 '31 Miss Glam Mayang.xlsx',
 '33 Miss Glam Balikpapan.xlsx',
 '20 Miss Glam Tanjung Pinang.xlsx',
 '04 Miss Glam Bukittinggi.xlsx',
 '08 Miss Glam Bengkulu.xlsx',
 '23 Miss Glam Halat.xlsx',
 '29 Miss Glam Marpoyan.xlsx',
 '06 Miss Glam Muaro Bungo.xlsx',
 '28 Miss Glam Aceh.xlsx',
 '25 Miss Glam Sudirman.xlsx',
 '07 Miss Glam Lampung.xlsx',
 '30 Miss Glam Sei Penuh.xlsx',
 '24 Miss Glam Duri.xlsx',
 '04 Miss Glam Bukittinggi.csv',
 '15 Miss Glam Tembilahan.xlsx',
 '18 Miss Glam Kedaton.xlsx',
 '16 Miss Gla

Found 32 .xlsx files to process...

Error processing 12 Miss Glam Bangka.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 21 Miss Glam Sutomo.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 32 Miss Glam Soeta.xlsx: invalid literal for int() with base 10: 'INF'

Error processing 17 Miss Glam Dumai.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 27 Miss Glam Padang Sidimpuan.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 02 Miss Glam Pekanbaru.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 22 Miss Glam Pasaman Barat.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 11 Miss Glam Damar.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 05 Miss Glam Panam.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 09 Miss Glam Medan.xlsx: invalid literal for int() with base 10: 'NAN'

Error processing 26 Miss Glam Mansyur.xlsx: inval

In [274]:
# Read the Excel file, converting 'INF' to numpy.inf
ori_df = pd.read_csv('data/rawpo/01 Miss Glam Padang.csv', sep=';', decimal=',')

# Convert all numeric columns, handling infinity and NaN values
for col in ori_df.select_dtypes(include=[np.number]).columns:
    ori_df[col] = pd.to_numeric(ori_df[col], errors='coerce')

df = ori_df.copy()
df = df.rename(columns={'Stok': 'Stock'})

pd.set_option('display.max_columns', None)

# extract only the columns we need
display(df.info())
df = df[['Brand', 'SKU', 'Nama', 'Stock', 'Daily Sales', 'Max. Daily Sales', 'Lead Time', 'Max. Lead Time', 'Sedang PO', 'Min. Order', 'HPP']]

display(df)

# contribution dictionary for each store location
contribution_dict = {
    'payakumbuh': 0.47,
}

FileNotFoundError: [Errno 2] No such file or directory: 'data/rawpo/01 Miss Glam Padang.csv'

In [260]:
# supplier mapping
# to map an SKU and brand to specific supplier

raw_supplier_df = pd.read_csv('data/supplier.csv', sep=';', decimal=',')
raw_supplier_df = raw_supplier_df.fillna('')

display(raw_supplier_df)

FileNotFoundError: [Errno 2] No such file or directory: 'data/supplier.csv'

In [None]:
# First, convert object columns to numeric where possible
numeric_columns = ['Stock', 'Daily Sales', 'Lead Time', 'Max. Daily Sales', 'Max. Lead Time']

df_clean = df.copy()
display('Raw DataFrame: ', df)

# Convert all columns to numeric, coercing errors to NaN
for col in numeric_columns:
    df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')

# Now fill NA with 0 and convert to int
df_clean = df_clean.fillna(0)

# For non-numeric columns, keep them as they are
non_numeric_columns = ['Brand', 'SKU']  # Add other non-numeric columns if needed
for col in non_numeric_columns:
    df_clean[col] = df[col]  # Keep original values

# add new column 'Lead Time Sedang PO' default to 2 days
df_clean['Lead Time Sedang PO'] = 5

# Display the cleaned DataFrame
print("Cleaned DataFrame:")
display(df_clean)

# Show info of the cleaned DataFrame
print("\nDataFrame Info: Expected to have maximal non-null values...")
df_clean.info()

'Raw DataFrame: '

Unnamed: 0,Brand,SKU,Nama,Stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Sedang PO,Min. Order,HPP
0,ACNAWAY,10400614911,ACNAWAY 3 in 1 Acne Sun Serum Sunscreen Serum ...,0,0.00,0,4,23,0,1,57850
1,ACNAWAY,10100824612,ACNAWAY Mugwort Acne Clear Bar Soap 100gr,0,0.00,0,4,23,24,1,31000
2,ACNAWAY,10400517459,ACNAWAY Mugwort Daily Sunscreen Only For Acne ...,0,0.00,0,4,23,16,1,0
3,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,2,0.80,4,4,23,25,1,31930
4,ACNAWAY,10500637717,ACNAWAY Mugwort Gel Mask Anti Pores Masker Gel...,35,0.36,8,4,23,0,1,33908
...,...,...,...,...,...,...,...,...,...,...,...
8118,ZARA,304002157530,ZARA Eau De Parfum Hibiscus 90ml,0,0.00,0,4,23,0,3,0
8119,ZARA,30400321119,ZARA Eau De Toilette Go Fruity 90ml,7,0.00,0,4,23,0,3,237002
8120,ZARA,30400320908,ZARA Eau De Toilette Peony 90ml,3,0.02,1,4,23,0,3,237002
8121,ZARA,30400439849,ZARA Eau De Toilette Twilight Mauve 90ml,8,0.00,0,4,23,0,3,237002


Cleaned DataFrame:


Unnamed: 0,Brand,SKU,Nama,Stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Sedang PO,Min. Order,HPP,Lead Time Sedang PO
0,ACNAWAY,10400614911,ACNAWAY 3 in 1 Acne Sun Serum Sunscreen Serum ...,0,0.00,0,4,23,0,1,57850,5
1,ACNAWAY,10100824612,ACNAWAY Mugwort Acne Clear Bar Soap 100gr,0,0.00,0,4,23,24,1,31000,5
2,ACNAWAY,10400517459,ACNAWAY Mugwort Daily Sunscreen Only For Acne ...,0,0.00,0,4,23,16,1,0,5
3,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,2,0.80,4,4,23,25,1,31930,5
4,ACNAWAY,10500637717,ACNAWAY Mugwort Gel Mask Anti Pores Masker Gel...,35,0.36,8,4,23,0,1,33908,5
...,...,...,...,...,...,...,...,...,...,...,...,...
8118,ZARA,304002157530,ZARA Eau De Parfum Hibiscus 90ml,0,0.00,0,4,23,0,3,0,5
8119,ZARA,30400321119,ZARA Eau De Toilette Go Fruity 90ml,7,0.00,0,4,23,0,3,237002,5
8120,ZARA,30400320908,ZARA Eau De Toilette Peony 90ml,3,0.02,1,4,23,0,3,237002,5
8121,ZARA,30400439849,ZARA Eau De Toilette Twilight Mauve 90ml,8,0.00,0,4,23,0,3,237002,5



DataFrame Info: Expected to have maximal non-null values...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8123 entries, 0 to 8122
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Brand                8123 non-null   object 
 1   SKU                  8123 non-null   int64  
 2   Nama                 8123 non-null   object 
 3   Stock                8123 non-null   int64  
 4   Daily Sales          8123 non-null   float64
 5   Max. Daily Sales     8123 non-null   int64  
 6   Lead Time            8123 non-null   int64  
 7   Max. Lead Time       8123 non-null   int64  
 8   Sedang PO            8123 non-null   int64  
 9   Min. Order           8123 non-null   int64  
 10  HPP                  8123 non-null   int64  
 11  Lead Time Sedang PO  8123 non-null   int64  
dtypes: float64(1), int64(9), object(2)
memory usage: 761.7+ KB


### Add Supply Chain params for AutoPO

- Safety stock - (max sales x max lead time) - (avg sales x avg lead time)
- Reorder point - avg sales x avg lead time + safety stock
- Stock cover days (for 21 days) - avg sales x 21
- RoP_Reference (1 -> RoP > Stock cover days, 0 -> RoP < Stock cover days)
- Current stock days cover -> Current stock / avg sales
- Is_open_po (1 -> Current Stock < Reorder point, 0 -> otherwise)
- Initial_Qty_PO - Reorder point - Current stock

- Is_emergency_PO - 1 -> Current stock days cover <= max lead time

- Emergency_PO_Qty - (max lead time - Current stock days cover) x Avg sales

In [None]:
pd.set_option('display.float_format', '{:.2f}'.format)

# 1. Safety stock = (max sales x max lead time) - (avg sales x avg lead time)
df_clean['Safety stock'] = (df_clean['Max. Daily Sales'] * df_clean['Max. Lead Time']) - (df_clean['Daily Sales'] * df_clean['Lead Time'])
# round up safety stock
df_clean['Safety stock'] = df_clean['Safety stock'].apply(lambda x: np.ceil(x)).astype(int)

# 2. Reorder point = (avg sales x avg lead time) + safety stock
df_clean['Reorder point'] = np.ceil((df_clean['Daily Sales'] * df_clean['Lead Time']) + 
                                   df_clean['Safety stock']).astype(int)

# 3. Stock cover days (in Qty) for 30 days = avg sales x 30 
df_clean['Stock cover 30 days'] = df_clean['Daily Sales'] * 30
df_clean['Stock cover 30 days'] = df_clean['Stock cover 30 days'].apply(lambda x: np.ceil(x)).astype(int)

# 5. Current stock days cover (in days) = Current stock / avg sales
df_clean['current_stock_days_cover'] = (df_clean['Stock'].astype(float) * 1.0 ) / df_clean['Daily Sales'].astype(float)

# 6. Is_open_po (1 -> Current stock < Reorder point, 0 -> otherwise)
df_clean['is_open_po'] = np.where((df_clean['current_stock_days_cover'] <= 30) & (df_clean['Stock'] <= df_clean['Reorder point']), 1, 0)

# 7. Initial_Qty_PO = Stock cover 30 days - Current stock - sedang PO
df_clean['initial_qty_po'] = df_clean['Stock cover 30 days'] - df_clean['Stock'] - df_clean['Sedang PO']
df_clean['initial_qty_po'] = np.where(df_clean['is_open_po'] == 1, df_clean['initial_qty_po'], 0)
df_clean['initial_qty_po'] = df_clean['initial_qty_po'].apply(lambda x: x if x > 0 else 0).astype(int)

# 9. Emergency_PO_Qty = (max lead time - Current stock days cover) x Avg sales
# First, ensure 'Sedang PO' column exists and handle potential missing values
if 'Sedang PO' not in df_clean.columns:
    df_clean['Sedang PO'] = 0  # Default to 0 if column doesn't exist

# Calculate emergency_po_qty based on the condition
df_clean['emergency_po_qty'] = np.where(
    df_clean['Sedang PO'] > 0,  # If there is 'Sedang PO' quantity
    np.maximum(0, (df_clean['Lead Time Sedang PO'] - df_clean['current_stock_days_cover']) * 
              df_clean['Daily Sales']),
    # Else use the original formula
    np.ceil((df_clean['Max. Lead Time'] - df_clean['current_stock_days_cover']) * 
            df_clean['Daily Sales'])
)

# First, handle any infinite values and NaN values
df_clean['emergency_po_qty'] = (
    df_clean['emergency_po_qty']
    .replace([np.inf, -np.inf], 0)  # Replace infinities with 0
    .fillna(0)                      # Fill any remaining NaNs with 0
    .astype(int)                    # Now safely convert to integers
)

# If you want to ensure no negative values (since it's a quantity)
df_clean['emergency_po_qty'] = df_clean['emergency_po_qty'].clip(lower=0)

# calculate updated po quantity
df_clean['updated_regular_po_qty'] = df_clean['initial_qty_po'] - df_clean['emergency_po_qty']
df_clean['updated_regular_po_qty'] = df_clean['updated_regular_po_qty'].apply(lambda x: x if x > 0 else 0).astype(int)

# Final check updated regular PO - if less than Min. Order, use Min. Order qty
df_clean['final_updated_regular_po_qty'] = np.where((df_clean['updated_regular_po_qty'] > 0) & (df_clean['updated_regular_po_qty'] < df_clean['Min. Order']), df_clean['Min. Order'], df_clean['updated_regular_po_qty'])


# Calculate total cost (HPP * qty) for emergency PO and final updated regular PO
df_clean['total_cost_emergency_po'] = df_clean['emergency_po_qty'] * df_clean['HPP']
df_clean['total_cost_final_updated_regular_po'] = df_clean['final_updated_regular_po_qty'] * df_clean['HPP']

# Handle any NaN or infinite values by replacing them with 0
df_clean = df_clean.fillna(0)
df_clean = df_clean.replace([np.inf, -np.inf], 0)

# Display the updated DataFrame with new columns
df_clean

Unnamed: 0,Brand,SKU,Nama,Stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Sedang PO,Min. Order,HPP,Lead Time Sedang PO,Safety stock,Reorder point,Stock cover 30 days,current_stock_days_cover,is_open_po,initial_qty_po,emergency_po_qty,updated_regular_po_qty,final_updated_regular_po_qty,total_cost_emergency_po,total_cost_final_updated_regular_po
0,ACNAWAY,10400614911,ACNAWAY 3 in 1 Acne Sun Serum Sunscreen Serum ...,0,0.00,0,4,23,0,1,57850,5,0,0,0,0.00,0,0,0,0,0,0,0
1,ACNAWAY,10100824612,ACNAWAY Mugwort Acne Clear Bar Soap 100gr,0,0.00,0,4,23,24,1,31000,5,0,0,0,0.00,0,0,0,0,0,0,0
2,ACNAWAY,10400517459,ACNAWAY Mugwort Daily Sunscreen Only For Acne ...,0,0.00,0,4,23,16,1,0,5,0,0,0,0.00,0,0,0,0,0,0,0
3,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,2,0.80,4,4,23,25,1,31930,5,89,93,24,2.50,1,0,2,0,0,63860,0
4,ACNAWAY,10500637717,ACNAWAY Mugwort Gel Mask Anti Pores Masker Gel...,35,0.36,8,4,23,0,1,33908,5,183,185,11,97.22,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8118,ZARA,304002157530,ZARA Eau De Parfum Hibiscus 90ml,0,0.00,0,4,23,0,3,0,5,0,0,0,0.00,0,0,0,0,0,0,0
8119,ZARA,30400321119,ZARA Eau De Toilette Go Fruity 90ml,7,0.00,0,4,23,0,3,237002,5,0,0,0,0.00,0,0,0,0,0,0,0
8120,ZARA,30400320908,ZARA Eau De Toilette Peony 90ml,3,0.02,1,4,23,0,3,237002,5,23,24,1,150.00,0,0,0,0,0,0,0
8121,ZARA,30400439849,ZARA Eau De Toilette Twilight Mauve 90ml,8,0.00,0,4,23,0,3,237002,5,0,0,0,0.00,0,0,0,0,0,0,0


In [None]:
# Create output directory if it doesn't exist
os.makedirs('output', exist_ok=True)

# Export to CSV
csv_path = 'output/result.csv'
df_clean.to_csv(csv_path, index=False, sep=';', encoding='utf-8-sig')
print(f"CSV file saved to: {csv_path}")

CSV file saved to: output/result.csv


### Mapping Brand and SKU with supplier (add supplier column)

In [None]:
import pandas as pd
import numpy as np

# Make copies to avoid modifying originals
df_clean_trimmed = df_clean.copy()
raw_supplier_trimmed = raw_supplier_df.copy()

# Trim whitespace from brand names
df_clean_trimmed['Brand'] = df_clean_trimmed['Brand'].str.strip()
raw_supplier_trimmed['Nama Brand'] = raw_supplier_trimmed['Nama Brand'].str.strip()

# First, get all Padang suppliers
padang_suppliers = raw_supplier_trimmed[
    raw_supplier_trimmed['Nama Store'] == 'Miss Glam Padang'
]

# Then get all other suppliers (non-Padang)
other_suppliers = raw_supplier_trimmed[
    raw_supplier_trimmed['Nama Store'] != 'Miss Glam Padang'
]

# Step 1: Left join with Padang suppliers first (priority)
merged_df = pd.merge(
    df_clean_trimmed,
    padang_suppliers,
    left_on='Brand',
    right_on='Nama Brand',
    how='left',
    suffixes=('_clean', '_supplier')
)

# Step 2: For rows without Padang supplier, try to find other suppliers
# Get the indices of rows that didn't get a match with Padang suppliers
no_padang_match = merged_df[merged_df['Nama Brand'].isna()].index

if len(no_padang_match) > 0:
    # Get the brands that need non-Padang suppliers
    brands_needing_suppliers = merged_df.loc[no_padang_match, 'Brand'].unique()
    
    # Get the first matching supplier for each brand (you can change this logic if needed)
    first_supplier_per_brand = other_suppliers.drop_duplicates(subset='Nama Brand')
    
    # Update the rows that didn't have Padang suppliers
    for brand in brands_needing_suppliers:
        supplier_data = first_supplier_per_brand[first_supplier_per_brand['Nama Brand'] == brand]
        if not supplier_data.empty:
            # Update the corresponding rows in merged_df
            brand_mask = (merged_df['Brand'] == brand) & (merged_df['Nama Brand'].isna())
            for col in supplier_data.columns:
                if col in merged_df.columns and col != 'Brand':  # Don't overwrite the Brand column
                    merged_df.loc[brand_mask, col] = supplier_data[col].values[0]

# Clean up: For any remaining NaN values in supplier columns, fill with empty string or as needed
supplier_columns = [
    'ID Supplier', 'Nama Supplier', 'ID Brand', 'ID Store', 
    'Nama Store', 'Hari Order', 'Min. Purchase', 'Trading Term',
    'Promo Factor', 'Delay Factor'
]

for col in supplier_columns:
    if col in merged_df.columns:
        if merged_df[col].dtype == 'object':
            merged_df[col] = merged_df[col].fillna('')
        else:
            merged_df[col] = merged_df[col].fillna(0)

# Show summary
print(f"Total rows in df_clean: {len(df_clean_trimmed)}")
print(f"Total rows after merge: {len(merged_df)}")

# Count how many rows got Padang suppliers vs other suppliers vs no suppliers
padang_count = (merged_df['Nama Store'] == 'Miss Glam Padang').sum()
other_supplier_count = ((merged_df['Nama Store'] != 'Miss Glam Padang') & 
                       (merged_df['Nama Store'] != '')).sum()
no_supplier = (merged_df['Nama Store'] == '').sum()

print(f"\nSuppliers matched:")
print(f"- 'Miss Glam Padang' suppliers: {padang_count} rows")
print(f"- Other suppliers: {other_supplier_count} rows")
print(f"- No supplier data: {no_supplier} rows")

# Save the result
os.makedirs('output', exist_ok=True)
output_path = 'output/merged_with_suppliers.csv'
merged_df.to_csv(output_path, index=False, sep=';', encoding='utf-8-sig')
print(f"\nResults saved to: {output_path}")

# Show a sample of the results
print("\nSample of merged data (first 5 rows):")
display(merged_df.head())

Total rows in df_clean: 8123
Total rows after merge: 8172

Suppliers matched:
- 'Miss Glam Padang' suppliers: 7809 rows
- Other suppliers: 36 rows
- No supplier data: 327 rows

Results saved to: output/merged_with_suppliers.csv

Sample of merged data (first 5 rows):


Unnamed: 0,Brand,SKU,Nama,Stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Sedang PO,Min. Order,HPP,Lead Time Sedang PO,Safety stock,Reorder point,Stock cover 30 days,current_stock_days_cover,is_open_po,initial_qty_po,emergency_po_qty,updated_regular_po_qty,final_updated_regular_po_qty,total_cost_emergency_po,total_cost_final_updated_regular_po,No,ID Supplier,Nama Supplier,ID Brand,Nama Brand,ID Store,Nama Store,Hari Order,Min. Purchase,Trading Term,Promo Factor,Delay Factor
0,ACNAWAY,10400614911,ACNAWAY 3 in 1 Acne Sun Serum Sunscreen Serum ...,0,0.0,0,4,23,0,1,57850,5,0,0,0,0.0,0,0,0,0,0,0,0,2787.0,1.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.0,ACNAWAY,7.0,Miss Glam Padang,2.0,500000.0,0.0,,
1,ACNAWAY,10100824612,ACNAWAY Mugwort Acne Clear Bar Soap 100gr,0,0.0,0,4,23,24,1,31000,5,0,0,0,0.0,0,0,0,0,0,0,0,2787.0,1.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.0,ACNAWAY,7.0,Miss Glam Padang,2.0,500000.0,0.0,,
2,ACNAWAY,10400517459,ACNAWAY Mugwort Daily Sunscreen Only For Acne ...,0,0.0,0,4,23,16,1,0,5,0,0,0,0.0,0,0,0,0,0,0,0,2787.0,1.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.0,ACNAWAY,7.0,Miss Glam Padang,2.0,500000.0,0.0,,
3,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,2,0.8,4,4,23,25,1,31930,5,89,93,24,2.5,1,0,2,0,0,63860,0,2787.0,1.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.0,ACNAWAY,7.0,Miss Glam Padang,2.0,500000.0,0.0,,
4,ACNAWAY,10500637717,ACNAWAY Mugwort Gel Mask Anti Pores Masker Gel...,35,0.36,8,4,23,0,1,33908,5,183,185,11,97.22,0,0,0,0,0,0,0,2787.0,1.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.0,ACNAWAY,7.0,Miss Glam Padang,2.0,500000.0,0.0,,


In [None]:
# Merge df_clean with raw_supplier_df to see all supplier matches
all_suppliers_merge = pd.merge(
    df_clean_trimmed,
    raw_supplier_trimmed,
    left_on='Brand',
    right_on='Nama Brand',
    how='left'
)

# Group by Brand and SKU to count unique suppliers
supplier_counts = all_suppliers_merge.groupby(['Brand', 'SKU'])['Nama Supplier'].nunique().reset_index()
supplier_counts.columns = ['Brand', 'SKU', 'Supplier_Count']

# Filter for brands/SKUs with multiple suppliers
multi_supplier_items = supplier_counts[supplier_counts['Supplier_Count'] > 1]

print(f"Found {len(multi_supplier_items)} brand/SKU combinations with multiple suppliers")
print("\nSample of items with multiple suppliers:")
display(multi_supplier_items.head())

# If you want to see the actual supplier details for these items
if not multi_supplier_items.empty:
    print("\nDetailed supplier information for multi-supplier items:")
    multi_supplier_details = all_suppliers_merge.merge(
        multi_supplier_items[['Brand', 'SKU']],
        on=['Brand', 'SKU']
    )
    display(multi_supplier_details[['Brand', 'SKU', 'Nama Supplier', 'Nama Store']].drop_duplicates().sort_values(['Brand', 'SKU']))

    # List of SKUs to check
skus_to_check = [
    '8995232702124',  # ACNEMED
    '8992821100293',  # ACNES
    '8992821100309',  # ACNES
    '8992821100323',  # ACNES
    '8992821100354'   # ACNES
]

# Convert SKUs to integers (since they appear as integers in df_clean)
skus_to_check = [int(sku) for sku in skus_to_check]

# Check if these SKUs exist in df_clean
found_skus = merged_df[merged_df['SKU'].isin(skus_to_check)]

if not found_skus.empty:
    print("Found matching SKUs in df_clean:")
    display(found_skus[['Brand', 'SKU', 'Nama']])
else:
    print("None of these SKUs were found in df_clean.")
    print("\nChecking if there are any similar SKUs...")
    
    # Check for any SKUs that contain these numbers
    for sku in skus_to_check:
        similar = merged_df[merged_df['SKU'].astype(str).str.contains(str(sku)[:8])]
        if not similar.empty:
            print(f"\nSKUs similar to {sku}:")
            display(similar[['Brand', 'SKU', 'Nama']])
    
    # Check the data types to ensure we're comparing correctly
    print("\nData type of SKU column:", merged_df['SKU'].dtype)
    print("Sample SKUs from df_clean:", merged_df['SKU'].head().tolist())

Found 6943 brand/SKU combinations with multiple suppliers

Sample of items with multiple suppliers:


Unnamed: 0,Brand,SKU,Supplier_Count
6,ACNEMED,8995232702124,2
7,ACNES,8992821100293,10
8,ACNES,8992821100309,10
9,ACNES,8992821100323,10
10,ACNES,8992821100354,10



Detailed supplier information for multi-supplier items:


Unnamed: 0,Brand,SKU,Nama Supplier,Nama Store
0,ACNEMED,8995232702124,PT. PENTA VALENT - PPN (PDG),Miss Glam Padang
1,ACNEMED,8995232702124,PT. PENTA VALENT - PPN (PDG),Miss Glam Damar
2,ACNEMED,8995232702124,PT. PENTA VALENT - PPN (PDG),Miss Glam Payakumbuh
3,ACNEMED,8995232702124,PT. PENTA VALENT - PPN (PDG),Miss Glam Solok
4,ACNEMED,8995232702124,PT. PENTA VALENT - PPN (PDG),Miss Glam Sutomo
...,...,...,...,...
220564,ZWITSAL,8999999561567,PT. SINARMAS DISTRIBUSI NUSANTARA - PPN (BKL),Miss Glam Bengkulu
220565,ZWITSAL,8999999561567,PT. TEMAN JAYA ABADI - PPN (PKP),Miss Glam Bangka
220566,ZWITSAL,8999999561567,PT. USAHA BERSAMA NATAR - PPN (LPG),Miss Glam Lampung
220567,ZWITSAL,8999999561567,PT. USAHA BERSAMA NATAR - PPN (LPG),Miss Glam Kedaton


Found matching SKUs in df_clean:


Unnamed: 0,Brand,SKU,Nama
6,ACNEMED,8995232702124,ACNEMED Facial Wash For Oily Skin 100gr
9,ACNES,8992821100309,ACNES Creamy Wash 100gr
14,ACNES,8992821100293,ACNES Foaming Wash 100ml
17,ACNES,8992821100354,ACNES Oil Control Film isi 50
24,ACNES,8992821100323,ACNES Sealing Jell Gel 18gr


### Find brands who are missing suppliers

In [None]:
# Find brands in df_clean that don't have a match in raw_supplier_df
missing_brands = set(df_clean['Brand']) - set(raw_supplier_df['Nama Brand'].dropna().unique())

print(f"Number of brands in df_clean: {len(df_clean['Brand'].unique())}")
print(f"Number of brands in raw_supplier_df: {len(raw_supplier_df['Nama Brand'].unique())}")
print(f"\nNumber of brands missing supplier data: {len(missing_brands)}")
print("\nFirst 20 missing brands (alphabetical order):")
print(sorted(list(missing_brands))[:20])

# Count how many rows are affected per missing brand
missing_brand_counts = df_clean[df_clean['Brand'].isin(missing_brands)]['Brand'].value_counts()
print("\nTop 20 missing brands by row count:")
print(missing_brand_counts)

Number of brands in df_clean: 430
Number of brands in raw_supplier_df: 500

Number of brands missing supplier data: 58

First 20 missing brands (alphabetical order):
['AHE', 'AIUEO', 'AVAIL', 'AWDAY', 'B.U.T', 'BEAUDELAB', "BENING'S", 'BHUMI', 'BROWIE', 'CELLUVIE', 'COLAB', 'DEATH POMADE', 'DOM POMADE', 'FLIMTY', 'GLOSSMEN', 'GOSMILE', 'HAIR & ME', 'HALOCA BEAUTY', 'HNH', 'INITO']

Top 20 missing brands by row count:
Brand
MYKONOS             64
JACQUELLE BEAUTE    35
SOME BY MI          30
MEZUCA              15
MERCREDI            13
MAIMEITE            12
SECRET GARDEN       10
ROSE ALL DAY         8
RAECCA               8
SLEEPOVER            7
SEBAMED              7
VIO                  7
OLAY                 7
BENING'S             6
GOSMILE              6
NEW SKIN             5
MISS TI              5
M&S BEAUTY           4
MARKS & SPENCER      4
NO BAD HAIR          3
B.U.T                3
AVAIL                3
PINKROULETTE         3
AIUEO                3
COLAB                

# Output grouped data per one brand and one SKU to separate files

In [None]:
import os
import pandas as pd

# Create output directory
output_dir = 'output_po'
os.makedirs(output_dir, exist_ok=True)

# Create a directory for brands without suppliers
no_supplier_dir = os.path.join(output_dir, '0_no_suppliers')
os.makedirs(no_supplier_dir, exist_ok=True)

# Function to sanitize folder names
def sanitize_folder_name(name):
    # Remove or replace invalid characters
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        name = name.replace(char, '_')
    return name.strip()

# Process each group
for (supplier_id, supplier_name, brand), group in final_df.groupby(['ID Supplier', 'Nama Supplier', 'Brand']):
    # Skip if no supplier (shouldn't happen as we replaced NaN with defaults)
    if pd.isna(supplier_id) or not supplier_name:
        # Save to no_supplier_dir
        brand_file = os.path.join(no_supplier_dir, f'{sanitize_folder_name(brand)}.csv')
        group.to_csv(brand_file, index=False, sep=';', encoding='utf-8-sig')
        continue
    
    # Create supplier directory
    supplier_dir = os.path.join(output_dir, f'{int(supplier_id)}_{sanitize_folder_name(supplier_name)}')
    os.makedirs(supplier_dir, exist_ok=True)
    
    # Save brand file
    brand_file = os.path.join(supplier_dir, f'{sanitize_folder_name(brand)}.csv')
    group.to_csv(brand_file, index=False, sep=';', encoding='utf-8-sig')

print("Data has been organized into supplier and brand-based folders in 'output_po'")

Data has been organized into supplier and brand-based folders in 'output_po'


# Final batch process

In [None]:
# Cell 1: Import libraries and setup
import pandas as pd
from pathlib import Path
import os
from IPython.display import display
import locale
from locale import atof
import numpy as np
from openpyxl.styles import numbers



# Apply the formatting to numeric columns in your final output
def format_dataframe_display(df):
    # Make a copy to avoid SettingWithCopyWarning
    df_display = df.copy()
    
    # Apply formatting to numeric columns
    for col in df_display.select_dtypes(include=['int64', 'float64']).columns:
        df_display[col] = df_display[col].apply(
            lambda x: format_id_number(x, 2) if pd.notna(x) else x
        )
    
    return df_display

# Then, when you want to display the dataframe with Indonesian formatting:
# display(format_dataframe_display(df_clean))

# Configuration
BASE_DIR = Path('/Users/andresuchitra/dev/missglam/autopo')
SUPPLIER_PATH = BASE_DIR / 'data/supplier.csv'
RAWPO_DIR = BASE_DIR / 'data/rawpo/csv'
STORE_CONTRIBUTION_PATH = BASE_DIR / 'data/store_contribution.csv'
OUTPUT_DIR = BASE_DIR / 'output/cleaned_po_with_suppliers'
os.makedirs(OUTPUT_DIR, exist_ok=True)

def load_store_contribution(store_contribution_path):
    """Load and prepare store contribution data."""
    store_contrib = pd.read_csv(store_contribution_path, header=None, 
                              names=['store', 'contribution_pct'])
    # Convert store names to lowercase for case-insensitive matching
    store_contrib['store_lower'] = store_contrib['store'].str.lower()
    return store_contrib

def get_contribution_pct(location, store_contrib):
    """Get contribution percentage for a given location."""
    location_lower = location.lower()

    contrib_row = store_contrib[store_contrib['store_lower'] == location_lower]
    if not contrib_row.empty:
        return contrib_row['contribution_pct'].values[0]
    print(f"Warning: No contribution percentage found for {location}")

    return 100  # Default to 100% if not found

def load_supplier_data(supplier_path):
    """Load and clean supplier data."""
    print("Loading supplier data...")
    df = pd.read_csv(supplier_path, sep=';', decimal=',').fillna('')
    df['Nama Brand'] = df['Nama Brand'].str.strip()
    return df

def merge_with_suppliers(df_clean, supplier_df):
    """Merge PO data with supplier information."""
    # Get Padang suppliers (priority)
    padang_suppliers = supplier_df[supplier_df['Nama Store'] == 'Miss Glam Padang']
    other_suppliers = supplier_df[supplier_df['Nama Store'] != 'Miss Glam Padang']
    
    # First merge with Padang suppliers
    merged_df = pd.merge(
        df_clean,
        padang_suppliers,
        left_on='Brand',
        right_on='Nama Brand',
        how='left',
        suffixes=('_clean', '_supplier')
    )
    
    # Then try other suppliers for unmatched brands
    no_padang_match = merged_df[merged_df['Nama Brand'].isna()].index
    if len(no_padang_match) > 0:
        brands_needing_suppliers = merged_df.loc[no_padang_match, 'Brand'].unique()
        first_supplier_per_brand = other_suppliers.drop_duplicates(subset='Nama Brand')
        
        for brand in brands_needing_suppliers:
            supplier_data = first_supplier_per_brand[first_supplier_per_brand['Nama Brand'] == brand]
            if not supplier_data.empty:
                brand_mask = (merged_df['Brand'] == brand) & (merged_df['Nama Brand'].isna())
                for col in supplier_data.columns:
                    if col in merged_df.columns and col != 'Brand':
                        merged_df.loc[brand_mask, col] = supplier_data[col].values[0]
    
    # Clean up supplier columns
    supplier_columns = [
        'ID Supplier', 'Nama Supplier', 'ID Brand', 'ID Store', 
        'Nama Store', 'Hari Order', 'Min. Purchase', 'Trading Term',
        'Promo Factor', 'Delay Factor'
    ]
    for col in supplier_columns:
        if col in merged_df.columns:
            merged_df[col] = merged_df[col].fillna('' if merged_df[col].dtype == 'object' else 0)
    
    return merged_df

def calculate_inventory_metrics(df_clean):
    """
    Calculate various inventory metrics including safety stock, reorder points, and PO quantities.
    
    Args:
        df_clean (pd.DataFrame): Input dataframe with required columns
        
    Returns:
        pd.DataFrame: Dataframe with added calculated columns
    """
    import numpy as np
    import pandas as pd
    
    # Ensure we're working with a copy to avoid SettingWithCopyWarning
    df = df_clean.copy()
    
    # Set display options
    pd.set_option('display.float_format', '{:.2f}'.format)

    # Normalise stock column name
    stock_col = 'Stok' if 'Stok' in df.columns else 'Stock'

    # Force the columns we need into numeric form
    numeric_cols = [
        stock_col, 'Daily Sales', 'Max. Daily Sales', 'Lead Time',
        'Max. Lead Time', 'Sedang PO', 'HPP', 'Lead Time Sedang PO'
    ]
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    
    try:
        # 1. Safety stock calculation
        df['Safety stock'] = (df['Max. Daily Sales'] * df['Max. Lead Time']) - (df['Daily Sales'] * df['Lead Time'])
        df['Safety stock'] = df['Safety stock'].apply(lambda x: np.ceil(x)).fillna(0).astype(int)
        
        # 2. Reorder point calculation
        df['Reorder point'] = np.ceil((df['Daily Sales'] * df['Lead Time']) + df['Safety stock']).fillna(0).astype(int)
        
        # 3. Stock cover for 30 days
        df['Stock cover 30 days'] = (df['Daily Sales'] * 30).apply(lambda x: np.ceil(x)).fillna(0).astype(int)
        
        # 4. Current stock days cover
        df['current_stock_days_cover'] = np.where(
            df['Daily Sales'] > 0,
            df[stock_col] / df['Daily Sales'],
            0
        )
        
        # 5. Is open PO flag
        df['is_open_po'] = np.where(
            (df['current_stock_days_cover'] <= 30) & 
            (df['Stok'] <= df['Reorder point']), 1, 0
        )
        
        # 6. Initial PO quantity
        df['initial_qty_po'] = df['Stock cover 30 days'] - df[stock_col] - df.get('Sedang PO', 0)
        df['initial_qty_po'] = (
            pd.Series(
                np.where(df['is_open_po'] == 1, df['initial_qty_po'], 0),
                index=df.index
            )
            .clip(lower=0)
            .astype(int)
        )
        
        # 7. Emergency PO quantity
        df['emergency_po_qty'] = np.where(
            df.get('Sedang PO', 0) > 0,
            np.maximum(0, (df['Lead Time Sedang PO'] - df['current_stock_days_cover']) * df['Daily Sales']),
            np.ceil((df['Max. Lead Time'] - df['current_stock_days_cover']) * df['Daily Sales'])
        )
        
        # Clean up emergency PO quantities
        df['emergency_po_qty'] = (
            df['emergency_po_qty']
            .replace([np.inf, -np.inf], 0)
            .fillna(0)
            .clip(lower=0)
            .astype(int)
        )
        
        # 8. Updated regular PO quantity
        df['updated_regular_po_qty'] = (df['initial_qty_po'] - df['emergency_po_qty']).clip(lower=0).astype(int)
        
        # 9. Final updated regular PO quantity (enforce minimum order)
        df['final_updated_regular_po_qty'] = np.where(
            (df['updated_regular_po_qty'] > 0) & 
            (df['updated_regular_po_qty'] < df['Min. Order']),
            df['Min. Order'],
            df['updated_regular_po_qty']
        ).astype(int)
        
        # 10. Calculate costs if by multiplying with contribution percentage
        df['emergency_po_cost'] = (df['emergency_po_qty'] * df['HPP']).round(2)
        df['final_updated_regular_po_cost'] = (df['final_updated_regular_po_qty'] * df['HPP']).round(2)
        
        # Clean up any remaining NaN or infinite values
        df = df.fillna(0)
        
        return df
        
    except Exception as e:
        print(f"Error in calculate_inventory_metrics: {str(e)}")
        return df_clean

def clean_po_data(df, location, contribution_pct=100, padang_sales=None):
    """Clean and prepare PO data with contribution calculations."""
    try:
        # Create a copy to avoid modifying the original DataFrame
        df = df.copy()

        # Keep original column names but strip any extra whitespace
        df.columns = df.columns.str.strip()
        print(f"Available columns in input file: ...{df.columns}")

        # Define required columns (using original case)
        required_columns = [
            'Brand', 'SKU', 'Nama', 'Toko', 'Stok',
            'Daily Sales', 'Max. Daily Sales', 'Lead Time',
            'Max. Lead Time', 'Min. Order', 'Sedang PO', 'HPP'
        ]
        
        # Find actual column names in the DataFrame (case-sensitive)
        available_columns = {col.strip(): col for col in df.columns}
        columns_to_keep = []
        
        for col in required_columns:
            if col in available_columns:
                columns_to_keep.append(available_columns[col])
            else:
                print(f"Warning: Column '{col}' not found in input data")
                # Add as empty column if it's required
                if col in ['Brand', 'SKU', 'HPP']:  # These are critical
                    df[col] = ''

        # Select only the columns we need
        df = df[[col for col in columns_to_keep if col in df.columns]]

        # Check for missing required columns
        missing_columns = [col for col in ['Brand', 'SKU', 'HPP'] if col not in df.columns]
        if missing_columns:
            raise ValueError(
                f"Missing required columns: {missing_columns}. "
                f"Available columns: {df.columns.tolist()}"
            )

        # Clean brand column
        if 'Brand' in df.columns:
            df['Brand'] = df['Brand'].astype(str).str.strip()

        # Convert numeric columns with better error handling
        numeric_columns = [
            'Stok', 'Daily Sales', 'Max. Daily Sales', 'Lead Time',
            'Max. Lead Time', 'Sedang PO', 'HPP'
        ]

        for col in numeric_columns:
            if col in df.columns:
                try:
                    # First convert to string, clean, then to numeric
                    df[col] = (
                        df[col]
                        .astype(str)
                        .str.replace(r'[^\d.,-]', '', regex=True)  # Remove non-numeric except .,-
                        .str.replace(',', '.', regex=False)         # Convert commas to decimal points
                        .replace('', '0')                           # Empty strings to '0'
                        .astype(float)                              # Convert to float
                        .fillna(0)                                  # Fill any remaining NaNs with 0
                    )
                except Exception as e:
                    print(f"Warning: Could not convert column '{col}' to numeric: {str(e)}")
                    df[col] = 0  # Set to 0 if conversion fails

        # Add contribution percentage and calculate costs
        contribution_pct = float(contribution_pct)
        df['contribution_pct'] = contribution_pct
        df['contribution_ratio'] = contribution_pct / 100

        # Add default values for other required columns
        if 'Lead Time Sedang PO' not in df.columns:
            df['Lead Time Sedang PO'] = ''

        location_upper = location.upper()
        exempt_stores = {"PADANG", "SOETA", "BALIKPAPAN"}
        needs_padang_override = (location_upper not in exempt_stores) or (contribution_pct < 100)

        print(f"Processing store: {location} - {contribution_pct}%")

        # Add 'Is in Padang' column
        if padang_sales is not None:
            padang_skus = set(padang_sales['SKU'].astype(str).unique())
            df['Is in Padang'] = df['SKU'].astype(str).isin(padang_skus).astype(int)
        else:
            print("Warning: No Padang sales data provided. 'Is in Padang' will be set to 0 for all SKUs.")
            df['Is in Padang'] = 0

        if not needs_padang_override:
            # If no override needed, ensure we have the original sales columns
            if 'Daily Sales' not in df.columns and 'Orig Daily Sales' in df.columns:
                df['Daily Sales'] = df['Orig Daily Sales']
            if 'Max. Daily Sales' not in df.columns and 'Orig Max. Daily Sales' in df.columns:
                df['Max. Daily Sales'] = df['Orig Max. Daily Sales']
            return df

        if padang_sales is None:
            raise ValueError(
                "Padang sales data is required for stores outside Padang/Soeta/Balikpapan "
                "or any store with contribution < 100%."
            )

        # Process Padang sales data - keep original column names
        padang_df = padang_sales.copy()
        padang_df.columns = padang_df.columns.str.strip()  # Only strip whitespace
        
        # Ensure required columns exist in padang_df
        required_padang_cols = ['SKU', 'Daily Sales', 'Max. Daily Sales']
        missing_padang_cols = [col for col in required_padang_cols if col not in padang_df.columns]
        
        if missing_padang_cols:
            raise ValueError(
                f"Padang sales data is missing required columns: {missing_padang_cols}. "
                f"Available columns: {padang_df.columns.tolist()}"
            )

        # Save original sales columns if they exist
        if 'Daily Sales' in df.columns and 'Orig Daily Sales' not in df.columns:
            df = df.rename(columns={'Daily Sales': 'Orig Daily Sales'})
        if 'Max. Daily Sales' in df.columns and 'Orig Max. Daily Sales' not in df.columns:
            df = df.rename(columns={'Max. Daily Sales': 'Orig Max. Daily Sales'})

        print("Overriding with Padang sales data...")
        contribution_ratio = contribution_pct / 100

        # Merge with Padang's sales data using original column names
        df = df.merge(
            padang_df[['SKU', 'Daily Sales', 'Max. Daily Sales']].rename(columns={
                'Daily Sales': 'Padang Daily Sales',
                'Max. Daily Sales': 'Padang Max Daily Sales'
            }),
            on='SKU',
            how='left'
        )

        # Calculate adjusted sales based on contribution and 'Is in Padang' flag
        if 'Padang Daily Sales' in df.columns and 'Orig Daily Sales' in df.columns:
            df['Daily Sales'] = np.where(
                df['Is in Padang'] == 1,
                df['Padang Daily Sales'] * contribution_ratio,
                df['Orig Daily Sales']
            )
            
        if 'Padang Max Daily Sales' in df.columns and 'Orig Max. Daily Sales' in df.columns:
            df['Max. Daily Sales'] = np.where(
                df['Is in Padang'] == 1,
                df['Padang Max Daily Sales'] * contribution_ratio,
                df['Orig Max. Daily Sales']
            )

        # Drop intermediate columns
        columns_to_drop = [
            'Padang Daily Sales', 'Padang Max Daily Sales'
        ]
        df = df.drop(columns=[col for col in columns_to_drop if col in df.columns], errors='ignore')

        return df

    except Exception as e:
        print(f"Error in clean_po_data: {str(e)}")
        import traceback
        traceback.print_exc()
        
        # Return empty DataFrame with required columns if there's an error
        desired_columns = [
            'Brand', 'SKU', 'Nama', 'HPP', 'Toko', 'Stok', 
            'Daily Sales', 'Max. Daily Sales', 'Lead Time', 
            'Max. Lead Time', 'Sedang PO', 'contribution_pct',
            'emergency_po_cost', 'final_updated_regular_po_cost',
            'Is in Padang'  # Added new column
        ]
        return pd.DataFrame(columns=desired_columns)

def get_store_name_from_filename(filename):
    """Extract store name from filename, handling different patterns."""
    # Remove file extension and split by spaces
    name_parts = Path(filename).stem.split()
    
    # Handle cases like "002 Miss Glam Pekanbaru.csv" -> "Pekanbaru"
    # or "01 Miss Glam Padang.csv" -> "Padang"
    if len(name_parts) >= 3 and name_parts[1].lower() == 'miss' and name_parts[2].lower() == 'glam':
        return ' '.join(name_parts[3:]).strip().upper()
    elif len(name_parts) >= 2 and name_parts[0].lower() == 'miss' and name_parts[1].lower() == 'glam':
        return ' '.join(name_parts[2:]).strip().upper()
    # Fallback: take everything after the first space
    elif ' ' in filename:
        return ' '.join(name_parts[1:]).strip().upper()
    return name_parts[0].upper()

def read_csv_file(file_path):
    # List of (separator, encoding) combinations to try
    formats_to_try = [
        (',', 'utf-8'),      # Standard CSV with comma
        (';', 'utf-8'),      # Semicolon with UTF-8
        (',', 'latin1'),     # Comma with Latin1
        (';', 'latin1'),     # Semicolon with Latin1
        (',', 'cp1252'),     # Windows-1252 encoding
        (';', 'cp1252')
    ]
    
    for sep, enc in formats_to_try:
        try:
            df = pd.read_csv(
                file_path,
                sep=sep,
                decimal=',',
                thousands='.',
                encoding=enc,
                engine='python'  # More consistent behavior with Python engine
            )
            # If we get here, the file was read successfully
            if not df.empty:
                return df
        except (UnicodeDecodeError, pd.errors.ParserError, pd.errors.EmptyDataError) as e:
            continue  # Try next format
        except Exception as e:
            print(f"Unexpected error reading {file_path} with sep='{sep}', encoding='{enc}': {str(e)}")
            continue
    
    # If we get here, all attempts failed
    print(f"Failed to read {file_path} with any known format")
    return None

def process_po_file(file_path, supplier_df, store_contrib, df_padang):
    """Process a single PO file and return merged data and summary."""
    print(f"\nProcessing {file_path.name}...")
    
    try:
        # Extract location from filename using the new function
        location = get_store_name_from_filename(file_path.name)
        print(f"  - Extracted location: {location}")  # Debug print
        
        contribution_pct = get_contribution_pct(location, store_contrib)
        
        # Read the CSV with error handling
        try:
            # Try reading with different encodings if needed
            df = read_csv_file(file_path)
            
            # Check if DataFrame is empty
            if df.empty:
                raise ValueError("File is empty")
                
            # Clean the data
            df_clean = clean_po_data(df,location, contribution_pct, df_padang)
            
            # Skip if cleaning failed
            if df_clean.empty:
                raise ValueError("Data cleaning failed")
            
            # Merge with suppliers
            merged_df = merge_with_suppliers(df_clean, supplier_df)

            # calculate metrics PO
            merged_df = calculate_inventory_metrics(merged_df)
            
            # Generate summary
            padang_count = (merged_df['Nama Store'] == 'Miss Glam Padang').sum()
            other_supplier_count = ((merged_df['Nama Store'] != 'Miss Glam Padang') & 
                                  (merged_df['Nama Store'] != '')).sum()
            
            summary = {
                'file': file_path.name,
                'location': location,
                'contribution_pct': contribution_pct,
                'total_rows': len(merged_df),
                'padang_suppliers': int(padang_count),
                'other_suppliers': int(other_supplier_count),
                'no_supplier': int((merged_df['Nama Store'] == '').sum()),
                'status': 'Success'
            }
            
            return merged_df, summary
            
        except Exception as e:
            raise Exception(f"Error processing file data: {str(e)}")
            
    except Exception as e:
        error_msg = f"Error processing {file_path.name}: {str(e)}"
        print(f"  - {error_msg}")
        return None, {
            'file': file_path.name,
            'location': location if 'location' in locals() else 'Unknown',
            'contribution_pct': contribution_pct if 'contribution_pct' in locals() else 0,
            'total_rows': 0,
            'padang_suppliers': 0,
            'other_suppliers': 0,
            'no_supplier': 0,
            'status': f"Error: {str(e)[:100]}"  # Truncate long error messages
        }

def load_padang_data(padang_path):
    print("Parsing Padang data...")

    try:
        df = pd.read_csv(padang_path, sep=';', decimal=',', thousands='.')

        print(f"Padang data loaded successfully..")
        display(df.head(10))

        return df
    except Exception as e:
        raise Exception(f"Error loading Padang data: {str(e)}")

def format_number_for_csv(x):
    """Format numbers for CSV output with Indonesian locale (comma as decimal, dot as thousand)"""
    if pd.isna(x) or x == '':
        return x
    try:
        if isinstance(x, (int, float)):
            if x == int(x):  # Whole number
                return f"{int(x):,d}".replace(",", ".")
            else:  # Decimal number
                return f"{x:,.2f}".replace(",", "X").replace(".", ",").replace("X", ".")
        return x
    except:
        return x

def save_to_csv(df, filename):
    df_output = df.copy()

    # Ensure SKU stays textual
    if 'SKU' in df_output.columns:
        df_output['SKU'] = df_output['SKU'].astype(str)

    # Only touch numeric columns
    numeric_cols = df_output.select_dtypes(include=['number']).columns
    for col in numeric_cols:
        df_output[col] = df_output[col].apply(format_number_for_csv)

    output_path = OUTPUT_DIR / filename
    df_output.to_csv(output_path, index=False, sep=';', decimal=',', encoding='utf-8-sig')
    print(f"File saved to {output_path}")

def save_to_excel(df, filename, sheet_name="PO"):
    df_output = df.copy()
    df_output['SKU'] = df_output['SKU'].astype(str)

    output_path = OUTPUT_DIR / filename
    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        df_output.to_excel(writer, index=False, sheet_name=sheet_name)
        ws = writer.sheets[sheet_name]
        sku_col_idx = df_output.columns.get_loc("SKU") + 1
        for cell in ws.iter_cols(min_col=sku_col_idx, max_col=sku_col_idx,
                                 min_row=2, max_row=ws.max_row):
            for c in cell:
                c.number_format = numbers.FORMAT_TEXT
    print(f"File saved to {output_path}")

def save_to_m2_format(df, filename, is_excel: False):
    df_output = df.copy()[['Toko', 'SKU', 'HPP', 'final_updated_regular_po_qty']]

    # Ensure SKU stays textual
    if 'SKU' in df_output.columns:
        df_output['SKU'] = df_output['SKU'].astype(str)

    output_path = OUTPUT_DIR / filename
    if is_excel:
        df_output.to_excel(output_path, index=False)
    else:
        df_output.to_csv(output_path, index=False, sep=';', decimal=',', encoding='utf-8-sig')
    print(f"File saved to {output_path}")

def main():
    # Load data
    supplier_df = load_supplier_data(SUPPLIER_PATH)
    store_contrib = load_store_contribution(STORE_CONTRIBUTION_PATH)
    all_summaries = []

    # get padang df first
    df_padang = load_padang_data('data/rawpo/csv/1.Miss glam Padang.csv')

    # Process each PO file
    for file_path in sorted(RAWPO_DIR.glob('*.csv')):
        try:
            merged_df, summary = process_po_file(file_path, supplier_df, store_contrib, df_padang)
            
            # Save results
            output_path = OUTPUT_DIR / f'with_suppliers_{file_path.name}'
            m2_output_path = OUTPUT_DIR / f'm2_{file_path.name}'
            save_to_csv(merged_df, output_path.name)
            save_to_m2_format(merged_df, m2_output_path.name)


            summary['output_path'] = str(output_path)
            
            # Print progress
            print(f"  - Location: {summary['location']}")
            print(f"  - Contribution: {summary['contribution_pct']}%")
            print(f"  - Rows processed: {summary['total_rows']}")
            print(f"  - 'Miss Glam Padang' suppliers: {summary['padang_suppliers']} rows")
            print(f"  - Other suppliers: {summary['other_suppliers']} rows")
            print(f"  - No supplier data: {summary['no_supplier']} rows")
            print(f"  - Saved to: {output_path}")
            
            all_summaries.append(summary)
            
        except Exception as e:
            print(f"Error processing {file_path.name}: {str(e)}")
            continue
    
    # Display final summary
    if all_summaries:
        print("\nProcessing complete! Summary:")
        summary_df = pd.DataFrame(all_summaries)
        display(summary_df)
        
        # Show sample of last processed file
        print("\nSample of the last processed file:")
        display(merged_df)
    else:
        print("\nNo files were processed successfully.")

# Run the main function
if __name__ == "__main__":
    main()

Loading supplier data...
Parsing Padang data...
Padang data loaded successfully..


Unnamed: 0,No,Brand,SKU,Nama,HPP,Harga,Ranking,Grade,Kategori Brand,Kategori,Sub Kategori,Top4000,Terjual,Stok,Last Sales,Lost Days,Velocity Capped,Daily Sales,Lead Time,Max. Daily Sales,Max. Lead Time,Min. Order,Safety Stok,ROP,3W Cover,Sedang PO,Suggested,Amount,Toko,Siklus,[Brand] Promo Factor,[Brand] Delay Factor,[SKU] Promo Factor,[SKU] Delay Factor,[Master - SKU] Promo Factor,[Master - SKU] Delay Factor,Stock Cover,Days to Backup,Qty to Backup
0,1,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,31930,42000,1193,A,BRAND VIRAL,Face Cleansing,Facial Wash,1,43,0,2025-11-22,1.0,1.0,0.74,4,4,23,1,89.04,92,100,25,75,2394750,Miss Glam Padang,21,0,0,0,0,,,0.0,4.0,2.96
1,2,ACNAWAY,10500637717,ACNAWAY Mugwort Gel Mask Anti Pores Masker Gel...,33908,45000,1669,A,BRAND VIRAL,Face Mask,Clay Mask,1,29,25,2025-11-23,0.0,0.0,0.49,4,8,23,1,182.03,184,200,0,175,5933900,Miss Glam Padang,21,0,0,0,0,,,51.02,0.0,0.0
2,3,ACNAWAY,10300721756,ACNAWAY Mugwort Water Gel Moisturizer with Mug...,30928,45000,1485,A,BRAND VIRAL,Moisturizer,All Day Moisturizer,1,33,34,2025-11-22,0.0,0.0,0.56,4,5,23,1,112.76,115,125,0,91,2814448,Miss Glam Padang,21,0,0,0,0,,,60.71,0.0,0.0
3,4,ACNEMED,8995232702124,ACNEMED Facial Wash For Oily Skin 100gr,30220,36500,4690,B*,BEAUTY MATURE,Face Cleansing,Facial Wash,0,5,5,2025-11-21,0.0,0.0,0.08,4,1,6,3,5.66,6,25,0,21,634620,Miss Glam Padang,21,0,0,0,0,,,62.5,0.0,0.0
4,5,ACNES,8992821102372,ACNES Complete White Face Wash 100gr,26623,33500,1026,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,51,27,2025-11-20,0.0,0.0,0.86,2,4,15,3,58.27,60,92,0,66,1757118,Miss Glam Padang,21,0,0,0,0,,,31.4,0.0,0.0
5,6,ACNES,8992821102365,ACNES Complete White Face Wash 50gr,15974,19500,634,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,81,60,2025-11-23,0.0,0.0,1.37,2,5,15,3,72.25,75,115,0,54,862596,Miss Glam Padang,21,0,0,50,0,50.0,,43.8,0.0,0.0
6,7,ACNES,8992821100309,ACNES Creamy Wash 100gr,25395,31000,604,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,85,84,2025-11-22,0.0,0.0,1.44,2,6,15,3,87.12,90,138,0,54,1371330,Miss Glam Padang,21,0,0,50,0,50.0,,58.33,0.0,0.0
7,8,ACNES,8992821100392,ACNES Creamy Wash 50gr,15155,18500,669,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,77,52,2025-11-23,0.0,0.0,1.31,2,5,15,3,72.39,75,115,0,63,954765,Miss Glam Padang,21,0,0,50,0,50.0,,39.69,0.0,0.0
8,9,ACNES,8992821102396,ACNES Deep Pore Cleanser Face Wash 100g,26623,32500,979,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,53,33,2025-11-23,0.0,0.0,0.9,2,5,15,3,73.2,75,115,0,81,2156463,Miss Glam Padang,21,0,0,0,0,,,36.67,0.0,0.0
9,10,ACNES,8992821102389,ACNES Deep Pore Cleanser Face Wash 50gr,15974,19500,815,A,BEAUTY MATURE,Face Cleansing,Facial Wash,1,64,29,2025-11-23,0.0,0.0,1.08,2,4,15,3,57.83,60,92,0,63,1006362,Miss Glam Padang,21,0,0,50,0,50.0,,26.85,0.0,0.0



Processing 1.Miss glam Padang.csv...
  - Extracted location: GLAM PADANG
Available columns in input file: ...Index(['No', 'Brand', 'SKU', 'Nama', 'HPP', 'Harga', 'Ranking', 'Grade',
       'Kategori Brand', 'Kategori', 'Sub Kategori', 'Top4000', 'Terjual',
       'Stok', 'Last Sales', 'Lost Days', 'Velocity Capped', 'Daily Sales',
       'Lead Time', 'Max. Daily Sales', 'Max. Lead Time', 'Min. Order',
       'Safety Stok', 'ROP', '3W Cover', 'Sedang PO', 'Suggested', 'Amount',
       'Toko', 'Siklus', '[Brand] Promo Factor', '[Brand] Delay Factor',
       '[SKU] Promo Factor', '[SKU] Delay Factor',
       '[Master - SKU] Promo Factor', '[Master - SKU] Delay Factor',
       'Stock Cover', 'Days to Backup', 'Qty to Backup'],
      dtype='object')
Processing store: GLAM PADANG - 100.0%
Overriding with Padang sales data...
File saved to /Users/andresuchitra/dev/missglam/autopo/output/cleaned_po_with_suppliers/with_suppliers_1.Miss glam Padang.csv
File saved to /Users/andresuchitra/dev/mis

Unnamed: 0,file,location,contribution_pct,total_rows,padang_suppliers,other_suppliers,no_supplier,status,output_path
0,1.Miss glam Padang.csv,GLAM PADANG,100,6819,6680,17,122,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
1,10.Miss glam Palembang.csv,GLAM PALEMBANG,100,4907,4834,6,67,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
2,11.Miss glam Damar.csv,GLAM DAMAR,100,6712,6573,15,124,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
3,12.Miss glam Bangka.csv,GLAM BANGKA,100,4563,4479,6,78,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
4,13.Miss glam Payakumbuh.csv,GLAM PAYAKUMBUH,100,5334,5245,8,81,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
5,14.Miss glam Solok.csv,GLAM SOLOK,100,4675,4609,5,61,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
6,15.Miss glam Tembilahan.csv,GLAM TEMBILAHAN,100,4366,4294,6,66,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
7,16.Miss glam Lubuk Linggau.csv,GLAM LUBUK LINGGAU,100,4426,4361,7,58,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
8,17.Miss glam Dumai.csv,GLAM DUMAI,100,4782,4711,6,65,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...
9,18. Miss Glam Kedaton.csv,KEDATON,18,4233,4157,9,67,Success,/Users/andresuchitra/dev/missglam/autopo/outpu...



Sample of the last processed file:


Unnamed: 0,Brand,SKU,Nama,Toko,Stok,Orig Daily Sales,Orig Max. Daily Sales,Lead Time,Max. Lead Time,Min. Order,Sedang PO,HPP,contribution_pct,contribution_ratio,Lead Time Sedang PO,Is in Padang,Daily Sales,Max. Daily Sales,No,ID Supplier,Nama Supplier,ID Brand,Nama Brand,ID Store,Nama Store,Hari Order,Min. Purchase,Trading Term,Promo Factor,Delay Factor,Safety stock,Reorder point,Stock cover 30 days,current_stock_days_cover,is_open_po,initial_qty_po,emergency_po_qty,updated_regular_po_qty,final_updated_regular_po_qty,emergency_po_cost,final_updated_regular_po_cost
0,ACNAWAY,10400614911,ACNAWAY 3 in 1 Acne Sun Serum Sunscreen Serum ...,Miss Glam Medan,6.00,0.03,1.00,5.00,28.00,1,0.00,58524.00,100.00,1.00,0.00,0,0.03,1.00,2787.00,1.00,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.00,ACNAWAY,7.00,Miss Glam Padang,2.00,500000.00,0.00,,,28,29,1,200.00,0,0,0,0,0,0.00,0.00
1,ACNAWAY,10400517459,ACNAWAY Mugwort Daily Sunscreen Only For Acne ...,Miss Glam Medan,13.00,0.25,3.00,5.00,28.00,1,0.00,31000.00,100.00,1.00,0.00,0,0.25,3.00,2787.00,1.00,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.00,ACNAWAY,7.00,Miss Glam Padang,2.00,500000.00,0.00,,,83,85,8,52.00,0,0,0,0,0,0.00,0.00
2,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,Miss Glam Medan,0.00,0.25,2.00,5.00,28.00,1,12.00,31959.00,100.00,1.00,0.00,1,0.74,4.00,2787.00,1.00,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480.00,ACNAWAY,7.00,Miss Glam Padang,2.00,500000.00,0.00,,,109,113,23,0.00,1,11,0,11,11,0.00,351549.00
3,ACNES,8992821102372,ACNES Complete White Face Wash 100gr,Miss Glam Medan,8.00,0.24,2.00,5.00,12.00,3,0.00,26623.00,100.00,1.00,0.00,1,0.86,4.00,10975.00,33.00,PT. MENSA BINASUKSES - PPN (PDG),33.00,ACNES,7.00,Miss Glam Padang,3.00,500000.00,0.00,,,44,49,26,9.30,1,18,3,15,15,79869.00,399345.00
4,ACNES,8992821102365,ACNES Complete White Face Wash 50gr,Miss Glam Medan,0.00,0.09,1.00,5.00,12.00,3,6.00,15974.00,100.00,1.00,0.00,1,1.37,5.00,10975.00,33.00,PT. MENSA BINASUKSES - PPN (PDG),33.00,ACNES,7.00,Miss Glam Padang,3.00,500000.00,0.00,,,54,61,42,0.00,1,36,0,36,36,0.00,575064.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5784,YU CHUN,8997014402932,YU CHUN Mei Cordyceps Brightening Cleanser 100ml,Miss Glam Medan,2.00,0.02,1.00,1.00,2.00,3,0.00,33744.00,100.00,1.00,0.00,1,0.05,1.00,1722.00,3114.00,CV. TRIJAYA AGUNG UTAMA - PPN (PDG),489.00,YU CHUN,7.00,Miss Glam Padang,0.00,500000.00,0.00,,,2,3,2,40.00,0,0,0,0,0,0.00,0.00
5785,YU CHUN,8997014402703,YU CHUN Mei Cordyceps Lightening Day Cream 30gr,Miss Glam Medan,4.00,0.03,1.00,1.00,2.00,3,0.00,44289.00,100.00,1.00,0.00,1,0.12,1.00,1722.00,3114.00,CV. TRIJAYA AGUNG UTAMA - PPN (PDG),489.00,YU CHUN,7.00,Miss Glam Padang,0.00,500000.00,0.00,,,2,3,4,33.33,0,0,0,0,0,0.00,0.00
5786,YU CHUN,8997014402710,YU CHUN Mei Cordyceps Lightening Night Cream 30g,Miss Glam Medan,0.00,0.06,1.00,1.00,2.00,3,0.00,44289.00,100.00,1.00,0.00,1,0.08,1.00,1722.00,3114.00,CV. TRIJAYA AGUNG UTAMA - PPN (PDG),489.00,YU CHUN,7.00,Miss Glam Padang,0.00,500000.00,0.00,,,2,3,3,0.00,1,3,1,2,3,44289.00,132867.00
5787,YU CHUN,8997014402918,YU CHUN Mei Serum Whitening Essence 30ml,Miss Glam Medan,5.00,0.08,2.00,1.00,2.00,3,0.00,49062.00,100.00,1.00,0.00,1,0.05,1.00,1722.00,3114.00,CV. TRIJAYA AGUNG UTAMA - PPN (PDG),489.00,YU CHUN,7.00,Miss Glam Padang,0.00,500000.00,0.00,,,2,3,2,100.00,0,0,0,0,0,0.00,0.00
