# Stock health Dashboard

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output

In [31]:
from pathlib import Path
import pandas as pd

# Path to the output/complete directory
output_dir = Path('/Users/andresuchitra/dev/missglam/autopo/notebook/output/complete')

# Find the first file (CSV or Excel)
data_file = next(output_dir.glob('*.*'), None)
if not data_file:
    raise FileNotFoundError("No data files found in the output/complete directory")

print(f"Examining file: {data_file.name}")

# First, let's look at the raw content
with open(data_file, 'r', encoding='latin1') as f:
    # Print first 5 lines to understand the structure
    print("\nFirst 5 lines of the file:")
    for i, line in enumerate(f):
        if i >= 5:
            break
        print(f"Line {i+1}: {line.strip()}")

# Try to read with different delimiters
delimiters = [',', ';', '\t']
for delim in delimiters:
    try:
        print(f"\nTrying delimiter: {repr(delim)}")
        df_sample = pd.read_csv(data_file, sep=delim, decimal=',', nrows=5, encoding='latin1', on_bad_lines='warn')
        print("Success! File structure:")
        print(df_sample.head())
        print("\nColumns found:")
        print(df_sample.columns.tolist())
        break
    except Exception as e:
        print(f"Failed with {delim}: {str(e)}")
else:
    # If all delimiters fail, try reading with Python's csv module for more detailed error
    import csv
    print("\nTrying with Python's csv module...")
    with open(data_file, 'r', encoding='latin1') as f:
        reader = csv.reader(f)
        for i, row in enumerate(reader):
            if i >= 5:
                break
            print(f"Row {i+1}: {row}")

Examining file: 10. Miss Glam Palembang.csv

First 5 lines of the file:
Line 1: ï»¿Brand;SKU;Nama;Toko;Stok;Daily Sales;Max. Daily Sales;Lead Time;Max. Lead Time;Min. Order;Sedang PO;HPP;Harga;contribution_pct;contribution_ratio;Is in Padang;Orig Daily Sales;Orig Max. Daily Sales;sales_contribution;Safety stock;Reorder point;target_days;target_days_cover;current_stock_days_cover;is_open_po;initial_qty_po;emergency_po_qty;updated_regular_po_qty;final_updated_regular_po_qty;emergency_po_cost;final_updated_regular_po_cost;No;ID Supplier;Nama Supplier;ID Brand;Nama Brand;ID Store;Nama Store;Hari Order;Min. Purchase;Trading Term;Promo Factor;Delay Factor
Line 2: ACNAWAY;101001107647;ACNAWAY Mugwort Gel Facial Wash Mugwort + Centella + Panthenol 100ml;Miss Glam Palembang;0;0,20;1,04;4;24;1;0;33.500;45.000;26;0,26;1;0,18;2;8.892;25;26;30;6;0;1;6;5;1;1;167.500;33.500;2.796;1.0;PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT);1.480;ACNAWAY;20;Miss Glam Palembang;2;500.000;0;;
Line 3: ACNES;899282


Skipping line 3: expected 5 fields, saw 7
Skipping line 4: expected 5 fields, saw 7
Skipping line 5: expected 5 fields, saw 7
Skipping line 6: expected 5 fields, saw 7
Skipping line 8: expected 5 fields, saw 7
Skipping line 10: expected 5 fields, saw 7
Skipping line 11: expected 5 fields, saw 6
Skipping line 12: expected 5 fields, saw 7
Skipping line 13: expected 5 fields, saw 6
Skipping line 15: expected 5 fields, saw 7
Skipping line 16: expected 5 fields, saw 7




# Read Dataframe from output folder first


In [3]:
from pathlib import Path
import pandas as pd
from datetime import datetime

DATA_DIR = Path("output/complete")

def load_store_data(is_csv_only=False, is_excel_only=False):
    """
    Load and combine data from all store files, handling decimal commas correctly
    """
    all_data = []
    output_dir = Path('/Users/andresuchitra/dev/missglam/autopo/notebook/output/complete')
    
    for file_path in output_dir.glob('*.*'):
        try:
            # Skip non-data files
            if file_path.suffix.lower() not in ['.csv', '.xlsx', '.xls']:
                continue
            # if is_csv_only, only include files with .csv extension
            if is_csv_only and file_path.suffix.lower() != '.csv':
                continue
            # if is_excel_only, only include files with .xlsx or .xls extension
            if is_excel_only and file_path.suffix.lower() not in ['.xlsx', '.xls']:
                continue

            print(f"Processing {file_path.name}...")
            
            # Read the file
            if file_path.suffix.lower() == '.csv':
                # First try to detect the delimiter
                # First detect encoding and BOM
                encoding = 'utf-8-sig'

                with open(file_path, 'rb') as f:
                    raw = f.read(4)
                    if raw.startswith(b'\xef\xbb\xbf'):
                        encoding = 'utf-8-sig'  # Handles BOM
                    else:
                        encoding = 'latin1'  # Fallback encoding

                with open(file_path, 'r', encoding=encoding) as f:
                    first_line = f.readline().strip()
                    delimiter = ';' if ';' in first_line else ','
                
                # Read with decimal comma handling
                df = pd.read_csv(
                    file_path,
                    sep=delimiter,
                    decimal=',',  # This tells pandas to use comma as decimal
                    thousands='.',  # And period as thousands separator if needed
                    encoding=encoding,
                    on_bad_lines='warn'
                )
            else:  # Excel file
                df = pd.read_excel(file_path, engine='openpyxl')
                
                # Convert string columns with decimal commas to numeric
                for col in df.select_dtypes(include=['object']).columns:
                    if df[col].astype(str).str.contains(',').any():
                        try:
                            # Try converting to float, handling decimal commas
                            df[col] = df[col].astype(str).str.replace('.', '').str.replace(',', '.').astype(float)
                        except:
                            pass  # If conversion fails, leave as is
                
            # Clean column names
            df.columns = [str(col).strip() for col in df.columns]
            
            # rename current_stock_day_cover to daily_stock_cover
            df.rename(columns={
                'Brand':'brand', 
                'current_stock_days_cover': 'daily_stock_cover', 
                'SKU': 'sku', 
                'Stok': 'stock',
                'Toko': 'store',
                'HPP': 'hpp', 
                'Harga': 'harga'}, inplace=True)
                
            all_data.append(df)
            
        except Exception as e:
            print(f"Error processing {file_path.name}: {str(e)}")
            continue
    
    if not all_data:
        raise ValueError("No valid data files could be processed")
    
    # Combine all dataframes
    combined_df = pd.concat(all_data, ignore_index=True)
    print(f"\nCombined data shape: {combined_df.shape}")

    return combined_df

# Test the function
df = load_store_data()

# Example usage:
df = load_store_data(is_csv_only=True)

# set notebook to show all columns in display
pd.set_option('display.max_columns', None)

display(df.head())

# save to csv 'health_monitor.csv', with curren date '20251128' format
df.to_csv(f"health_monitor/{datetime.now().strftime('%Y%m%d')}.csv", index=False)

Processing 10. Miss Glam Palembang.csv...
Processing 20. Miss Glam Tanjung Pinang.csv...
Processing 1. Miss Glam Padang.csv...
Processing 6. Miss Glam Muaro Bungo.csv...
Processing 5. Miss Glam Panam.csv...
Processing 7. Miss Glam Lampung.csv...
Processing 29. Miss Glam Marpoyan.csv...
Processing 25. Miss Glam Sudirman.csv...
Processing 23. Miss Glam Halat.csv...
Processing 22. Miss Glam Pasaman Barat.csv...
Processing 16. Miss Glam Lubuk Linggau.csv...
Processing 24. Miss Glam Duri.csv...
Processing 9. Miss Glam Medan.csv...
Processing 18. Miss Glam Kedaton.csv...
Processing 8. Miss Glam Bengkulu.csv...
Processing 31. Miss Glam Mayang.csv...
Processing 15. Miss Glam Tembilahan.csv...
Processing 2. Miss Glam Pekanbaru.csv...
Processing 28. Miss Glam Aceh.csv...
Processing 27. Miss Glam P. Sidimpuan.csv...
Processing 4. Miss Glam Bukittinggi.csv...
Processing 32. Miss Glam Soeta.csv...
Processing 26. Miss Glam Dr. Mansyur.csv...
Processing 21. Miss Glam Sutomo.csv...
Processing 12. Miss

Unnamed: 0,brand,sku,Nama,store,stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Min. Order,Sedang PO,hpp,harga,contribution_pct,contribution_ratio,Is in Padang,Orig Daily Sales,Orig Max. Daily Sales,sales_contribution,Safety stock,Reorder point,target_days,target_days_cover,daily_stock_cover,is_open_po,initial_qty_po,emergency_po_qty,updated_regular_po_qty,final_updated_regular_po_qty,emergency_po_cost,final_updated_regular_po_cost,No,ID Supplier,Nama Supplier,ID Brand,Nama Brand,ID Store,Nama Store,Hari Order,Min. Purchase,Trading Term,Promo Factor,Delay Factor
0,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,Miss Glam Palembang,0,0.2,1.04,4,24,1,0,33500,45000,26,0.26,1,0.18,2.0,8892.0,25,26,30,6,0.0,1,6,5,1,1,167500,33500,2796.0,10.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480,ACNAWAY,20,Miss Glam Palembang,2,500000,0,,
1,ACNES,8992821102372,ACNES Complete White Face Wash 100gr,Miss Glam Palembang,15,0.22,1.04,3,7,3,0,26623,32500,26,0.26,1,0.2,2.0,7182.5,7,8,30,7,67.87,0,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,
2,ACNES,8992821102365,ACNES Complete White Face Wash 50gr,Miss Glam Palembang,11,0.35,1.3,3,7,3,0,15974,19500,26,0.26,1,0.17,2.0,6793.8,9,11,30,11,31.57,0,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,
3,ACNES,8992821100309,ACNES Creamy Wash 100gr,Miss Glam Palembang,4,0.36,1.56,3,7,3,7,25395,31000,26,0.26,1,0.39,3.0,11203.4,10,12,30,11,11.07,1,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,
4,ACNES,8992821100392,ACNES Creamy Wash 50gr,Miss Glam Palembang,1,0.34,1.3,3,7,3,11,15155,18500,26,0.26,1,0.17,1.0,6204.9,9,11,30,11,2.98,1,0,1,0,0,15155,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,


# Calculate Health

In [None]:
def format_currency(value):
    """Format value to Indonesian Rupiah format"""
    if pd.isna(value):
        return "Rp. 0"
    
    value = float(value)
    
    # For values >= 1 billion (1 Milyar)
    if abs(value) >= 1e9:
        return f"Rp. {value/1e9:,.2f} M".replace('.', '|').replace(',', '.').replace('|', ',')
    # For values >= 1 million (1 Juta)
    elif abs(value) >= 1e6:
        return f"Rp. {value/1e6:,.1f} Jt".replace('.', '|').replace(',', '.').replace('|', ',')
    # For values >= 1000 (1 Ribu)
    elif abs(value) >= 1000:
        return f"Rp. {value/1000:,.1f} Rb".replace('.', '|').replace(',', '.').replace('|', ',')
    else:
        return f"Rp. {int(value):,}".replace(',', '.')

def calculate_stock_health(df):
    # Ensure required columns exist
    required_cols = ['daily_stock_cover', 'hpp', 'stock', 'brand', 'sku', 'store']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Define health groups
    conditions = [
        (df['daily_stock_cover'] > 30),
        (df['daily_stock_cover'] > 20) & (df['daily_stock_cover'] <= 30),
        (df['daily_stock_cover'] > 6) & (df['daily_stock_cover'] <= 20),
        (df['daily_stock_cover'] > 0) & (df['daily_stock_cover'] <= 6),
        (df['daily_stock_cover'] <= 0)
    ]
    
    health_labels = ['Blue (30+ days)', 'Green (21-30 days)', 'Yellow (7-20 days)', 
                    'Red (1-6 days)', 'Black (0 days)']
    health_colors = ['#1f77b4', '#2ca02c', '#ffd700', '#ff7f0e', '#000000']
    
    df['Health_Group'] = np.select(conditions, health_labels, default='Unknown')
    df['Health_Color'] = np.select(conditions, health_colors, default='#CCCCCC')
    df['Total_Value'] = df['hpp'] * df['stock']
    
    return df, health_labels, health_colors

def prepare_filters(df):
    # Get unique SKUs for autocomplete
    unique_skus = sorted(df['sku'].dropna().astype(str).unique().tolist())
    
    # Create filter widgets
    brand_dropdown = widgets.Dropdown(
        options=['All'] + sorted(df['brand'].dropna().unique().tolist()),
        value='All',
        description='Brand:',
        layout=widgets.Layout(width='300px')
    )

    store_dropdown = widgets.Dropdown(
        options=['All'] + sorted(df['store'].dropna().unique().tolist()),
        value='All',
        description='Store:',
        layout=widgets.Layout(width='300px')
    )

    # Create a Combobox for SKU search with autocomplete
    sku_search = widgets.Combobox(
        placeholder='Start typing SKU...',
        options=unique_skus,
        description='SKU Search:',
        ensure_option=True,
        layout=widgets.Layout(width='300px')
    )
    
    # Add a clear button for the SKU search
    clear_sku_btn = widgets.Button(
        description='Clear',
        button_style='',
        tooltip='Clear SKU search',
        layout=widgets.Layout(width='80px')
    )
    
    def on_clear_clicked(b):
        sku_search.value = ''
        update_dashboard(None)
    
    clear_sku_btn.on_click(on_clear_clicked)

    # Create output widget for the figure
    output = widgets.Output()

    return brand_dropdown, store_dropdown, sku_search, clear_sku_btn, output

def update_dashboard(change):
    with output:
        clear_output(wait=True)
        
        # Apply filters
        filtered_df = df.copy()
        
        if brand_dropdown.value != 'All':
            filtered_df = filtered_df[filtered_df['brand'] == brand_dropdown.value]
            
        if store_dropdown.value != 'All':
            filtered_df = filtered_df[filtered_df['store'] == store_dropdown.value]
            
        if sku_search.value:
            filtered_df = filtered_df[filtered_df['sku'].astype(str).str.contains(str(sku_search.value), case=False, na=False)]
        
        if filtered_df.empty:
            print("No data matches the selected filters.")
            return
            
        # Create summary metrics
        total_skus = len(filtered_df)
        total_stores = filtered_df['store'].nunique()
        total_value = filtered_df['Total_Value'].sum() / 1e6  # in millions

        # Format the total value for display
        formatted_total_value = format_currency(filtered_df['Total_Value'].sum())
        
        # Create health group summary
        health_summary = filtered_df.groupby('Health_Group').agg(
            SKU_Count=('sku', 'count'),
            Total_Qty=('stock', 'sum'),
            Total_Value=('Total_Value', 'sum')
        ).reindex(health_labels).reset_index()
        
        # Create figures
        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{"type": "pie"}, {"type": "bar"}],
                  [{"type": "bar", "colspan": 2}, None]],
            subplot_titles=("SKU Distribution by Health Group", 
                           "Total Value by Health Group",
                           "Stock Quantity by Health Group")
        )
        
        # Pie chart for SKU distribution
        fig.add_trace(
            go.Pie(
                labels=health_summary['Health_Group'],
                values=health_summary['SKU_Count'],
                marker_colors=health_colors,
                name="SKU Distribution",
                hole=0.4
            ),
            row=1, col=1
        )
        

        # Bar chart for total value
        fig.add_trace(
            go.Bar(
                x=health_summary['Health_Group'],
                y=health_summary['Total_Value'],
                marker_color=health_colors,
                name="Total Value",
                text=[format_currency(val) for val in health_summary['Total_Value']],
                textposition='auto',
                texttemplate='%{text}',
                textfont=dict(size=10)
            ),
            row=1, col=2
        )

        # Update y-axis to show full values without scientific notation
        fig.update_yaxes(
            tickformat=".0f",
            row=1, col=2
        )
       
        # Bar chart for total quantity
        fig.add_trace(
            go.Bar(
                x=health_summary['Health_Group'],
                y=health_summary['Total_Qty'],
                marker_color=health_colors,
                name="Total Quantity"
            ),
            row=2, col=1
        )
        
        # Update layout
        fig.update_layout(
            title_text=f"Stock Health Dashboard - {brand_dropdown.value if brand_dropdown.value != 'All' else 'All Brands'} - {store_dropdown.value if store_dropdown.value != 'All' else 'All Stores'}",
            height=900,
            showlegend=False,
            template="plotly_white"
        )

        # Add annotations for summary metrics - positioned in the bottom left corner
        fig.add_annotation(
            text=f"<b>Total SKUs:</b> {total_skus:,}<br>" +
                 f"<b>Total Stores:</b> {total_stores}<br>" +
                 f"<b>Total Value:</b> {formatted_total_value}",
            align='left',
            showarrow=False,
            xref='paper',
            yref='paper',
            x=0.02,     # Slightly in from left edge
            y=0.02,     # Slightly up from bottom edge
            xanchor='left',
            yanchor='bottom',
            bordercolor='black',
            borderwidth=1,
            bgcolor='white',
            xshift=10,
            yshift=10
        )
        
        # Format the y-axis of the value chart
        fig.update_yaxes(
            tickformat=".2s",  # Use SI prefix (k, M, B, etc.)
            row=1, col=2
        )
        
        # Format the y-axis of the quantity chart
        fig.update_yaxes(
            tickformat=".2s",  # Use SI prefix (k, M, B, etc.)
            row=2, col=1
        )

        
        # Show the figure
        fig.show()

"""
Main entrypoint
"""
display('input df', df.head())
df, health_labels, health_colors = calculate_stock_health(df)
brand_dropdown, store_dropdown, sku_search, clear_sku_btn, output = prepare_filters(df)

# Set up observers
brand_dropdown.observe(update_dashboard, names='value')
store_dropdown.observe(update_dashboard, names='value')
sku_search.observe(update_dashboard, names='value')

# Create a horizontal box for the SKU search and clear button
sku_search_box = widgets.HBox([sku_search, clear_sku_btn])

# Display the widgets and initial dashboard
display(widgets.VBox([
    widgets.HBox([brand_dropdown, store_dropdown]),
    sku_search_box,
    output
]))

# Initial update
update_dashboard(None)

'input df'

Unnamed: 0,brand,sku,Nama,store,stock,Daily Sales,Max. Daily Sales,Lead Time,Max. Lead Time,Min. Order,Sedang PO,hpp,harga,contribution_pct,contribution_ratio,Is in Padang,Orig Daily Sales,Orig Max. Daily Sales,sales_contribution,Safety stock,Reorder point,target_days,target_days_cover,daily_stock_cover,is_open_po,initial_qty_po,emergency_po_qty,updated_regular_po_qty,final_updated_regular_po_qty,emergency_po_cost,final_updated_regular_po_cost,No,ID Supplier,Nama Supplier,ID Brand,Nama Brand,ID Store,Nama Store,Hari Order,Min. Purchase,Trading Term,Promo Factor,Delay Factor,Health_Group,Health_Color,Total_Value
0,ACNAWAY,101001107647,ACNAWAY Mugwort Gel Facial Wash Mugwort + Cent...,Miss Glam Palembang,0,0.2,1.04,4,24,1,0,33500,45000,26,0.26,1,0.18,2.0,8892.0,25,26,30,6,0.0,1,6,5,1,1,167500,33500,2796.0,10.0,PT. BERSAMA DISTRIVERSA INDONESIA (DC CIPUTAT),1480,ACNAWAY,20,Miss Glam Palembang,2,500000,0,,,Black (0 days),#000000,0
1,ACNES,8992821102372,ACNES Complete White Face Wash 100gr,Miss Glam Palembang,15,0.22,1.04,3,7,3,0,26623,32500,26,0.26,1,0.2,2.0,7182.5,7,8,30,7,67.87,0,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,,Blue (30+ days),#1f77b4,399345
2,ACNES,8992821102365,ACNES Complete White Face Wash 50gr,Miss Glam Palembang,11,0.35,1.3,3,7,3,0,15974,19500,26,0.26,1,0.17,2.0,6793.8,9,11,30,11,31.57,0,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,,Blue (30+ days),#1f77b4,175714
3,ACNES,8992821100309,ACNES Creamy Wash 100gr,Miss Glam Palembang,4,0.36,1.56,3,7,3,7,25395,31000,26,0.26,1,0.39,3.0,11203.4,10,12,30,11,11.07,1,0,0,0,0,0,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,,Yellow (7-20 days),#ffd700,101580
4,ACNES,8992821100392,ACNES Creamy Wash 50gr,Miss Glam Palembang,1,0.34,1.3,3,7,3,11,15155,18500,26,0.26,1,0.17,1.0,6204.9,9,11,30,11,2.98,1,0,1,0,0,15155,0,11066.0,25600.0,PT. MENSA BINASUKSES - PPN (PLB),33,ACNES,20,Miss Glam Palembang,3,500000,0,,,Red (1-6 days),#ff7f0e,15155


VBox(children=(HBox(children=(Dropdown(description='Brand:', layout=Layout(width='300px'), options=('All', 'AC…

# Master Data

In [20]:
# read 'data/supplier.csv'
import pandas as pd
import os

MASTER_DATA_DIR = "data/master_data"

supplier_df = pd.read_csv('data/supplier.csv', sep=';')
display(supplier_df)

Unnamed: 0,No,ID Supplier,Nama Supplier,ID Brand,Nama Brand,ID Store,Nama Store,Hari Order,Min. Purchase,Trading Term,Promo Factor,Delay Factor
0,1,,,1756,BLOOD,37,PT Bersama Distriversa Indonesia,1,500000,0,,
1,2,,,875,JF THE SKIN SPECIALIST,32,Miss Glam Pasaman Barat,1,500000,0,,
2,3,,,875,JF THE SKIN SPECIALIST,34,Miss Glam Duri,1,500000,0,,
3,4,,,875,JF THE SKIN SPECIALIST,38,Miss Glam P. Sidimpuan,1,500000,0,,
4,5,,,875,JF THE SKIN SPECIALIST,39,Miss Glam Aceh,1,500000,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
13586,13587,3433.0,UD. WIRA JAYA SUKSES - NON PPN - SILKORO/SUMME...,357,SILKORO,40,Miss Glam Marpoyan,4,500000,0,,
13587,13588,3433.0,UD. WIRA JAYA SUKSES - NON PPN - SILKORO/SUMME...,357,SILKORO,41,Miss Glam Sei Penuh,4,500000,0,,
13588,13589,3433.0,UD. WIRA JAYA SUKSES - NON PPN - SILKORO/SUMME...,357,SILKORO,42,Miss Glam Mayang,4,500000,0,,
13589,13590,3421.0,VIOPAD (ONLINE),2288,VIOPAD,37,PT Bersama Distriversa Indonesia,1,500000,0,,


In [21]:
# create master data directory if not exists
os.makedirs(MASTER_DATA_DIR, exist_ok=True)

# generate unique store data: id (get from 'ID Store'), name
store_df = pd.DataFrame(supplier_df['ID Store'].unique(), columns=['id'])
store_df['name'] = store_df['id'].map(supplier_df.set_index('ID Store')['Nama Store'].to_dict())

store_df.sort_values(by='id', inplace=True)

store_df

# save to csv
store_df.to_csv(MASTER_DATA_DIR + '/store.csv', index=False)

# Generate master supplier data

In [29]:
# Get unique suppliers
master_supplier_df = pd.DataFrame(supplier_df['ID Supplier'].dropna().unique(), columns=['id'])
master_supplier_df['name'] = master_supplier_df['id'].map(
    supplier_df.drop_duplicates('ID Supplier').set_index('ID Supplier')['Nama Supplier'].to_dict()
)

# column 'id' as int
master_supplier_df['id'] = master_supplier_df['id'].astype(int)

display(master_supplier_df)

# save to master data directory
master_supplier_df.to_csv(MASTER_DATA_DIR + '/supplier.csv', index=False)

Unnamed: 0,id,name
0,3180,ANUGERAH NIAGA JAYA (DMI)
1,3694,ANUGERAH PHARMINDO LESTARI - PPN (BPP)
2,3343,ANUGERAH PHARMINDO LESTARI - PPN (PKU)
3,3604,APOTEK MEDIZONE - NON PPN - CASH (DC)
4,2482,ASIA WIJAYA MAKMUR - NON PPN (MDN)
...,...,...
486,2407,UD. NURIN MAKMUR - PPN (BKL)
487,2706,UD. SINAR MUTIARA BARU (PYK)
488,3433,UD. WIRA JAYA SUKSES - NON PPN - SILKORO/SUMME...
489,3421,VIOPAD (ONLINE)


# generate master brand data

In [30]:
# Get unique brand
master_brand_df = pd.DataFrame(supplier_df['ID Brand'].dropna().unique(), columns=['id'])
master_brand_df['name'] = master_brand_df['id'].map(
    supplier_df.drop_duplicates('ID Brand').set_index('ID Brand')['Nama Brand'].to_dict()
)

# column 'id' as int
master_brand_df['id'] = master_brand_df['id'].astype(int)

display(master_brand_df)

# save to master data directory
master_brand_df.to_csv(MASTER_DATA_DIR + '/brand.csv', index=False)

Unnamed: 0,id,name
0,1756,BLOOD
1,875,JF THE SKIN SPECIALIST
2,2247,JUDYDOLL
3,384,MBK
4,111,MUSTIKA RATU
...,...,...
500,716,BEAUTICA
501,1779,MONTISS
502,1734,SKIN SANE
503,149,SR 12
