In [42]:
import pandas as pd
import numpy as np
import re

In [43]:
# Load the purchase-requirement workbook into init_df.
init_df = pd.read_excel(
    'PurchaseRequirement1.xlsx',
    engine='openpyxl',
)

In [44]:
# Initial item-code filters on the purchase requirement:
# remove the 'Unnamed: 0' column, then discard every row whose item code
# starts with RAW / KRI / F or ends with R / N (missing codes are kept).
init_df = init_df.drop(columns='Unnamed: 0')

has_excluded_prefix = init_df['itemcode'].str.startswith(('RAW', 'KRI', 'F'), na=False)
has_excluded_suffix = init_df['itemcode'].str.endswith(('R', 'N'), na=False)
init_df = init_df[~(has_excluded_prefix | has_excluded_suffix)]

In [45]:
# Filters for date, division-by-zero profit values and all-zero numeric rows.
numeric_columns = init_df.select_dtypes(include=['int64', 'float64']).columns
combinedate_condition = init_df['combinedate'] == '1970-01-01'
numeric_condition = (init_df[numeric_columns] == 0).all(axis=1)
combined_condition = combinedate_condition & numeric_condition
# Drop rows that carry the 1970-01-01 date AND are zero in every numeric column.
init_df = init_df[~combined_condition]

# All out-of-date products need to be reviewed: keep rows dated 2020-01-01 or later.
# .copy() detaches the filtered rows so the column assignment below writes to an
# independent frame instead of a slice (avoids SettingWithCopyWarning).
init_df = init_df[(init_df['combinedate'] != '1970-01-01') & (init_df['combinedate'] >= '2020-01-01')].copy()

# Create the profit percentage. 'n-mean' appears in both numerator and denominator,
# so a zero 'n-mean' or a zero 'average sales price' produces NaN or +/-inf here.
init_df['Profit%'] = (((init_df['average sales price'] - init_df['cost'])*init_df['n-mean'])/(init_df['n-mean']*init_df['average sales price']))*100

# Remove every row whose profit is not a usable number: NaN and BOTH infinities
# (previously only -inf was removed, so a +inf row would have sorted to the top).
profit_is_infinite = init_df['Profit%'].isin([np.inf, -np.inf])
init_df = init_df[init_df['Profit%'].notna() & ~profit_is_infinite].copy()

# Keep only items whose stock does not exceed 'n-mean' (Stockestimate <= 0).
init_df['Stockestimate'] = init_df['final_stock'] - init_df['n-mean']
init_df = init_df[init_df['Stockestimate'] <= 0]

# Most profitable items first.
init_df = init_df.sort_values(by='Profit%', ascending=False)

In [46]:
# Product details, shipment related: MOQ, Pcs/Ctn, carton weight, CBM/Ctn.
unused_detail_columns = ['Packing ', 'Remarks', 'Shipment', 'Unnamed: 21']
required_detail_columns = ['Quantity', 'Pcs/ Ctn', 'carton weight', 'CBM/ Ctn']

details_df = pd.read_excel('AllSalesConfirmationDatabaseUpdated.xlsx', engine='openpyxl')
details_df = (
    details_df
    .drop(columns=unused_detail_columns)
    # A row is only usable when all four shipment figures are present.
    .dropna(subset=required_detail_columns)
)
# Store product codes as strings.
details_df['Code'] = details_df['Code'].astype(str)

In [47]:
# Collapse the details to one row per product code:
# smallest quantity, rounded mean pieces per carton and carton weight, mean CBM per carton.
per_code_aggregations = {
    'Quantity': 'min',
    'Pcs/ Ctn': lambda pcs: round(pcs.mean()),
    'carton weight': lambda weight: round(weight.mean()),
    'CBM/ Ctn': lambda cbm: cbm.mean(),
}
grouped_by_code = details_df.groupby(['Code'])
details_df = grouped_by_code.agg(per_code_aggregations).reset_index()

In [48]:
# Keep the item code, name and 'n-mean' columns and attach the per-code shipment
# details; the inner join keeps only items present in both tables.
selected_init_df = init_df.loc[:, ['itemcode', 'itemname', 'n-mean']]
merged_df = pd.merge(
    selected_init_df,
    details_df,
    how='inner',
    left_on='itemcode',
    right_on='Code',
)

In [49]:
# MOQ = quantity divided by pieces per carton; the raw quantity is not needed afterwards.
moq_values = merged_df['Quantity'].div(merged_df['Pcs/ Ctn'])
merged_df = merged_df.assign(MOQ=moq_values).drop(columns='Quantity')

In [50]:
# Load the item/unit table and keep only the items whose packing unit is 'Dozen'.
doz_df = pd.read_excel('item-doz.xlsx', engine='openpyxl')
packed_by_dozen = doz_df['xunitpck'] == 'Dozen'
doz_df = doz_df[packed_by_dozen]

In [51]:
# Divide pieces-per-carton by 12 for every item listed in doz_df (items packed by the dozen).
# NOTE: this updates merged_df in place, so re-running the cell divides by 12 again.
matching_products = merged_df['itemcode'].isin(doz_df['xitem'])
# The division produces fractional values (e.g. 50 / 12). Cast the column to float
# first so those values are not written into an integer column, which pandas 2.x
# reports as an incompatible-dtype FutureWarning and newer versions reject.
merged_df['Pcs/ Ctn'] = merged_df['Pcs/ Ctn'].astype(float)
merged_df.loc[matching_products, 'Pcs/ Ctn'] = merged_df.loc[matching_products, 'Pcs/ Ctn'] / 12

In [52]:
# Load the price-calculation data and keep only the columns used below.
cost_df = pd.read_excel('AllPriceCalculationDataUpdated.xlsx', engine = 'openpyxl')
# .copy() makes the column subset an independent frame, so the 'Code' assignment
# below does not write to a slice of the full sheet (avoids SettingWithCopyWarning).
cost_df = cost_df[['Code','Product Name','Import Price/ Unit Tk.','Prime Cost with VAT+Packing','New Selling Price/ Unit (WS)','Shipment ']].copy()
# Store product codes as strings.
cost_df['Code'] = cost_df['Code'].astype(str)
# Order rows by product code; sort_values keeps each row's original index label.
cost_df = cost_df.sort_values(by='Code',ascending=True)

In [53]:
#Shipment year and number for sorting
def extract_year(shipment):
    """Return the two-digit year embedded in a shipment code, or None.

    Two layouts are recognised:
      * ``KF002/23`` -> 23  (two digits after a slash at the end of the code)
      * ``KH18-801`` -> 18  (two digits between a two-letter prefix and a hyphen)

    Parameters
    ----------
    shipment : object
        The raw cell value. Anything that is not a string (for example the
        NaN pandas produces for an empty cell) is treated as "no year".

    Returns
    -------
    int or None
        The year as an integer, or None when the value is not a string or
        matches neither layout.
    """
    # re.search raises TypeError on non-strings such as NaN, so bail out early.
    if not isinstance(shipment, str):
        return None

    # Surrounding whitespace would defeat the end-of-string anchor below.
    shipment = shipment.strip()

    # KF002/23 -> 23
    match_last_two_digits = re.search(r'/(\d{2})$', shipment)
    if match_last_two_digits:
        return int(match_last_two_digits.group(1))

    # KH18-801 or KF18-801 -> 18
    match_first_two_digits = re.search(r'[A-Z]{2}(\d{2})-', shipment)
    if match_first_two_digits:
        return int(match_first_two_digits.group(1))

    return None

def extract_shipment_number(shipment):
    """Return the shipment sequence number embedded in a shipment code, or None.

    Two layouts are recognised:
      * ``KF002/23`` -> 2   (the full three-digit number before the slash, so
        ``KF012/23`` -> 12 rather than colliding with shipment 2)
      * ``KH18-801`` -> 1   (the last digit of the number after the hyphen)

    Parameters
    ----------
    shipment : object
        The raw cell value. Anything that is not a string (for example the
        NaN pandas produces for an empty cell) is treated as "no number".

    Returns
    -------
    int or None
        The shipment number, or None when the value is not a string or
        matches neither layout.
    """
    # re.search raises TypeError on non-strings such as NaN, so bail out early.
    if not isinstance(shipment, str):
        return None

    # KF002/23 -> 002 --> 2. Convert all three digits: keeping only the last
    # digit would map shipments 2 and 12 to the same value and misorder them.
    match_shipment_number_slash = re.search(r'[A-Z]{2}(\d{3})/', shipment)
    if match_shipment_number_slash:
        return int(match_shipment_number_slash.group(1))

    # KH18-801 or KF18-801 -> 801 --> 1
    # NOTE(review): only the last digit is kept for this layout, as in the
    # documented example; confirm whether the leading digits carry meaning.
    match_shipment_number_hyphen = re.search(r'[A-Z]{2}\d{2}-(\d+)', shipment)
    if match_shipment_number_hyphen:
        return int(match_shipment_number_hyphen.group(1)[-1])

    return None

# Derive the sortable shipment year and shipment number from the raw shipment code.
shipment_codes = cost_df['Shipment ']
cost_df['Year'] = shipment_codes.apply(extract_year)
cost_df['Shipment#'] = shipment_codes.apply(extract_shipment_number)

In [54]:
# Data clean-up: fill in a missing import price / prime cost / selling price by
# scaling the previous row's value with the ratio of a price both rows have.
# "Previous row" is the row labelled i - 1, and it is only used when it belongs
# to the same product code. Rows missing two or more of the three prices cannot
# be reconstructed and are dropped.
import_col = 'Import Price/ Unit Tk.'
prime_col = 'Prime Cost with VAT+Packing'
selling_col = 'New Selling Price/ Unit (WS)'

# Any pair of missing prices disqualifies a row, i.e. at least two of three are NaN.
unrecoverable = cost_df[[import_col, prime_col, selling_col]].isna().sum(axis=1) >= 2
indices_to_drop = list(unrecoverable[unrecoverable].index)

for product_code, group in cost_df.groupby('Code'):
    for i in group.index:
        if unrecoverable.loc[i]:
            continue

        previous_index = i - 1
        # Guard the label lookup: a missing label would raise KeyError.
        if previous_index not in cost_df.index:
            continue
        if cost_df.loc[previous_index, 'Code'] != product_code:
            continue

        if pd.isna(cost_df.loc[i, prime_col]):
            # Scale the previous prime cost by the change in import price.
            import_price_ratio = cost_df.loc[i, import_col] / cost_df.loc[previous_index, import_col]
            cost_df.loc[i, prime_col] = cost_df.loc[previous_index, prime_col] * import_price_ratio

        if pd.isna(cost_df.loc[i, import_col]):
            # Scale the previous import price by the change in prime cost.
            prime_cost_ratio = cost_df.loc[i, prime_col] / cost_df.loc[previous_index, prime_col]
            cost_df.loc[i, import_col] = cost_df.loc[previous_index, import_col] * prime_cost_ratio

        if pd.isna(cost_df.loc[i, selling_col]):
            # Scale the previous selling price by the change in prime cost.
            prime_cost_ratio = cost_df.loc[i, prime_col] / cost_df.loc[previous_index, prime_col]
            cost_df.loc[i, selling_col] = cost_df.loc[previous_index, selling_col] * prime_cost_ratio

# Drop only after every product has been processed. Dropping inside the loop
# removed labels that a later product's i - 1 lookup could still hit (KeyError).
cost_df.drop(index=indices_to_drop, inplace=True)
cost_df.reset_index(drop=True, inplace=True)

In [55]:
# Latest shipment per product: keep shipment years 21 and later, order each code's
# rows by year then shipment number (both descending), and keep the first row per code.
from_recent_years = cost_df['Year'] >= 21
costl_df = cost_df[from_recent_years].sort_values(
    by=['Code', 'Year', 'Shipment#'],
    ascending=[True, False, False],
)
costlop_df = costl_df.drop_duplicates(subset=['Code'], keep='first')

In [56]:
# Attach the latest cost row to each item (inner join on the item code) and
# remove the duplicated code columns and the product name.
final_merged_df = merged_df.merge(
    costlop_df,
    how='inner',
    left_on='itemcode',
    right_on='Code',
).drop(columns=['Code_x', 'Code_y', 'Product Name'])

In [57]:
# Scale the per-piece figures by pieces per carton, then rename to the output
# headers and keep only the columns that are exported.
pcs_per_carton = final_merged_df['Pcs/ Ctn']
margin_per_piece = (
    final_merged_df['New Selling Price/ Unit (WS)']
    - final_merged_df['Prime Cost with VAT+Packing']
)

final_merged_df['Price/unit'] = pcs_per_carton * final_merged_df['Import Price/ Unit Tk.']
final_merged_df['Profit/unit'] = margin_per_piece * pcs_per_carton
final_merged_df['Demand/month'] = final_merged_df['n-mean'] / pcs_per_carton

output_headers = {
    'MOQ': 'MOQ/unit',
    'carton weight': 'Weight/unit',
    'CBM/ Ctn': 'CBM/unit',
    'itemcode': 'Code',
    'itemname': 'Name',
    'Pcs/ Ctn': 'Pcs/Ctn',
}
export_columns = [
    'Code', 'Name', 'Demand/month', 'MOQ/unit', 'CBM/unit',
    'Weight/unit', 'Price/unit', 'Profit/unit', 'Pcs/Ctn',
]
final_merged_df = final_merged_df.rename(columns=output_headers)[export_columns]


In [58]:
# Export for the linear model: demand and profit columns included, 'Pcs/Ctn' left out.
linear_columns = [
    'Code', 'Name', 'Demand/month', 'MOQ/unit',
    'CBM/unit', 'Weight/unit', 'Price/unit', 'Profit/unit',
]
final_merged_df_lin = final_merged_df[linear_columns]
final_merged_df_lin.to_excel('shipment_linear.xlsx')

In [59]:
# for stochastic optimization with scenario analysis

In [60]:
# Export for the stochastic model: no demand or profit columns, 'Pcs/Ctn' included.
stochastic_columns = [
    'Code', 'Name', 'MOQ/unit', 'CBM/unit',
    'Weight/unit', 'Price/unit', 'Pcs/Ctn',
]
final_merged_df_stoch = final_merged_df[stochastic_columns]
final_merged_df_stoch.to_excel('shipment_stochastic.xlsx')

In [20]:
# Use the list of consequent products from the basket analysis to query the database and bring in any consequent products that are not already in the purchase requirement,
# then run the linear optimization with a complementary constraint.