<a href="https://www.kaggle.com/code/anggoletomi/package-box-size-optimization?scriptVersionId=105736412" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

We are an online seller looking for the best-fitting box dimensions to use for shipping orders to end customers. The goal is to find the most efficient box size: one that is neither so big that it leaves a lot of unused space nor so small that it cannot fit multiple products. We will use the sales data from the last few months to determine the right box size.

## CREATE BOX DIMENSION LIST

In [None]:
import itertools
import pandas as pd
from tqdm import tqdm
from py3dbp import Packer, Bin, Item

## create list from range

# Candidate edge lengths in whole centimetres: 10 through 29, because range()
# excludes its stop value.
# NOTE(review): an earlier comment here stated a 40cm maximum, which the code
# does not produce — confirm the intended upper bound.
length = list(range(10, 30))

# Width and height reuse the very same list object as length.
width = height = length

# Weight limit passed to every Bin in solve(); every item is given weight 1.
MAX_WEIGHT = 10 ** 9

In [None]:
# The three axes whose Cartesian product enumerates every candidate box.
list_box = [length, width, height]
box_dimension = list(itertools.product(*list_box))

# One row per candidate box, with the three edges stored as floats.
dimension_columns = ['length', 'width', 'height']
df_box = pd.DataFrame(box_dimension, columns=dimension_columns).astype(float)

# 1-based running id and the matching "BOX-<id>" label.
df_box['id'] = df_box.index + 1
df_box['box_number'] = "BOX-" + df_box['id'].astype(str)

# Human-readable "L x W x H" label built from the float edge values.
edge_text = df_box[dimension_columns].astype(str)
df_box['box_dimension'] = edge_text['length'].str.cat(
    [edge_text['width'], edge_text['height']], sep=' x '
)

column_order = ['id', 'box_number', 'length', 'width', 'height', 'box_dimension']
df_box = df_box[column_order]

# Independent copy that the rest of the notebook works on.
box_dimension_df = df_box.copy()

print(f'There are {len(box_dimension)} possibility of box dimension')

In [None]:
# Print the column dtypes and non-null counts of the candidate-box table.
box_dimension_df.info()

## GET THE SKU LIST

In [None]:
# Load the SKU dimension table from the Kaggle input dataset.
# Later cells join it on 'sku_id' and read its 'length', 'width' and 'height' columns.
sku_dimension_df = pd.read_csv("/kaggle/input/uq-retail-order/sku_dimensions.csv")

# Preview the first five rows.
sku_dimension_df.head(5)

In [None]:
# Print the column dtypes and non-null counts of the SKU dimension table.
sku_dimension_df.info()

## GET THE ORDER LIST

In [None]:
# Load the order lines from the Kaggle input dataset.
# Later cells read its 'order_number', 'sku_id' and 'quantity' columns.
order_sku_df = pd.read_csv("/kaggle/input/uq-retail-order/uq_orders.csv")

# Preview the first five rows of the table that was just loaded (the order
# lines, not the SKU dimension table).
order_sku_df.head(5)

In [None]:
# Print the column dtypes and non-null counts of the order table; this cell
# belongs to the order-list section, so it inspects order_sku_df.
order_sku_df.info()

## CALCULATE THE BOX SIZE

In [None]:
# Volume of every candidate box (product of its three edges).
box_dimension_df['volume'] = box_dimension_df[['length', 'width', 'height']].prod(axis=1)

# Order candidates from smallest to largest volume (ties broken by id) and keep
# only the first box seen for each distinct volume.
# NOTE(review): boxes with equal volume but different shapes are also discarded
# here — confirm that this reduction of the candidate set is intended.
box_dimension_df = (
    box_dimension_df
    .sort_values(['volume', 'id'])
    .drop_duplicates('volume')
)
box_dimension_df

In [None]:
# Attach the SKU dimensions to every order line, then discard any row that
# still has a missing value in any column (including lines whose SKU has no
# dimension record).
df = (
    order_sku_df
    .merge(sku_dimension_df, how='left', on='sku_id')
    .dropna()
)

df

In [None]:
# Parallel arrays (label plus three edges) of the candidate boxes, in table
# order; solve() walks them in lockstep.
box_numbers, xs, ys, zs = (
    box_dimension_df[column].values
    for column in ('box_number', 'length', 'width', 'height')
)

def solve(order_df: pd.DataFrame) -> str:
    """Return the first candidate box that holds every item of one order.

    Candidates are tried in the order of the module-level ``box_numbers``,
    ``xs``, ``ys`` and ``zs`` arrays (the box table is sorted by ascending
    volume earlier in the notebook). Each attempt packs the whole order into
    a single bin.

    Args:
        order_df: Rows of a single order. Must provide the columns
            ``sku_id``, ``length``, ``width``, ``height`` and ``quantity``.

    Returns:
        The ``box_number`` of the first candidate whose bin reports no
        unfitted items, or ``'UNFITTED'`` when no candidate holds the order.
    """
    # Expand the order into one (sku, length, width, height) record per
    # physical piece once, instead of re-walking the DataFrame for every
    # candidate box. Reading the columns directly also avoids the row-wise
    # dtype upcasting that iterrows() performs.
    pieces = [
        (sku_id, item_length, item_width, item_height)
        for sku_id, item_length, item_width, item_height, quantity in zip(
            order_df['sku_id'],
            order_df['length'],
            order_df['width'],
            order_df['height'],
            order_df['quantity'],
        )
        # range() rejects floats, and quantity can be float-typed (e.g. after
        # a dtype upcast), so coerce it explicitly.
        for _ in range(int(quantity))
    ]

    for box_number, x, y, z in zip(box_numbers, xs, ys, zs):
        packer = Packer()
        packer.add_bin(Bin(box_number, x, y, z, MAX_WEIGHT))

        # Build fresh Item objects for every attempt, as the original did.
        # NOTE(review): presumably packing mutates item state, so items are
        # not shared between packers — confirm against py3dbp.
        for sku_id, item_length, item_width, item_height in pieces:
            packer.add_item(Item(sku_id, item_length, item_width, item_height, 1))

        packer.pack()

        # Only one bin is registered per packer; accept the box as soon as a
        # bin reports nothing left over.
        if any(not b.unfitted_items for b in packer.bins):
            return box_number

    return 'UNFITTED'

In [None]:
ddf = df.copy()

# One [order_number, chosen box] pair per order; tqdm shows progress over the
# per-order groups.
res_matrix = [
    [order_number, solve(order_df)]
    for order_number, order_df in tqdm(ddf.groupby('order_number'))
]

res_df = pd.DataFrame(data=res_matrix, columns=['order_number', 'box_number'])

In [None]:
## CLEAN FINAL DATAFRAME

# Per-order totals broadcast back onto every order line: number of distinct
# SKUs and total pieces.
lines_by_order = order_sku_df.groupby(['order_number'])
to_basket_df = order_sku_df.assign(
    total_basket_sku=lines_by_order['sku_id'].transform('nunique'),
    total_basket_pcs=lines_by_order['quantity'].transform('sum'),
)

# Collapse to one row per order with the shorter column names.
basket_df = (
    to_basket_df[['order_number', 'total_basket_sku', 'total_basket_pcs']]
    .rename(columns={'total_basket_sku': 'total_sku', 'total_basket_pcs': 'total_qty'})
    .drop_duplicates()
)

In [None]:
# Add the readable box label and the per-order basket totals to each result.
box_labels = box_dimension_df[['box_number', 'box_dimension']]
final_df = (
    res_df
    .merge(box_labels, how='left', on='box_number')
    .merge(basket_df, how='left', on='order_number')
)

final_df

In [None]:
# Count orders per assigned box, most frequent first.
# Orders marked 'UNFITTED' have no row in the box table, so their
# box_dimension is missing after the left merge. dropna=False keeps that group
# so those orders are not silently left out of the counts and of any share
# computed from them.
final_df = (
    final_df
    .groupby(['box_dimension', 'box_number'], dropna=False)['order_number']
    .count()
    .reset_index(name='total_order')
    .sort_values(['total_order'], ascending=False)
)

In [None]:
# Share of all counted orders taken by each box, formatted such as "12.3%".
order_total = final_df['total_order'].sum()
order_share = ((final_df['total_order'] / order_total) * 100).round(1)
final_df['perc_of_order'] = order_share.astype(str) + '%'

final_df.head(5)