In [3]:
import sys
sys.path.append("..")

In [4]:
import pandas as pd
import random
from src.simulation import simulate

In [5]:
assortment_df = pd.read_csv("data/assortment.csv",
                            dtype={"product_id": str,
                                   "product_group": str,
                                   })

orders_df = pd.read_csv("data/orders.csv",
                        parse_dates=['datetime'])

### Assortment
Each row in 'assortment_df' contains a product that we have in our assortment that could be ordered.

Column definitions:
* product_id: product identifier
* product_group: lower-level category of product
* size_group: size of the product
* cluster: higher-level category of product

### Orders
Each row in 'orders_df' contains an shop order line.

Column definitions:
* datetime: datetime when the shop order is placed
* order_id: order identifier
* product_id: product that is ordered

In [6]:
def generate_naive_stock_allocation(assortment):
    warehouse_allocations = random.choices([[True, True],[True, False], [False, True]],
                              k=len(assortment), weights=[0.1, 0.45, 0.45])

    return pd.concat([assortment['product_id'], pd.DataFrame(warehouse_allocations, columns=[
        'warehouseA', 'warehouseB'])], axis=1)

In [7]:
stock_allocation = generate_naive_stock_allocation(assortment_df)
stock_allocation.head()

Unnamed: 0,product_id,warehouseA,warehouseB
0,007e97352fdb433,True,False
1,32b8b71245fc430,True,False
2,d6825ec8a7d5409,False,True
3,93210367755d441,True,True
4,1302c851692140a,False,True


In [8]:
simulation_result = simulate(orders_df, stock_allocation)
simulation_result.show_aggregated_results()

Warehouse orders sourced: 121578
Total costs: 897498.1
Lost sales: 8906 products


In [9]:
simulation_result.warehouse_orders

Unnamed: 0,shop_order_id,warehouse_id,pack_line,products,warehouse_costs,shipment_costs,order_datetime,total_costs,quantity
0,00330b8e0df6,warehouseB,monoManual,[83ee1b0d14e148a],0.5,6.0,2021-01-01 18:50:36.719824,6.5,1
1,0349de8a0eb0,warehouseA,monoManual,[4f14c89b683a428],0.7,5.0,2021-01-01 18:57:30.679387,5.7,1
2,0349de8a0eb0,warehouseB,multiManual,"[650c3ad888a74f3, 30b1fbefd5ca45a, 9e571016c83...",4.2,6.0,2021-01-01 18:57:30.679387,10.2,6
3,051f784d56df,warehouseB,monoManual,[e758471031f34c2],0.5,6.0,2021-01-01 16:59:23.342815,6.5,1
4,06074fe2159d,warehouseA,monoManual,[5d4331c419a24ca],0.7,5.0,2021-01-01 22:35:34.041543,5.7,1
...,...,...,...,...,...,...,...,...,...
121573,f931cc63aa25,warehouseB,monoManual,[c669715883a9442],0.5,6.0,2021-12-30 15:47:19.274943,6.5,1
121574,fd3d162c659a,warehouseB,monoManual,[64a912c7dff64bf],0.5,6.0,2021-12-30 12:23:37.006524,6.5,1
121575,fe5c14b218f3,warehouseB,monoManual,[3ef8dc897dfc4d2],0.5,6.0,2021-12-30 15:45:00.543150,6.5,1
121576,fefdccbd189c,warehouseB,monoManual,[572aff5625d9420],0.5,6.0,2021-12-30 20:45:04.570772,6.5,1


In [10]:
simulation_result.lost_sales

Unnamed: 0,shop_order_id,order_datetime,product_id
0,e9588ae54f44,2021-01-05 16:17:04.976736,ef572fc53e1244f
1,eaaa7b535429,2021-01-05 18:33:25.703158,ad525ae3fac74e1
2,eb3031b98a19,2021-01-05 16:28:14.456537,4ffe48858f76491
3,ede3f7b81ad0,2021-01-05 14:11:48.085574,96f5fa1cd4ff4c7
4,ee69af0dd5b8,2021-01-05 21:03:14.508272,168896acf23149f
...,...,...,...
8901,f931cc63aa25,2021-12-30 15:47:19.274943,62e3040b6322463
8902,fa0858a42cba,2021-12-30 20:54:44.372585,810c38c9a7e94fe
8903,fc4a4217c8d0,2021-12-30 14:22:56.930373,b7236e8db94e4a8
8904,fda72877f233,2021-12-30 11:00:19.365373,0b64c0ea4f68450


# Base model

In [35]:

#Initialize an empty list to store rows
warehouse_rows = []

# Iterate over rows in product_df and populate the warehouse_df based on conditions
for index, row in assortment_df.iterrows():
    product_id = row['product_id']
    cluster = row['cluster']

    # Initialize warehouseA and warehouseB as False
    warehouseA = False
    warehouseB = False

    # Check conditions and update warehouse columns
    if cluster in ['toy', 'garden', 'winter']:
        warehouseA = True
    elif cluster in ['clothes']:
        warehouseB = True

    # Append a new row to the list
    warehouse_rows.append({'product_id': product_id, 'warehouseA': warehouseA, 'warehouseB': warehouseB})

# Create a DataFrame from the list of rows
warehouse_df = pd.DataFrame(warehouse_rows)

# Display the resulting warehouse_df
warehouse_df

Unnamed: 0,product_id,warehouseA,warehouseB
0,007e97352fdb433,False,False
1,32b8b71245fc430,False,False
2,d6825ec8a7d5409,False,False
3,93210367755d441,False,False
4,1302c851692140a,False,False
...,...,...,...
995,fd27949fe39645c,True,False
996,52047e1f1e96465,True,False
997,8028747200e8443,True,False
998,e1bac8e0a574429,True,False


In [36]:
warehouse_df.describe()

Unnamed: 0,product_id,warehouseA,warehouseB
count,1000,1000,1000
unique,1000,2,2
top,007e97352fdb433,False,False
freq,1,500,600


In [37]:
simulation_result = simulate(orders_df, warehouse_df)
simulation_result.show_aggregated_results()

Warehouse orders sourced: 92311
Total costs: 1270093.1
Lost sales: 67608 products


In [34]:
assortment_df.groupby("cluster").size()

cluster
clothes    400
garden     250
toys       100
winter     250
dtype: int64

In [38]:
warehouse_df.groupby("warehouseB").size()

warehouseB
False    600
True     400
dtype: int64

# total sales of the items

In [39]:
# Merge the DataFrames on the 'product_id' column
merged_df = pd.merge(orders_df, assortment_df, on='product_id', how='left')

# Calculate total sales for each product_id
total_sales_df = merged_df['product_id'].value_counts().reset_index()
total_sales_df.columns = ['product_id', 'total_sales']

# Sort the DataFrame by total sales in descending order
total_sales_df = total_sales_df.sort_values(by='total_sales', ascending=False)

# Display the resulting DataFrame with total sales sorted
total_sales_df

Unnamed: 0,product_id,total_sales
0,03b755d783fd4ec,644
1,62e3040b6322463,639
2,6b91277c742e439,634
3,36103fff52e640a,630
4,93210367755d441,627
...,...,...
995,0f44a9e6585b450,72
996,55a3f2db0b8846a,71
997,f98dbfdfecb7464,67
998,6e4120fb6f16405,65


In [214]:

#Initialize an empty list to store rows
warehouse_rows = []

# Iterate over rows in product_df and populate the warehouse_df based on conditions
for index, row in total_sales_df.iterrows():
    product_id = row['product_id']
    total_sales = row['total_sales']

    # Initialize warehouseA and warehouseB as False
    warehouseA = False
    warehouseB = False

    # Check conditions and update warehouse columns
    if (total_sales < 193) or (total_sales > 282):
        warehouseB = True
    if total_sales >= 142:
        warehouseA = True

    # Append a new row to the list
    warehouse_rows.append({'product_id': product_id, 'warehouseA': warehouseA, 'warehouseB': warehouseB, 'total_sales})

# Create a DataFrame from the list of rows
warehouse_df = pd.DataFrame(warehouse_rows)

# Display the resulting warehouse_df
warehouse_df

Unnamed: 0,product_id,warehouseA,warehouseB,total_sales
0,03b755d783fd4ec,False,True,644
1,62e3040b6322463,False,True,639
2,6b91277c742e439,False,True,634
3,36103fff52e640a,False,True,630
4,93210367755d441,False,True,627
...,...,...,...,...
995,0f44a9e6585b450,False,True,72
996,55a3f2db0b8846a,False,True,71
997,f98dbfdfecb7464,False,True,67
998,6e4120fb6f16405,False,True,65


In [215]:

percentiles_df = total_sales_df['total_sales'].quantile([0.2,0.4, 0.5,0.6, 0.75,0.8,0.90, 1]).reset_index(name='percentile')

In [216]:
percentiles_df

Unnamed: 0,index,percentile
0,0.2,97.0
1,0.4,142.4
2,0.5,193.0
3,0.6,202.0
4,0.75,217.0
5,0.8,222.0
6,0.9,281.5
7,1.0,644.0


In [217]:
warehouse_df.describe()

Unnamed: 0,total_sales
count,1000.0
mean,200.679
std,140.180857
min,64.0
25%,99.0
50%,193.0
75%,217.0
max,644.0


In [218]:
simulation_result = simulate(orders_df, warehouse_df)
simulation_result.show_aggregated_results()

Warehouse orders sourced: 128140
Total costs: 909162.0
Lost sales: 5825 products


In [219]:
both_true_df = warehouse_df[(warehouse_df['warehouseA'] == True) & (warehouse_df['warehouseB'] == True)]

# Display the resulting DataFrame
both_true_df


Unnamed: 0,product_id,warehouseA,warehouseB,total_sales


In [210]:
warehouse_df.groupby("warehouseB").count()

Unnamed: 0_level_0,product_id,warehouseA,total_sales
warehouseB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,403,403,403
True,597,597,597


In [211]:
warehouse_df.groupby("warehouseA").count()

Unnamed: 0_level_0,product_id,warehouseB,total_sales
warehouseA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,400,400,400
True,600,600,600


In [212]:
#b ye tek tercih edilen ve çok satan ürünleri ekleyelim
# a'nın kapasite düşük olduğu için orta segment ürünler

In [213]:
simulation_result.warehouse_orders.head(30)

Unnamed: 0,shop_order_id,warehouse_id,pack_line,products,warehouse_costs,shipment_costs,order_datetime,total_costs,quantity
0,00330b8e0df6,warehouseA,monoManual,[83ee1b0d14e148a],0.7,5.0,2021-01-01 18:50:36.719824,5.7,1
1,0349de8a0eb0,warehouseB,multiManual,"[9e571016c83f48c, 4f14c89b683a428, 30b1fbefd5c...",2.1,6.0,2021-01-01 18:57:30.679387,8.1,3
2,0349de8a0eb0,warehouseA,multiManual,"[589a31255ac5410, 650c3ad888a74f3, 26245953b65...",3.6,5.0,2021-01-01 18:57:30.679387,8.6,4
3,051f784d56df,warehouseA,monoManual,[e758471031f34c2],0.7,5.0,2021-01-01 16:59:23.342815,5.7,1
4,06074fe2159d,warehouseA,monoManual,[5d4331c419a24ca],0.7,5.0,2021-01-01 22:35:34.041543,5.7,1
5,066c8ad52448,warehouseB,multiManual,"[d241172391ce452, 1b38df5f1cbb4d4]",1.4,6.0,2021-01-01 22:40:29.796308,7.4,2
6,06834e141123,warehouseA,monoManual,[379f3ea122b5457],0.7,5.0,2021-01-01 21:42:41.512068,5.7,1
7,06c4433ccd33,warehouseA,monoManual,[3f92763d6f27426],0.7,5.0,2021-01-01 19:59:16.647936,5.7,1
8,07c6b2840ec5,warehouseA,monoManual,[55462138ca5d4b6],0.7,5.0,2021-01-01 14:07:08.932730,5.7,1
9,082824e3f286,warehouseA,multiManual,"[7d59758138f54bc, 5446e83ac9014db, 89038c7c240...",2.7,5.0,2021-01-01 17:05:27.890598,7.7,3
