# Preparation

In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the data files are read from there
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Switch the working directory to the Drive data folder so that files can be opened by bare name
%cd /content/gdrive/MyDrive/Dibimbing/Data

/content/gdrive/MyDrive/Dibimbing/Data


In [3]:
# Packages
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

  import pandas.util.testing as tm


In [4]:
# Raw productivity export, loaded from the current working directory
DATA_FILE = 'PRODUCTIVITY DESEMBER 2021.XLSX'
df = pd.read_excel(DATA_FILE)

# Data Profiling

In [5]:
# Preview five random rows; the fixed seed keeps the preview identical on every re-run
df.sample(5, random_state=42)

Unnamed: 0,Queue,Product,Processor,Confirmation Date,Confirmation Time
222834,INTERNAL,970594.0,IDJUNAEDIZZ,2021-12-30,21:13:44
8511,OUTBOUND,970652.0,IDASODHA,2021-12-04,23:43:56
95764,INB-PROD,970057.0,IDTISNAZZ,2021-12-11,00:18:56
17617,INTERNAL,970500.0,IDARYADIRI,2021-12-02,04:58:35
134417,INBOUND,970651.0,IDJUNAEDIZZ,2021-12-15,07:14:13


In [6]:
# Column names, non-null counts and dtypes of the raw frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 238083 entries, 0 to 238082
Data columns (total 5 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   Queue              238083 non-null  object        
 1   Product            237023 non-null  float64       
 2   Processor          238083 non-null  object        
 3   Confirmation Date  238083 non-null  datetime64[ns]
 4   Confirmation Time  238083 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 9.1+ MB


Data Description : 

1. Queue : Workload Classification
2. Product : Product ID number
3. Processor : Warehouse order processor
4. Confirmation Date : Warehouse order confirmation date
5. Confirmation Time : Warehouse order confirmation time

Notes :
Pay attention to the quantity for each Product. Products have different quantities, which are summed up as pallet quantities (you can assume 1 pallet per row).

In [7]:
# Distinct values present in the Queue column
df['Queue'].unique()

array(['OUTBOUND', 'OUT-CONT', 'O-PND-OUT', 'NARROW-RPL', 'NARROW-OUT',
       'NARROW', 'INTERNAL', 'INBOUND', 'INB-PROD'], dtype=object)

In [8]:
# Number of rows per queue, most frequent first
df['Queue'].value_counts()

INBOUND       84343
OUTBOUND      53410
INB-PROD      40316
OUT-CONT      23975
INTERNAL      12398
NARROW-OUT     7999
NARROW         7809
O-PND-OUT      7336
NARROW-RPL      497
Name: Queue, dtype: int64

# Handling Missing Value

In [9]:
# Check for missing values: count the NA entries in every column and
# express the share of populated rows as a percentage.
n_rows = df.shape[0]
missing_data = (
    df.isnull()
    .sum(axis=0)
    .rename_axis('variable')
    .reset_index(name='missing values')
)
missing_data['filling factor (%)'] = (n_rows - missing_data['missing values']) / n_rows * 100
# Least complete columns first
missing_data.sort_values('filling factor (%)').reset_index(drop=True)

Unnamed: 0,variable,missing values,filling factor (%)
0,Product,1060,99.554777
1,Queue,0,100.0
2,Processor,0,100.0
3,Confirmation Date,0,100.0
4,Confirmation Time,0,100.0


In [10]:
# Discard every row that has at least one missing field,
# then re-check how many rows remain per queue
df = df.dropna(axis=0, how='any')
df['Queue'].value_counts()

INBOUND       84328
OUTBOUND      53383
INB-PROD      40316
OUT-CONT      23975
INTERNAL      12368
NARROW-OUT     7999
NARROW         7809
O-PND-OUT      6348
NARROW-RPL      497
Name: Queue, dtype: int64

In [11]:
# Store Product IDs as integers (the column was read as float64).
# assign() returns a new frame, which avoids the SettingWithCopyWarning that
# in-place column assignment raises on the frame returned by dropna().
df = df.assign(Product=df['Product'].astype(int))
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 237023 entries, 0 to 238082
Data columns (total 5 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   Queue              237023 non-null  object        
 1   Product            237023 non-null  int64         
 2   Processor          237023 non-null  object        
 3   Confirmation Date  237023 non-null  datetime64[ns]
 4   Confirmation Time  237023 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 10.9+ MB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


# Modeling Preparation

In [12]:
# Modelling frame: keep only the confirmation date (as 'Date') and the queue of each row
df_model = pd.DataFrame({
    'Date': df['Confirmation Date'],
    'Queue': df['Queue'],
})
df_model

Unnamed: 0,Date,Queue
0,2021-12-01,OUTBOUND
1,2021-12-01,OUTBOUND
2,2021-12-01,OUTBOUND
3,2021-12-01,OUTBOUND
4,2021-12-01,OUTBOUND
...,...,...
238078,2021-12-31,INB-PROD
238079,2021-12-31,INB-PROD
238080,2021-12-31,INB-PROD
238081,2021-12-31,INB-PROD


In [13]:
# Target variable: number of rows for every (Date, Queue) pair
df_model = (
    df_model
    .groupby(['Date', 'Queue'])
    .size()
    .reset_index(name="Count")
    .astype({'Count': int})
)
df_model.head(8)

Unnamed: 0,Date,Queue,Count
0,2021-12-01,INB-PROD,2031
1,2021-12-01,INBOUND,2970
2,2021-12-01,INTERNAL,531
3,2021-12-01,NARROW,550
4,2021-12-01,NARROW-OUT,171
5,2021-12-01,NARROW-RPL,10
6,2021-12-01,O-PND-OUT,149
7,2021-12-01,OUT-CONT,1442


In [14]:
# Reshape to wide form: one row per Date, one column per Queue, cells hold Count
df_pivot = df_model.pivot(
    index='Date',
    columns='Queue',
    values='Count',
)
df_pivot

Queue,INB-PROD,INBOUND,INTERNAL,NARROW,NARROW-OUT,NARROW-RPL,O-PND-OUT,OUT-CONT,OUTBOUND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-01,2031.0,2970.0,531.0,550.0,171.0,10.0,149.0,1442.0,1955.0
2021-12-02,2186.0,3405.0,658.0,398.0,374.0,25.0,283.0,927.0,2268.0
2021-12-03,1644.0,3517.0,386.0,386.0,201.0,36.0,172.0,733.0,2040.0
2021-12-04,803.0,3195.0,303.0,249.0,342.0,31.0,225.0,815.0,2301.0
2021-12-05,,1727.0,145.0,28.0,41.0,4.0,41.0,10.0,481.0
2021-12-06,556.0,2799.0,376.0,483.0,107.0,20.0,201.0,821.0,1608.0
2021-12-07,1702.0,3459.0,422.0,330.0,224.0,16.0,251.0,834.0,2103.0
2021-12-08,1703.0,3892.0,647.0,314.0,217.0,27.0,176.0,1038.0,2058.0
2021-12-09,2242.0,4492.0,566.0,266.0,315.0,24.0,309.0,773.0,2269.0
2021-12-10,2082.0,3127.0,434.0,321.0,162.0,24.0,169.0,715.0,2151.0


In [15]:
# Fill the gaps in each queue column with that column's rounded median.
# NOTE(review): a NaN here means the (Date, Queue) pair had no rows at all, so it
# may really be a zero count for that day — confirm the median is the intended fill.
column_medians = df_pivot.median().round()
df_pivot = df_pivot.fillna(column_medians)
df_pivot

Queue,INB-PROD,INBOUND,INTERNAL,NARROW,NARROW-OUT,NARROW-RPL,O-PND-OUT,OUT-CONT,OUTBOUND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-01,2031.0,2970.0,531.0,550.0,171.0,10.0,149.0,1442.0,1955.0
2021-12-02,2186.0,3405.0,658.0,398.0,374.0,25.0,283.0,927.0,2268.0
2021-12-03,1644.0,3517.0,386.0,386.0,201.0,36.0,172.0,733.0,2040.0
2021-12-04,803.0,3195.0,303.0,249.0,342.0,31.0,225.0,815.0,2301.0
2021-12-05,1702.0,1727.0,145.0,28.0,41.0,4.0,41.0,10.0,481.0
2021-12-06,556.0,2799.0,376.0,483.0,107.0,20.0,201.0,821.0,1608.0
2021-12-07,1702.0,3459.0,422.0,330.0,224.0,16.0,251.0,834.0,2103.0
2021-12-08,1703.0,3892.0,647.0,314.0,217.0,27.0,176.0,1038.0,2058.0
2021-12-09,2242.0,4492.0,566.0,266.0,315.0,24.0,309.0,773.0,2269.0
2021-12-10,2082.0,3127.0,434.0,321.0,162.0,24.0,169.0,715.0,2151.0


# Data Modeling

In [16]:
# Daily count series per queue: one NumPy array for each column of the pivot table
data_inbprod = df_pivot['INB-PROD'].to_numpy()
data_inbound = df_pivot['INBOUND'].to_numpy()
data_internal = df_pivot['INTERNAL'].to_numpy()
data_narrow = df_pivot['NARROW'].to_numpy()
data_narrowo = df_pivot['NARROW-OUT'].to_numpy()
data_narrowr = df_pivot['NARROW-RPL'].to_numpy()
data_pndo = df_pivot['O-PND-OUT'].to_numpy()
data_outcont = df_pivot['OUT-CONT'].to_numpy()
data_outbound = df_pivot['OUTBOUND'].to_numpy()

In [17]:
# INB-PROD: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_inbprod = ExponentialSmoothing(data_inbprod)
model_fit = model_inbprod.fit()
# Forecast a single step past the end of the observed series
yhat_inbprod = model_fit.forecast(1)
print(yhat_inbprod)

[738.78628173]


In [18]:
# INBOUND: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_inbound = ExponentialSmoothing(data_inbound)
model_fit = model_inbound.fit()
# Forecast a single step past the end of the observed series
yhat_inbound = model_fit.forecast(1)
print(yhat_inbound)

[2277.8718044]


In [19]:
# INTERNAL: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_internal = ExponentialSmoothing(data_internal)
model_fit = model_internal.fit()
# Forecast a single step past the end of the observed series
yhat_internal = model_fit.forecast(1)
print(yhat_internal)

[426.4828159]


In [20]:
# NARROW: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_narrow = ExponentialSmoothing(data_narrow)
model_fit = model_narrow.fit()
# Forecast a single step past the end of the observed series
yhat_narrow = model_fit.forecast(1)
print(yhat_narrow)

[269.27571635]


In [21]:
# NARROW-OUT: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_narrowo = ExponentialSmoothing(data_narrowo)
model_fit = model_narrowo.fit()
# Forecast a single step past the end of the observed series
yhat_narrowo = model_fit.forecast(1)
print(yhat_narrowo)

[275.8275458]


In [22]:
# NARROW-RPL: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_narrowr = ExponentialSmoothing(data_narrowr)
model_fit = model_narrowr.fit()
# Forecast a single step past the end of the observed series
yhat_narrowr = model_fit.forecast(1)
print(yhat_narrowr)

[17.68965458]


In [23]:
# O-PND-OUT: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_pndo = ExponentialSmoothing(data_pndo)
model_fit = model_pndo.fit()
# Forecast a single step past the end of the observed series
yhat_pndo = model_fit.forecast(1)
print(yhat_pndo)

[218.89657761]


In [24]:
# OUT-CONT: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_outcont = ExponentialSmoothing(data_outcont)
model_fit = model_outcont.fit()
# Forecast a single step past the end of the observed series
yhat_outcont = model_fit.forecast(1)
print(yhat_outcont)

[855.55184409]


In [25]:
# OUTBOUND: fit exponential smoothing on the daily counts (only the series is
# passed, so no trend or seasonal option is set)
model_outbound = ExponentialSmoothing(data_outbound)
model_fit = model_outbound.fit()
# Forecast a single step past the end of the observed series
yhat_outbound = model_fit.forecast(1)
print(yhat_outbound)

[1840.79540006]


In [26]:
# NOTE: how queues are grouped into workloads is specific to each factory/warehouse.
# yhat_fls adds the O-PND-OUT and INTERNAL forecasts;
# yhat_rts adds the three NARROW* forecasts.
yhat_fls = yhat_pndo + yhat_internal
yhat_rts = yhat_narrow + yhat_narrowo + yhat_narrowr

In [27]:
# Divisors that turn a forecast workload into a headcount.
# NOTE(review): 175 and 90 are carried over unchanged from the original prints;
# presumably the number of pallets one operator handles per day on a forklift
# and on a reach truck respectively — confirm with the site.
FORKLIFT_RATE = 175
REACHTRUCK_RATE = 90

# (workload label, next-day forecast, handling rate per operator)
workloads = [
    ('Forklift Inbound Production', yhat_inbprod, FORKLIFT_RATE),
    ('Forklift Inbound STO', yhat_inbound, FORKLIFT_RATE),
    ('Forklift Outbound', yhat_outbound, FORKLIFT_RATE),
    ('Forklift Outbound Container', yhat_outcont, FORKLIFT_RATE),
    ('Forklift Internal+PND', yhat_fls, FORKLIFT_RATE),
    ('Reachtruck', yhat_rts, REACHTRUCK_RATE),
]

# NOTE(review): the headcount is rounded to the nearest integer, so a forecast
# just above a multiple of the rate is rounded down — confirm whether rounding
# up is wanted for staffing.
for label, forecast, rate in workloads:
    print(
        'Next day prediction for', label, 'workload is', forecast,
        'and so, the manpower need for this workload is',
        (forecast / rate).round().astype(int),
    )

Next day prediction for Forklift Inbound Production workload is [738.78628173] and so, the manpower need for this workload is [4]
Next day prediction for Forklift Inbound STO workload is [2277.8718044] and so, the manpower need for this workload is [13]
Next day prediction for Forklift Outbound workload is [1840.79540006] and so, the manpower need for this workload is [11]
Next day prediction for Forklift Outbound Container workload is [855.55184409] and so, the manpower need for this workload is [5]
Next day prediction for Forklift Internal+PND workload is [645.37939351] and so, the manpower need for this workload is [4]
Next day prediction for Reachtruck workload is [562.79291673] and so, the manpower need for this workload is [6]
