In [117]:
!pip install pymdptoolbox



In [118]:
import numpy as np
import pandas as pd
import pickle
import itertools
from scipy import sparse
import mdptoolbox


### Warehouse Settings

In [119]:
warehouse_dim_x = 2
warehouse_dim_y = 2
warehouse_actions = ['store', 'restore']
warehouse_blocktypes = ['empty', 'red', 'blue', 'white']

###############
# |-3|-2|
# |-2|-1|0
###############

warehouse_action_reward = [-1,-2,-2,-3]
warehouse_action_not_possible = -100
warehouse_action_not_possible_store = -100
warehouse_action_not_possible_restore = -100

warehouse_size = warehouse_dim_x * warehouse_dim_y
warehouse_field_indices = range(1, warehouse_size +1)

print("Warehouse Size:", warehouse_size)
print("Warehouse Actions:", ", ".join(warehouse_actions))
print("Block Types: ", ", ".join(warehouse_blocktypes))

Warehouse Size: 4
Warehouse Actions: store, restore
Block Types:  empty, red, blue, white


### Loading Dataset

In [120]:
training_data = pd.read_csv('./res/warehousetraining.txt', sep="\t", names=['action', 'blocktype'])
print("Dataset Size:", training_data.size)
test_data = pd.read_csv('./res/warehouseorder.txt', sep="\t", names=['action', 'blocktype'])
print("Testset Size:", test_data.size)

print(training_data.head())

Dataset Size: 24216
Testset Size: 120
    action blocktype
0    store       red
1    store      blue
2    store     white
3  restore      blue
4  restore     white


### Single Probabilities

In [121]:
probability_map = training_data.copy()
probability_map = probability_map.groupby(['action', 'blocktype']).size().reset_index(name='probability')
probability_map['probability'] = probability_map['probability'].div(len(training_data))

print(probability_map.head(6))

    action blocktype  probability
0  restore      blue     0.125289
1  restore       red     0.246862
2  restore     white     0.127849
3    store      blue     0.125289
4    store       red     0.246862
5    store     white     0.127849


### Warehouse States

In [122]:
warehouse_blocktypes_without_empty = list(warehouse_blocktypes)
if 'empty' in warehouse_blocktypes_without_empty:
    warehouse_blocktypes_without_empty.remove('empty')

warehouse_dataframe_columns = []
for field in range(warehouse_size):
    warehouse_dataframe_columns.append("field"+str(field))

warehouse_dataframe_columns.append("action")
warehouse_dataframe_columns.append("blocktype")

warehouse_dataframe = pd.DataFrame(list(itertools.product(warehouse_blocktypes, warehouse_blocktypes, warehouse_blocktypes, warehouse_blocktypes, warehouse_actions, warehouse_blocktypes_without_empty)), columns=warehouse_dataframe_columns)
warehouse_dataframe = pd.merge(warehouse_dataframe, probability_map, on=["action", "blocktype"])

print(warehouse_dataframe)

     field0 field1 field2 field3   action blocktype  probability
0     empty  empty  empty  empty    store       red     0.246862
1     empty  empty  empty    red    store       red     0.246862
2     empty  empty  empty   blue    store       red     0.246862
3     empty  empty  empty  white    store       red     0.246862
4     empty  empty    red  empty    store       red     0.246862
...     ...    ...    ...    ...      ...       ...          ...
1531  white  white   blue  white  restore     white     0.127849
1532  white  white  white  empty  restore     white     0.127849
1533  white  white  white    red  restore     white     0.127849
1534  white  white  white   blue  restore     white     0.127849
1535  white  white  white  white  restore     white     0.127849

[1536 rows x 7 columns]


### Transition Probability Matrix

In [123]:
transition_probability_matrix = []

def store_transitions(current_state, place, dataframe):
    if current_state[place] == 'empty':
        for i in warehouse_field_indices:
            if (i == place):
                dataframe = dataframe.loc[dataframe.iloc[ : , i-1]== row [warehouse_size+2]]
            else:
                dataframe = dataframe.loc[dataframe.iloc[ : , i-1] == row [i]]
    else:
        for i in warehouse_field_indices:
            dataframe = dataframe.loc[dataframe.iloc[ : , i-1] == row [i]]
    return dataframe

def restore_transitions(current_state, place, dataframe):
    if current_state[place] == current_state[warehouse_size+2]:
        for i in warehouse_field_indices:
            if (i == place):
                dataframe = dataframe.loc[dataframe.iloc[ : , i-1]== 'empty']
            else:
                dataframe = dataframe.loc[dataframe.iloc[ : , i-1] == row [i]]
    else:
        for i in warehouse_field_indices:
            dataframe = dataframe.loc[dataframe.iloc[ : , i-1] == row [i]]
    return dataframe

for field in warehouse_field_indices: #We start from 1 as 0 is the starting position of the robot
    transition_probability_matrix_part = np.zeros((len(warehouse_dataframe),len(warehouse_dataframe)), dtype=np.float16)
    for entry in warehouse_dataframe.itertuples():
        next_states = []

        if (entry[warehouse_size+1] == 'store'):
            next_states = store_transitions(entry, field, warehouse_dataframe)

        elif(entry[warehouse_size+1] == 'restore'):
            next_states = restore_transitions(entry, field, warehouse_dataframe)

        for i in next_states.index:
            transition_probability_matrix_part.itemset((entry[0],i), round(next_states.loc[[i]].probability,4))

    transition_probability_matrix.append(transition_probability_matrix_part)

print(transition_probability_matrix)

[array([[0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       ...,
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ]],
      dtype=float16), array([[0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       ...,
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ]],
      dtype=float16), array([[0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],
       [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.1278],

### Reward Matrix

In [124]:
reward_matrix=[]

def reward_store(row, place, matrix):
    if row[place]=='0':
        matrix.append(warehouse_action_reward[place-1])
    else:
        matrix.append(warehouse_action_not_possible_store)
    return matrix

def reward_restore(row, place, matrix):
    if row[place]==row[warehouse_size+2]:
        matrix.append(warehouse_action_reward[place-1])
    else:
        matrix.append(warehouse_action_not_possible_restore)
    return matrix

for row in warehouse_dataframe.itertuples():
    reward_matrix_part = []
    for place in warehouse_field_indices: #We start from 1 as 0 is the starting position of the robot
        if (row[warehouse_size+1] == 'store'):
            reward_matrix_part = reward_store(row, place, reward_matrix_part)
        if (row[warehouse_size+1] == 'restore'):
            reward_matrix_part = reward_restore(row, place, reward_matrix_part)
    reward_matrix.append(reward_matrix_part)

reward_matrix = np.array(reward_matrix)
print(reward_matrix)

[[-100 -100 -100 -100]
 [-100 -100 -100 -100]
 [-100 -100 -100 -100]
 ...
 [  -1   -2   -2 -100]
 [  -1   -2   -2 -100]
 [  -1   -2   -2   -3]]


### Verify Matrices

In [125]:
mdpcheck = mdptoolbox.util.check(transition_probability_matrix, reward_matrix)

if mdpcheck is None:
    print("No errors during matrices check")


No errors during matrices check


### MDP Run

In [126]:
mdp_policy = mdptoolbox.mdp.PolicyIteration(transition_probability_matrix,reward_matrix,0.95, max_iter=100)
mdp_value = mdptoolbox.mdp.ValueIteration(transition_probability_matrix,reward_matrix,0.95, max_iter=100)

print("Running MDP policy...")
mdp_policy.run()

print("Running MDP value...")
mdp_value.run()

Running MDP policy...
Running MDP value...


## MDP Evaluation

### Greedy Algorithm for Smart Factory

In [127]:
current_warehouse_state = np.full((warehouse_dim_x * warehouse_dim_x), 'empty')
warehouse_performance = 0
warehouse_not_processable_counter = 0

for order in test_data.itertuples():
    order_processable = False

    for field in warehouse_field_indices:
        if order.action == 'store':
            if current_warehouse_state[field-1] == 'empty':
                current_warehouse_state[field-1] = order.blocktype
                warehouse_performance +=warehouse_action_reward[field-1]
                order_processable = True
        elif order.action == 'restore':
            if current_warehouse_state[field-1] == order.blocktype:
                current_warehouse_state[field-1] = 'empty'
                warehouse_performance +=warehouse_action_reward[field-1]
                order_processable = True
    if not order_processable:
        warehouse_performance += warehouse_action_not_possible
        warehouse_not_processable_counter += 1

print("Warehouse performance:", warehouse_performance)
print("Number of not processable orders:", warehouse_not_processable_counter)

Warehouse performance: -4344
Number of not processable orders: 42


### MDP Test

In [128]:
current_warehouse_state = np.full((warehouse_dim_x * warehouse_dim_x), 'empty')
warehouse_performance = 0
warehouse_not_processable_counter = 0

for order in test_data.itertuples():
    order_processable = False

    order_dataframe = warehouse_dataframe

    for field in warehouse_field_indices:
        order_dataframe = order_dataframe.loc[order_dataframe.iloc[ : , field-1]== str(current_warehouse_state[field-1])]

    order_dataframe = order_dataframe.loc[order_dataframe['action'] == order.action]
    order_dataframe = order_dataframe.loc[order_dataframe['blocktype'] == order.blocktype]

    policy_index = mdp_policy.policy[order_dataframe.index[0]]
    warehouse_performance += warehouse_action_reward[pol]

    if order.action == 'store':
        if (current_warehouse_state [policy_index] == order.blocktype):
            warehouse_not_processable_counter += 1
            warehouse_performance = warehouse_action_not_possible - warehouse_action_reward[policy_index]
        current_warehouse_state [policy_index] = order.blocktype
    elif order.action == 'restore':
        if(current_warehouse_state [policy_index] == 'empty'):
            warehouse_not_processable_counter += 1
            warehouse_performance = warehouse_action_not_possible - warehouse_action_reward[policy_index]
        current_warehouse_state [policy_index] = 'empty'

print("Warehouse performance:", warehouse_performance)
print("Number of not processable orders:", warehouse_not_processable_counter)

Warehouse performance: -99
Number of not processable orders: 25


### MDP Output

In [129]:
print("MDP policy:")
print(mdp_policy.policy)
print(mdp_policy.V)
print(mdp_policy.iter)

print("MDP value:")
print(mdp_value.policy)
print(mdp_value.V)
print(mdp_value.iter)

MDP policy:
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 