In [1]:
from SD_IB_IRP_PPenv import steroid_IRP
from random import choice, randint
from termcolor import colored

### Auxiliary functions for the demo Notebook ###
def print_valid(statement):  
    if statement:  return 'Passed', 'green'
    else: return 'Failed', 'red'

def gen_generic_dic(env):
    ret_dic = {(k,o): 0 for k in env.Suppliers for o in env.Ages[k]}
    return ret_dic

# Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products Environment
by: Juan Betancourt

## Powelleskian model

### State ($S_t$)
-   **Physical State** ($R_t$):

        state:  Current available inventory: (dict)  Inventory of product k \in K of age o \in O_k. Since the state is the inventory before the decision is taken, there is no age 0 inventory.
                
        When backlogs are activated, will appear under age 'B'   
                                           

-   **Other deterministic info** ($Z_t$):

        p: Prices: (dict) Price of product k \in K at supplier i \in M
    
        q: Available quantities: (dict) Available quantity of product k \in K at supplier i \in M
    
        h: Holding cost: (dict) Holding cost of product k \in K
    
        historical_data: (dict) Historical log of information (optional)
    
-   **Belief State** ($B_t$):
    
        sample_paths: Simulated sample paths (optional)


### Actions ($X_t$)
The action can be seen as a three level-decision. These are the three layers:

1. Routes to visit the selected suppliers

2. Quantities to purchase from each supplier

3. Demand compliance plan, dispatch decision

4. (Optional) Backlogs compliance

Accordingly, the action will be a list composed as follows:

$$ X = [\text{routes, purchase, demand compliance, backlogs compliance}] $$

        routes: (list) list of lists, each with the nodes visited on the route (including departure and arriving to the depot)

        purchase: (dict) Units to purchase of product k \in K at supplier i \in M

        demand_compliance: (dict) Units of product k in K of age o \in O_k used to satisfy the demand

        backlogs_compliance: (dict) Units of product k in K of age o \in O_k used to satisfy the backlogs

### Exogenous information ($W_t$)!!!!!!


### Transition Function ($S_M^X$)

### Cost Function ($C_t$)

## INITIALIZATION

### Time Horizon: Two time horizon types (horizon_type = 'episodic')

1. 'episodic': Every episode (simulation) has a finite number of time steps
    
    Related parameters:
        
        - T: Decision periods (time-steps)
    
        
2. !!!NOT DEVELOPED!!! 'continuous': Never-ending episodes
    
    Related parameters: 
        
        - gamma: Discount factor

In [2]:
horizon_type = 'episodic'

T = 4

### Look-ahead approximation: Generation of sample paths (look_ahead = ['d']):

1. List of parameters to be forecasted on the look-ahead approximation ['d', 'p', ...]

2. List with '*' to generate forecasts for all parameters

3. False for no sample path generation

Related parameters:

    - S: Number of sample paths
    
    - LA_horizon: Number of look-ahead periods, includes the current decision period.

In [3]:
look_ahead = ['*']

S = 2
LA_horizon = 3

### Historical data: Generation or usage of historical data (historical_data = ['d'])   

1. ['d', 'p', ...]: List with the parameters the historical info will be generated for

2.  ['*']: Historical info generated for all parameters

3. !!!NOT DEVELOPED!!! path: File path to be processed by upload_historical_data() 

4.  False: No historical data will be used

Related parameter:
    
    - hist_window: Initial log size (time periods)

In [4]:
historical_data = ['*']

hist_window = 10

### Backorders: Catch unsatisfied demand (backorders = False):

1. 'backorders': Demand may be not fully satisfied. Non-complied orders will be automatically fullfilled with an extra-cost

2. 'backlogs': Demand may be not fully satisfied. Non-complied orders will be registered and kept track of on age 'B'

3. False: All demand must be fullfilled

Related parameters:

    - back_o_cost = 20
    - back_l_cost = 20

In [5]:
backorders =  False

### Additional environment parameters

    -   M = 10: Number of suppliers

    -   K = 10: Number of Products

    -   F = 2:  Number of vehicles on the fleet

    -   T = 6:  Number of decision periods

    -   wh_cap = 1e9: Warehouse capacity

    -   (min_sprice,max_sprice): Max and min selling prices (per m and k)

    -   (min_hprice,max_hprice): Max and min holding cost (per k)

    -   penalization_cost: Penalization costs for RL (invalid actions, etc.)

    -   S = 4:  Number of sample paths 

    -   LA_horizon = 5: Number of look-ahead periods

    -   lambda1 = 0.5: Controls demand, ensures feasibility

In [6]:
env_config = {  'M': 3, 
                'K': 3, 
                'T': T, 
                'F': 2, 
                
                'min_sprice': 1, 
                'max_sprice': 500, 
                'min_hprice': 1, 
                'max_hprice': 500,
                
                'S': S, 
                'LA_horizon': LA_horizon, 
                'lambda1': 0.5
            }
            

# Creating an environment

The environment receives all the main parameters plus a random seed as rd_seed, and a dictionary env_config with the other customizable parameters
    
    -   rd_seed: Seed for random number generation

    -   env_config: Dictionary with custom environment parameters

In [7]:
rd_seed = 0
env = steroid_IRP( horizon_type = horizon_type, 
                   look_ahead = look_ahead, 
                   historical_data = historical_data, 
                   backorders = backorders,
                   rd_seed = rd_seed, 
                   env_config = env_config)
repr(env)

'Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products instance. V = 3; K = 3; F = 2'

# Resetting the environment

Once the environment is created, or everytime it will be run again from the start, it must be reset. For this, the class has the reset() method which receives the following boolean parameter:

    -   return_state: Indicates if reset() returns the initial state

In [8]:
return_state = True
state, _ = env.reset(return_state = return_state)

# Retrieving information from the environment

Once the environmnet has been reset, all the initial values can be accessed

    - Inventory[k,o]

In [9]:
print(f'State: {state} \n')

#product = choice(env.Products); age = randint(1, env.O_k[product])
#print(env.O_k[product])
#print(f'The inventory of product {product} and age {age} is {env.state[product,age]}')

State: {(0, 1): 0, (0, 2): 0, (0, 3): 0, (0, 4): 0, (1, 1): 0, (1, 2): 0, (1, 3): 0, (1, 4): 0, (2, 1): 0} 



The environment has a built-in method that helps visualize the state as a dataframe

In [10]:
env.print_inventory()

Ages,1,2,3,4
Products,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0.0,0.0,0.0
1,0,0.0,0.0,0.0
2,0,,,


    -   Available quantities [i,k]
    
    -   Prices [i,k]
    
    -   Holding cost [k]
    
    -   Demand [k]

In [11]:
#print(env.q, '\n')
supplier = choice(env.Suppliers); product = choice(env.Products)
print(f'Supplier {supplier} offers {env.q[supplier, product]} of product {product}')

#print(env.p, '\n')
print(f'Supplier {supplier} offers product {product} for ${env.p[supplier, product]} a unit')

#print(env.h, '\n')
product = choice(env.Products)
print(f'Holding cost of product {product} is ${env.h[product]}')

#print(env.h, '\n')
product = choice(env.Products)
print(f'Demand of product {product} is {env.d[product]}')

Supplier 1 offers 8 of product 0
Supplier 1 offers product 0 for $480 a unit
Holding cost of product 0 is $295
Demand of product 1 is 16.0


Historical data

    -   Available quantiites
    
    -   Prices
    
    -   Holding cost 
    
    -   Demand

In [12]:
supplier = choice(env.Suppliers); product = choice(env.Products)

historical_quantities = env.historical_data['q'][supplier, product]
print(f'The historical a.q. from supplier {supplier}, of product {product} are {historical_quantities} \n')

historical_demand = env.historical_data['d'][product]
print(f'The historical demand of produdct {product} is {historical_demand}')

The historical a.q. from supplier 1, of product 0 are [0, 0, 15, 1, 5, 3, 14, 0, 9, 12, 2, 0, 5, 0, 7, 0, 14, 0, 6, 0, 5, 0, 0, 7, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 9, 0, 0, 0, 15, 0] 

The historical demand of produdct 0 is [0.0, 0.0, 20.0, 12.5, 13.5, 3.0, 16.0, 0.0, 12.5, 12.0, 2.0, 0.0, 5.0, 0.0, 8.0, 6.0, 16.0, 0.0, 6.0, 0.0, 7.5, 0.0, 0.0, 13.5, 14.0, 0.0, 3.0, 0.0, 0.0, 0.0, 8.0, 4.0, 6.0, 0.0, 9.0, 0.0, 0.0, 0.0, 16.0, 2.0]


Sample paths

    -   Available quantities
    
    -   Prices
    
    -   Holding cost 
    
    -   Demand

In [13]:
sample = choice(env.Samples)
proy_day = randint(1, env.LA_horizon - 1)
supplier = choice(env.Suppliers); product = choice(env.Products)

proy_quant = env.sample_paths[('q',sample)][(supplier, product, proy_day)]
print(f'On sample path {sample} the forecasted available quantity of product {product}, on supplier {supplier}, for day {proy_day} is {proy_quant}')

proy_demand = env.sample_paths[('d',sample)][product, proy_day]
print(f'On sample path {sample} the forecasted demand of product {product}, for day {proy_day} is {proy_demand}')

On sample path 0 the forecasted available quantity of product 0, on supplier 2, for day 2 is 13
On sample path 0 the forecasted demand of product 0, for day 2 is 13.5


# Sanity check

    - First day of forecast is the realized random value

In [14]:
sample = choice(env.Samples)
proy_day = 0
supplier = choice(env.Suppliers); product = choice(env.Products)

test_q, col_q = print_valid(env.sample_paths["q",sample][supplier,product,proy_day] == env.q[supplier,product])
print('Test q:', colored(test_q, col_q))
test_p, col_p = print_valid(env.sample_paths["p",sample][supplier,product,proy_day] == env.p[supplier,product])
print('Test p:', colored(test_p, col_p))
test_h, col_h = print_valid(env.sample_paths["h",sample][product,proy_day] == env.h[product])
print('Test h:', colored(test_h, col_h))
test_d, col_d = print_valid(env.sample_paths["d",sample][product,proy_day] == env.d[product])
print('Test h:', colored(test_d, col_d))

Test q: [32mPassed[0m
Test p: [32mPassed[0m
Test h: [32mPassed[0m
Test h: [32mPassed[0m


# Interacting with the environment 

The environment is designed to be fully dynamic and responsive to the user. To do this, the user will interact through the step() method. This method receives an action and makes the corresponding transitions on the environment (i.e., update the inventory, prices, quantities, compute the action's cost, etc.). Here, a full episode is shown step by step, emphasizing on relevant aspects and trasitions to help the user understand the environment's logic.

First, the initial state, demand and available quantities are retrieved and analized. The actions provided to de environment must be feasible in all dimensions of the problem (routing, purchasing and demand compliance). 

In [15]:
print(f'######################################## Time step {env.t} ########################################')
print(f'State s_{env.t}:')
print(env.print_inventory(), '\n ')
print(f'Demand d_{env.t}: {env.d}')
print(f'Available quantities q_{env.t}: {env.q}')

# x = env.historical_data['q'][1,0]
# print(f'Historical of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}') 

######################################## Time step 0 ########################################
State s_0:
Ages      1    2    3    4
Products                  
0         0  0.0  0.0  0.0
1         0  0.0  0.0  0.0
2         0  NaN  NaN  NaN 
 
Demand d_0: {0: 10.0, 1: 16.0, 2: 0.0}
Available quantities q_0: {(1, 0): 8, (1, 1): 11, (1, 2): 0, (2, 0): 4, (2, 1): 10, (2, 2): 0}


A single route will visit all the suppliers. To test the inventory transitions and the cost of the actions, all available items will be purchased (assuming enough vehicle capacity). The demand will be fully satisfied with the purchased products (age 0)

In [16]:
# Visiting all the suppliers in a single route
routes = [[0,1,2,0]]

# Purchase all available quantities
          # (supplier,product)
purchase = {(1,0): 8, (2,0): 4,     # product 0: 12 units
            (1,1): 11, (2,1): 10,   # product 1: 21 units
            (1,2): 0, (2,2): 0}     # product 2:  0 units

# Demand compliance
                    #(Product,age) 
demand_compliance = {(0,0): 10, (0,1): 0, (0,2): 0, (0,3): 0, (0,4): 0,
                     (1,0): 16, (1,1): 0, (1,2): 0, (1,3): 0, (1,4): 0,
                     (2,0): 0,  (2,1): 0}

action = [routes, purchase, demand_compliance]

The action's cost is computed manually as the sum of the routing, purchasing and holding costs

In [17]:
transport_cost = env.c[routes[0][0], routes[0][1]] + env.c[routes[0][1], routes[0][2]] + env.c[routes[0][2], routes[0][3]]
purchase_cost = 0
for i in env.Suppliers:
    for k in env.Products:
        purchase_cost += purchase[i,k] * env.p[i,k]
holding_cost = env.h[1] * 5 + env.h[0] * 2
total_cost = transport_cost + purchase_cost + holding_cost 
print(f'The total cost of the action is: {total_cost}')

The total cost of the action is: 10803


With this **valid** action, the step method can be called. The input parameters 'validate_action' and 'warnings' will determine whether an action is valid and raise warnings if an undersirable conduct occurs, respectively. This method returns:
    
    -   state: New state, inventory levels before the next decision
    -   reward: The total cost of the action (transport, purchase and holding)
    -   done: Indicates if the episode has finished
    -   _: Extra information: (Dict) Parameters, historics and sample paths

The reward computed by the environment is correct. The inventory for the products 1 and 2 matches the demand and purchase ($12-10=2$ and $21-16=5$, respectively). The demand and other parameters are updated for the next decision step. 

In [18]:
state, reward, done, _ = env.step(action = action, validate_action = True)

print(f'The computated cost of the action is {reward}')
print(f'Episode finished: {done} \n')

print(f'################################### Time step {env.t} ###################################')
print(f's_{env.t}:')
print(f'{env.print_inventory()}\n')
print(f'Demand: d_{env.t}: {env.d}')
print(f'Available quantities: q_{env.t}: {env.q}')

The computated cost of the action is 10803
Episode finished: False 

################################### Time step 1 ###################################
s_1:
Ages      1    2    3    4
Products                  
0         2  0.0  0.0  0.0
1         5  0.0  0.0  0.0
2         0  NaN  NaN  NaN

Demand: d_1: {0: 13.0, 1: 20.5, 2: 4.0}
Available quantities: q_1: {(1, 0): 8, (1, 1): 11, (1, 2): 0, (2, 0): 9, (2, 1): 15, (2, 2): 4}


Again, the same route and purchasing strategy are used. In this step, one of the items stored of product 1 (of age 1) will be used 

In [19]:
# Visiting all the suppliers
routes = [[0,1,2,0]]

# Purchase exact quantity for 
purchase = {(1,0): 8,  (2,0): 9,    # product 0: 17 units
            (1,1): 11, (2,1): 15,   # product 1: 26 units
            (1,2): 0,  (2,2): 4}    # product 2: 4 units

# Demand complience
demand_complience = {(0,0): 13, (0,1): 0, (0,2): 0, (0,3): 0, (0,4): 0,
                     (1,0): 19.5, (1,1): 1, (1,2): 0, (1,3): 0,  (1,4): 0,
                     (2,0): 4, (2,1): 0}

action = [routes, purchase, demand_complience]

Product 0 has ($17-13=4$) of age 1 and 2 of age 2. Product 1 has 6.5 items ($26-19,5$) of age 1 and 4 ($5-1=4$) of age 2 

In [20]:
state, reward, done, _  = env.step(action = action, validate_action = True)
print(f'The computed cost of the action is {reward}')
print(f'Episode finished: {done} \n')

print(f'################################### Time step {env.t} ###################################')
print(f's_{env.t}:')
print(f'{env.print_inventory()}\n')
print(f'Demand: d_{env.t}: {env.d}')
print(f'Available quantities: q_{env.t}: {env.q}')

The computed cost of the action is 23248.5
Episode finished: False 

################################### Time step 2 ###################################
s_2:
Ages        1    2    3    4
Products                    
0         4.0  2.0  0.0  0.0
1         6.5  4.0  0.0  0.0
2         0.0  NaN  NaN  NaN

Demand: d_2: {0: 0.0, 1: 0.0, 2: 1.0}
Available quantities: q_2: {(1, 0): 0, (1, 1): 0, (1, 2): 0, (2, 0): 0, (2, 1): 0, (2, 2): 1}


The same action strategy is used. No inventory remains from the purchase

In [21]:
# Visiting all the suppliers
routes = [[0,1,2,0]]

# Purchase exact quantity for 
purchase = {(1,0): 0, (2,0): 0,    # product 0: 0 units
            (1,1): 0, (2,1): 0,    # product 1: 0 units
            (1,2): 0, (2,2): 1}    # product 2: 1 units

# Demand compliance
demand_compliance = {(0,0): 0, (0,1): 0, (0,2): 0, (0,3): 0, (0,4): 0,
                     (1,0): 0, (1,1): 0, (1,2): 0, (1,3): 0, (1,4): 0,
                     (2,0): 1, (2,1): 0}

X = [routes, purchase, demand_compliance]

In [22]:
state, reward, done, _  = env.step(action = X, validate_action = True)
print(f'The computed cost of the action is {reward}')
print(f'Episode finished: {done} \n')

print(f'################################### Time step {env.t} ###################################')
print(f's_{env.t}:')
print(f'{env.print_inventory()}\n')
print(f'Demand: d_{env.t}: {env.d}')
print(f'Available quantities: q_{env.t}: {env.q}')

The computed cost of the action is 3460.0
Episode finished: False 

################################### Time step 3 ###################################
s_3:
Ages      1    2    3    4
Products                  
0         0  4.0  2.0  0.0
1         0  6.5  4.0  0.0
2         0  NaN  NaN  NaN

Demand: d_3: {0: 0.0, 1: 14.0, 2: 6.0}
Available quantities: q_3: {(1, 0): 0, (1, 1): 14, (1, 2): 0, (2, 0): 0, (2, 1): 0, (2, 2): 6}


In [23]:
# Visiting all the suppliers
routes = [[0,1,2,0]]

# Purchase exact quantity for 
#           (supplier,product)
purchase = {(1,0): 0,  (2,0): 0,   # product 0:  0 units
            (1,1): 14, (2,1): 0,   # product 1: 14 units
            (1,2): 0,  (2,2): 6}   # product 2:  6 units

# Demand compliance
#                   (product, age)
demand_compliance = {(0,0): 0, (0,1): 0, (0,2): 0, (0,3): 0, (0,4): 0,
                     (1,0): 14, (1,1): 0, (1,2): 0, (1,3): 0, (1,4): 0,
                     (2,0): 6, (2,1): 0}

X = [routes, purchase, demand_compliance]

Since the epiosde has ended ($t = 4$), the state doesn't update. 

In [24]:
state, reward, done, _  = env.step(action = X, validate_action = True)
print(f'The computed cost of the action is {reward}')
print(f'Episode finished: {done} \n')

print(f'################################### Time step {env.t} ###################################')
print(f's_{env.t}:')
print(f'{env.print_inventory()}\n')
print(f'Demand: d_{env.t}: {env.d}')
print(f'Available quantities: q_{env.t}: {env.q}')

The computed cost of the action is 6782.5
Episode finished: True 

################################### Time step 4 ###################################
s_4:
Ages      1    2    3    4
Products                  
0         0  4.0  2.0  0.0
1         0  6.5  4.0  0.0
2         0  NaN  NaN  NaN

Demand: d_4: {0: 0.0, 1: 14.0, 2: 6.0}
Available quantities: q_4: {(1, 0): 0, (1, 1): 14, (1, 2): 0, (2, 0): 0, (2, 1): 0, (2, 2): 6}


# Invalid actions

The environment has a complete action validator. When the 'action_validation' parameter of the step() method is True, the passed action's feasibility will be tested on routing, purchasing and demand compliance. Let's reset the environment and try a couple of invalid actions. There is a base feasible action. Below, various infeasible actions commented. 

In [25]:
env.reset()

print(f'################################### Time step {env.t} ###################################')
print(f's_{env.t}:')
print(f'{env.print_inventory()}\n')
print(f'Demand: d_{env.t}: {env.d}')
print(f'Available quantities: q_{env.t}: {env.q}')


################################### Time step 0 ###################################
s_0:
Ages      1  2    3    4
Products                
0         0  0  0.0  NaN
1         0  0  NaN  NaN
2         0  0  0.0  0.0

Demand: d_0: {0: 0.0, 1: 0.0, 2: 16.5}
Available quantities: q_0: {(1, 0): 0, (1, 1): 0, (1, 2): 7, (2, 0): 0, (2, 1): 0, (2, 2): 13}


In [26]:
#### Feasible action ####
routes = [[0,1,2,0]]
purchase = {(1,0): 0,  (2,0): 0,  
            (1,1): 0,  (2,1): 1,
            (1,2): 8, (2,2): 4}  
demand_compliance = {(0,0): 0, (0,1): 0, (0,2): 0,
                     (1,0): 1, (1,1): 0, (1,2): 0,
                     (2,0): 10, (2,1): 0}
demand_compliance = {(0,0): 0, (0,1): 0, (0,2): 0,
                     (1,0): 0, (1,1): 0, (1,2): 1,
                     (2,0): 10, (2,1): 0}
'''
### Infeasible actions ###
# Incomplete route
routes = [[0,1,2]]

# Visiting non-existing node
routes = [[0,1,4,0]]

# Buying non-available item
purchase = {(1,0): 1,  (2,0): 0,  
            (1,1): 0,  (2,1): 1,
            (1,2): 8, (2,2): 4} 

# Using non-purchased item
demand_compliance = {(0,0): 1, (0,1): 0, (0,2): 0,
                     (1,0): 1, (1,1): 0, (1,2): 0,
                     (2,0): 10, (2,1): 0}

# Using non-existing inventary items
demand_compliance = {(0,0): 0, (0,1): 0, (0,2): 0,
                     (1,0): 0, (1,1): 0, (1,2): 1,
                     (2,0): 10, (2,1): 0}
'''

action = [routes, purchase, demand_compliance]
env.step(action, validate_action = True)

  '''


AssertionError: Purchased quantities exceed suppliers' available quantities  (1,2)