In [2]:
from SD_IB_IRP_PPenv import steroid_IRP
from random import choice, randint
from termcolor import colored

rd_seed = 1

### Auxiliary functions for the demo
def print_valid(statement):
    """Map the outcome of a check to a status label and a color name.

    Args:
        statement: Any value; only its truthiness is used.

    Returns:
        tuple: ``('Passed', 'green')`` when ``statement`` is truthy,
        ``('Failed', 'red')`` otherwise.
    """
    return ('Passed', 'green') if statement else ('Failed', 'red')

def gen_purchs_dic(env):
    """Build an all-zero purchase plan.

    Args:
        env: Object exposing the iterables ``Suppliers`` and ``Products``.

    Returns:
        dict: ``{(i, k): 0}`` for every supplier ``i`` and product ``k``,
        ordered supplier-major.
    """
    supplier_product_pairs = ((i, k) for i in env.Suppliers for k in env.Products)
    return dict.fromkeys(supplier_product_pairs, 0)

def gen_dem_dic(env):
    """Build an all-zero demand compliance plan.

    Args:
        env: Object exposing ``Products`` (iterable of product ids) and
            ``Ages`` (mapping from product id to the iterable of its ages).

    Returns:
        dict: ``{(k, o): 0}`` for every product ``k`` and every age ``o`` in
        ``env.Ages[k]``.
    """
    # Ages is indexed by product, so the outer loop must run over products.
    # Looping over suppliers silently dropped any product whose id is not
    # also a supplier id (e.g. product 0 when suppliers are numbered from 1).
    return {(k, o): 0 for k in env.Products for o in env.Ages[k]}

def print_state(env):
    """Render the inventory state of ``env`` as a multi-line string.

    One line is produced per product in ``env.Products``, listing the
    inventory of each age in ``env.Ages[k]``. When
    ``env.others['back_orders']`` equals ``'back-logs'`` the entry
    ``env.state[k, "B"]`` is appended to the product's line.

    Args:
        env: Object exposing ``Products``, ``Ages``, ``state`` and ``others``.

    Returns:
        str: The formatted text (nothing is printed).
    """
    pieces = ['State: \n']

    for k in env.Products:
        per_age = ''.join(f'age {o}: {env.state[k,o]}; ' for o in env.Ages[k])
        if env.others['back_orders'] == 'back-logs':
            line_end = f'BL: {env.state[k,"B"]}\n'
        else:
            line_end = '\n'
        pieces.append(f'Product {k}: {per_age}{line_end}')

    return ''.join(pieces)

# Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products environment
by: Juan Betancourt

## Powelleskian model

### State ($S_t$)
-   $R_t$ **Physical State**:

        state: Current inventory (dict) - Inventory of k \in K of age o \in O_k

-   $I_t$ **Other deterministic info**:

        p: Prices (dict) - Price of k \in K at i \in M
    
        q: Available quantities (dict) - Available quantities of k \in K at i \in M
    
        h: Holding cost (dict) - Holding cost of k \in K
    
        historic_data (dict) - Historic log of information (optional)
    
-   $B_t$ **Belief State**:
    
        sample_paths: Sample paths


### Actions ($X_t$)
The action can be seen as a three-level decision. These are the three layers:

1. Routes to visit selected suppliers

2. Quantity to purchase on each supplier

3. Demand compliance plan, dispatch decision

Accordingly, the action will be a list composed as follows:

$$ X = [\text{routes, purchase, demand compliance}] $$

        - routes (list): list of lists with the nodes visited on each route (including the departure from and the arrival at the depot)

        - purchase (dict): Units to purchase of k \in K at i \in M

        - demand_complience (dict): Units to use of k in K of age o \in O_k


## INITIALIZATION

### Time Horizon: Two time horizon types (horizon_type = 'episodic')

1. 'episodic': Every episode (simulation) has a finite number of steps
    
    Related parameters:
        
        - T: Decision periods (time-steps)
    
        
2. 'continuous': Never-ending episodes
    
    Related parameters: 
        
        - gamma: Discount factor

(For the environment's internal processes: 1 for episodic, 0 for continuous)

In [3]:
# 'episodic': every episode (simulation) has a finite number of steps.
horizon_type = 'episodic'

# Number of decision periods (time-steps) per episode.
T = 5

### Look-ahead approximation: Generation of sample paths (look_ahead = ['d']):

1. List of parameters to be forecast in the look-ahead approximation ['d', 'p', ...]

2. List with '*' to generate forecasts for all parameters

3. False for no sample path generation

Related parameters:

    - S: Number of sample paths
    
    - LA_horizon: Number of look-ahead periods

In [4]:
# ['*'] requests forecasts (sample paths) for all parameters.
look_ahead = ['*']

S = 2  # number of sample paths
LA_horizon = 3  # number of look-ahead periods

### Historic data: Generation or usage of historic data (historic_data = ['d'])   

1. ['d', 'p', ...]: List with the parameters the historic info will be generated for

2.  ['*']: Historic info generated for all parameters

3. !!! NOT DEVELOPED path: File path to be processed by upload_historic_data() 

4.  False: No historic data will be used

Related parameter:
    
    - hist_window: Initial log size (time periods)

In [5]:
# ['*'] requests generated historic information for all parameters.
historic_data = ['*']

# Initial size of the historic log, in time periods.
hist_window = 10

### Back-orders: Catch unsatisfied demand (back_orders = False):

1. 'back-orders': Demand may be left partially unsatisfied. Unfulfilled orders will be automatically fulfilled at an extra cost

2. 'back-logs': Demand may be left partially unsatisfied. Unfulfilled orders will be registered and kept track of

3. False: All demand must be fulfilled

Related parameter:

    - back_o_cost = 20
    - back_l_cost = 20

In [6]:
# False: all demand must be fulfilled (neither back-orders nor back-logs).
back_orders = False

### Other customizable parameters

    -   M = 10: Number of suppliers

    -   K = 10: Number of Products

    -   F = 2:  Number of vehicles in the fleet

    -   T = 6:  Number of decision periods

    -   wh_cap = 1e9: Warehouse capacity

    -   min/max_sprice: Min and max selling prices (per m and k)

    -   min/max_hprice: Min and max holding costs (per k)

    -   penalization_cost: Penalization costs for RL (invalid actions, etc.)

    -   S = 4:  Number of sample paths 

    -   LA_horizon = 5: Number of look-ahead periods

    -   lambda1 = 0.5: Controls demand, assures feasibility

In [7]:
# Custom environment parameters; T, S and LA_horizon reuse the values
# assigned in the earlier cells.
env_config = {  'M': 3,  # number of suppliers
                'K': 3,  # number of products
                'T': T,  # number of decision periods
                'F': 2,  # number of vehicles in the fleet
                
                'min_sprice': 1,  # selling-price bounds
                'max_sprice': 500, 
                'min_hprice': 1,  # holding-cost bounds
                'max_hprice': 500, 
                # NOTE(review): back-log cost; presumably unused while
                # back_orders is False -- confirm.
                'back_l_cost': 20,
                
                'S': S,  # number of sample paths
                'LA_horizon': LA_horizon,  # number of look-ahead periods
                'lambda1': 0.5  # controls demand, assures feasibility
            }
            

# Creating an environment

The environment receives all the previous parameters, plus a random seed and a customizable `env_config` dictionary with the specified characteristics:
    
    -   rd_seed: Seed for random number generation

    -   env_config: Receives a dictionary with custom environment parameters

In [8]:
# Instantiate the environment with the settings chosen in the cells above.
env = steroid_IRP(  horizon_type = horizon_type, 
                    look_ahead = look_ahead, 
                    historic_data = historic_data, 
                    back_orders = back_orders,
                    rd_seed = rd_seed, 
                    env_config = env_config)
# Last expression of the cell: shows the instance's textual description.
repr(env)

'Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products instance. V = 3; K = 3; F = 2'

# Resetting the environment

Once the environment is created, or every time it is run again from the start, it must be reset. For this, the class has the reset method, which receives a boolean under the parameter:

    -   return_state: Indicates if reset() must return the initial state

In [9]:
# return_state indicates whether reset() must return the initial state;
# here it is not requested and the state is read from env afterwards.
return_state = False
env.reset(return_state = return_state)

# Step

Information from the initial state is retrieved.

In [10]:
# Show the current time step together with the inventory state (s),
# the demand (d) and the available quantities (q) held by the environment.
print(f'######################################## Time step {env.t} ########################################')
# 'edad' is Spanish for 'age': state keys are (product, age) pairs.
print('   (prod,edad)')
print(f's_{env.t}: {env.state}')
print(f'd_{env.t}: {env.d}')
print(f'q_{env.t}: {env.q}')

# Optional check, left disabled: compare the historic log of q with its current value.
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}') 


######################################## Time step 0 ########################################
   (prod,edad)
s_0: {(0, 1): 0, (0, 2): 0, (1, 1): 0, (1, 2): 0, (1, 3): 0, (1, 4): 0, (1, 5): 0, (2, 1): 0}
d_0: {0: 4.0, 1: 0.0, 2: 15.0}
q_0: {(1, 0): 0, (1, 1): 0, (1, 2): 15, (2, 0): 4, (2, 1): 0, (2, 2): 0}


An arbitrary feasible action is generated and its cost is computed 

In [11]:
# Two routes, one per supplier, each leaving from and returning to node 0
routes = [[0, 1, 0], [0, 2, 0]]

# Purchase plan with every (supplier, product) quantity set to zero
purchase = gen_purchs_dic(env)
print(purchase)

# Demand compliance plan with every quantity set to zero
demand_complience = gen_dem_dic(env)

# Back-logs compliance plan, all zeros: five entries for the first index 0
# and 1, two entries for 2
back_logs_complience = {(k, o): 0
                        for k, n_entries in ((0, 5), (1, 5), (2, 2))
                        for o in range(n_entries)}

X = [routes, purchase, demand_complience, back_logs_complience]

# Transport cost: sum of the arc costs between consecutive nodes of each route
transport_cost = sum(env.c[origin, destination]
                     for route in routes
                     for origin, destination in zip(route, route[1:]))

# Purchase cost: purchased units times price over all supplier-product pairs
purchase_cost = sum(purchase[i, k] * env.p[i, k]
                    for i in env.Suppliers
                    for k in env.Products)

# NOTE(review): the unit counts (2 and 1) are hard-coded -- confirm they match
# the inventory that is actually held after this action.
holding_cost = env.h[1] * 2 + env.h[0] * 1

total_cost = transport_cost + purchase_cost + holding_cost
print(f'The total cost of the action is: {total_cost}')

{(1, 0): 0, (1, 1): 0, (1, 2): 0, (2, 0): 0, (2, 1): 0, (2, 2): 0}
The total cost of the action is: 2694


With a **valid** action, the step method can be called. This method returns:
    
    -   state: New state
    -   reward: The total cost of the action (transport, purchase and holding)
    -   done: Indicates if the episode has finished
    -   _: Extra information 

In [None]:
# step() returns the new state, the total cost of the action (reward),
# a flag telling whether the episode has finished, and extra information.
state, reward, done, _ = env.step(action = X, validate_action = True)

print(f'The computated cost of the action is {reward}')
print(f'Episode finished: {done} \n')

# Show the environment's time step, state (s), demand (d) and available
# quantities (q) after the step.
print(f'######################################## Time step {env.t} ########################################')
print(f's_{env.t}: {env.state}')
print(f'd_{env.t}: {env.d}')
print(f'q_{env.t}: {env.q}')

# Optional check, left disabled: compare the historic log of q with its current value.
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}')

In [None]:
# A single route visiting both suppliers and returning to node 0
routes = [[0, 1, 2, 0]]

# Units to purchase, keyed by (supplier, product)
purchase = {
    (1, 0): 8,  (2, 0): 9,    # product 0: 17 units
    (1, 1): 11, (2, 1): 15,   # product 1: 26 units
    (1, 2): 0,  (2, 2): 4,    # product 2: 4 units
}

# Units used to satisfy demand, keyed by (product, age)
demand_complience = {
    (0, 0): 13,   (0, 1): 0, (0, 2): 0, (0, 3): 0, (0, 4): 0,
    (1, 0): 19.5, (1, 1): 1, (1, 2): 0, (1, 3): 0, (1, 4): 0,
    (2, 0): 4,    (2, 1): 0,
}

X = [routes, purchase, demand_complience]

# Transport cost: sum of the arc costs between consecutive nodes of the route
tour = routes[0]
transport_cost = sum(env.c[origin, destination]
                     for origin, destination in zip(tour, tour[1:]))

# Purchase cost: purchased units times price over all supplier-product pairs
purchase_cost = sum(purchase[i, k] * env.p[i, k]
                    for i in env.Suppliers
                    for k in env.Products)

# NOTE(review): the unit counts (5 and 7.5) are hard-coded -- confirm they
# match the inventory that is actually held after this action.
holding_cost = 5 * env.h[0] + 7.5 * env.h[1]

total_cost = transport_cost + purchase_cost + holding_cost
print(f'The total cost of the action is: {total_cost}')

In [None]:
# step() returns the new state, the total cost of the action (reward),
# a flag telling whether the episode has finished, and extra information.
state, reward, done, _  = env.step(action = X, validate_action = True)
print(f'The computated cost of the action is {reward}')
print(f'Episode finished: {done} \n')

# Show the environment's time step, state (s), demand (d) and available
# quantities (q) after the step.
print(f'######################################## Time step {env.t} ########################################')
print(f's_{env.t}: {env.state}')
print(f'd_{env.t}: {env.d}')
print(f'q_{env.t}: {env.q}')

# Optional check, left disabled: compare the historic log of q with its current value.
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}')

Let's try some invalid actions on the environment
