In [1]:
from SD_IB_IRP_PPenv import steroid_IRP
from random import choice, randint
from termcolor import colored

# Seed later passed to the environment constructor as `rd_seed` (random number generation)
rd_seed = 2

### Auxiliary functions for the demo
def print_valid(statement):
    """Translate the outcome of a check into a (label, colour) pair.

    Returns ('Passed', 'green') when `statement` is truthy and
    ('Failed', 'red') otherwise.
    """
    verdict = ('Passed', 'green') if statement else ('Failed', 'red')
    return verdict

def gen_purchs_dic(env):
    """Build an all-zero purchase plan for `env`.

    The result holds one (supplier, product) key for every pair drawn from
    env.Suppliers and env.Products, each mapped to 0.
    """
    empty_plan = {}
    for supplier in env.Suppliers:
        for product in env.Products:
            empty_plan[supplier, product] = 0
    return empty_plan

def gen_dem_dic(env):
    """Build an all-zero demand-compliance plan for `env`.

    Keys are (product, o) pairs, with o running from 0 up to and including
    env.O_k[product]; every value starts at 0.
    """
    empty_plan = {}
    for product in env.Products:
        for idx in range(env.O_k[product] + 1):
            empty_plan[product, idx] = 0
    return empty_plan

def print_state(env):
    """Return a text summary of env.state (the string is returned, not printed).

    One line is produced per product in env.Products, listing the inventory
    stored under each age in env.Ages[product]. When
    env.others['back_orders'] equals 'back-logs', the value stored under the
    "B" key of that product is appended as 'BL'.
    """
    chunks = ['State: \n']

    for product in env.Products:
        ages_txt = ''.join(
            f'age {age}: {env.state[product, age]}; ' for age in env.Ages[product]
        )
        if env.others['back_orders'] == 'back-logs':
            tail = f'BL: {env.state[product, "B"]}\n'
        else:
            tail = '\n'
        chunks.append(f'Product {product}: ' + ages_txt + tail)

    return ''.join(chunks)

# Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products environment
by: Juan Betancourt

## INITIALIZATION

### Time Horizon: Two time horizon types (horizon_type = 'episodic')

1. 'episodic': Every episode (simulation) has a finite number of steps
    
    Related parameters:
        
        - T: Decision periods (time-steps)
    
        
2. 'continuous': Never-ending episodes
    
    Related parameters: 
        
        - gamma: Discount factor

(Internally, the environment encodes the horizon type as 1 for episodic and 0 for continuous)

### Look-ahead approximation: Generation of sample paths (look_ahead = ['d']):

1. List of parameters to be forecast in the look-ahead approximation ['d', 'p', ...]

2. List with '*' to generate forecasts for all parameters

3. False for no sample path generation

Related parameters:

    - S: Number of sample paths
    
    - LA_horizon: Number of look-ahead periods

### Historic data: Generation or usage of historic data (historic_data = ['d'])   

1. ['d', 'p', ...]: List with the parameters the historic info will be generated for

2.  ['*']: Historic info generated for all parameters

3. path (NOT YET DEVELOPED): File path to be processed by upload_historic_data()

4.  False: No historic data will be used

Related parameter:
    
    - hist_window: Initial log size (time periods)

### Back-orders: Catch unsatisfied demand (back_orders = False):

1. 'back-orders': Demand may be left partially unsatisfied. Unmet orders are automatically fulfilled at an extra cost

2. 'back-logs': Demand may be left partially unsatisfied. Unmet orders are registered and kept track of

3. False: All demand must be fulfilled

Related parameters:

    - back_o_cost = 20
    - back_l_cost = 20

In [2]:
# --- Time horizon ---
horizon_type = "episodic"    # every episode has a finite number of steps
T = 5                        # decision periods (time-steps)

# --- Look-ahead approximation ---
look_ahead = ["*"]           # '*' -> generate forecasts for all parameters
S = 2                        # number of sample paths
LA_horizon = 3               # number of look-ahead periods

# --- Historic data ---
historic_data = ["*"]        # '*' -> historic info generated for all parameters
# NOTE(review): hist_window is not forwarded to steroid_IRP in the visible cells -- confirm it is picked up
hist_window = 10             # initial log size (time periods)

# --- Back-orders ---
back_orders = "back-logs"    # unmet demand is registered and kept track of

### Other customizable parameters

    -   M = 10: Number of suppliers

    -   K = 10: Number of Products

    -   F = 2:  Number of vehicles in the fleet

    -   T = 6:  Number of decision periods

    -   wh_cap = 1e9: Warehouse capacity

    -   min/max_sprice: Min and max selling prices (per m and k)

    -   min/max_hprice: Min and max holding cost (per k)

    -   penalization_cost: Penalization costs for RL (invalid actions, etc.)

    -   S = 4:  Number of sample paths 

    -   LA_horizon = 5: Number of look-ahead periods

    -   lambda1 = 0.5: Controls demand, assures feasibility

In [3]:
# Custom overrides handed to the environment through its `env_config` argument.
env_config = {
    # Instance size
    "M": 3,                     # number of suppliers
    "K": 3,                     # number of products
    "T": T,                     # number of decision periods
    "F": 2,                     # number of vehicles in the fleet

    # Price / cost settings
    "min_sprice": 1,
    "max_sprice": 500,
    "min_hprice": 1,
    "max_hprice": 500,
    "back_l_cost": 20,

    # Look-ahead and demand settings
    "S": S,                     # number of sample paths
    "LA_horizon": LA_horizon,   # number of look-ahead periods
    "lambda1": 0.1,             # controls demand
}
            

# Creating an environment

The environment constructor receives all the parameters defined above, plus a random seed and a customizable dictionary, env_config, with the instance's specific characteristics:
    
    -   rd_seed: Seed for random number generation

    -   env_config: Receives a dictionary with custom environment parameters

In [4]:
# Instantiate the environment from the settings chosen in the previous cells.
env = steroid_IRP(
    horizon_type=horizon_type,
    look_ahead=look_ahead,
    historic_data=historic_data,
    back_orders=back_orders,
    rd_seed=rd_seed,
    env_config=env_config,
)

# Last expression of the cell, so the notebook displays the instance summary.
repr(env)

'Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products instance. V = 3; K = 3; F = 2'

# Resetting the environment

Once the environment is created, and every time it is to be run again from the start, it must be reset. For this, the class has the reset method, which receives a boolean under the parameter:

    -   return_state: Indicates if reset() must return the initial state

In [5]:
# The initial state is not requested back from reset(); it is read from `env` in the next cell.
return_state = False
env.reset(return_state=return_state)

# Step

Information from the initial state is retrieved. 

In [6]:
# Banner with the current time step, then the state and the step's d and q dictionaries.
banner = f'######################################## Time step {env.t} ########################################'
print(banner)
print('   (prod,edad)')
print(f's_{env.t}: {print_state(env)}')
for tag, value in (('d', env.d), ('q', env.q)):
    print(f'{tag}_{env.t}: {value}')

# Disabled debug aid: shows env.historic_data['q'][1,0] next to env.q[1,0].
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}') 


######################################## Time step 0 ########################################
   (prod,edad)
s_0: State: 
Product 0: age 1: 0; BL: 0
Product 1: age 1: 0; BL: 0
Product 2: age 1: 0; BL: 0

d_0: {0: 7.0, 1: 22.9, 2: 25.7}
q_0: {(1, 0): 0, (1, 1): 13, (1, 2): 13, (2, 0): 7, (2, 1): 11, (2, 2): 14}


An arbitrary feasible action is generated and its cost is computed 

In [7]:
# Visiting all the suppliers: each route leaves node 0, calls at one supplier and returns to node 0
routes = [[0,1,0], [0,2,0]]

# Purchase the full quantity offered by every supplier at this step (q_0 printed above)
purchase = gen_purchs_dic(env)
purchase[2,0] = 7                                   # product 0:  7 units

purchase[1,1] = 13;     purchase[2,1] = 11  # 24

purchase[1,2] = 13;     purchase[2,2] = 14  # 27

# Units of each product delivered against the demand d_0 printed above
demand_complience = gen_dem_dic(env)
demand_complience[0,0] = 7
demand_complience[1,0] = 20 
demand_complience[2,0] = 25

# No back-logs exist yet at the first step, so nothing is served against them
back_o_complience = gen_dem_dic(env)


X = [routes, purchase, demand_complience, back_o_complience]

# Transport cost: sum of arc costs between consecutive stops of every route.
# Summation order matches the explicit arc-by-arc sum it replaces.
transport_cost = sum(env.c[a, b] for route in routes for a, b in zip(route, route[1:]))

purchase_cost = 0
for i in env.Suppliers:
    for k in env.Products:
        purchase_cost += purchase[i,k] * env.p[i,k]

# Hand-computed for the instance generated with rd_seed = 2:
# units left in stock = purchased - delivered -> product 1: 24 - 20 = 4, product 2: 27 - 25 = 2
holding_cost = env.h[1] * 4 + env.h[2] * 2

# Hand-computed unmet demand: (22.9 - 20) + (25.7 - 25) = 3.6 units
back_o_cost = env.back_l_cost * 3.6

total_cost = transport_cost + purchase_cost + holding_cost + back_o_cost
print(f'The total cost of the action is: {total_cost}')

The total cost of the action is: 20871.0


With a **valid** action, the step method can be called. This method returns:
    
    -   state: New state
    -   reward: The total cost of the action (transport, purchase, holding and back-order/back-log costs)
    -   done: Indicates if the episode has finished
    -   _: Extra information 

In [8]:
# Apply the action built in the previous cell, with validation and warnings switched on.
state, reward, done, _ = env.step(
    action=X,
    validate_action=True,
    warnings=True,
)

print(f'The computated cost of the action is {reward}')
print(f'Episode finished: {done} \n')

# Banner with the new time step, then the state and the step's d and q dictionaries.
banner = f'######################################## Time step {env.t} ########################################'
print(banner)
print(f's_{env.t}: {print_state(env)}')
for tag, value in (('d', env.d), ('q', env.q)):
    print(f'{tag}_{env.t}: {value}')

# Disabled debug aid: shows env.historic_data['q'][1,0] next to env.q[1,0].
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}')

The computated cost of the action is 20871.0
Episode finished: False 

######################################## Time step 1 ########################################
s_1: State: 
Product 0: age 1: 0; BL: 0.0
Product 1: age 1: 4; BL: 2.9
Product 2: age 1: 2; BL: 0.7

d_1: {0: 1.0, 1: 16.4, 2: 25.7}
q_1: {(1, 0): 1, (1, 1): 6, (1, 2): 13, (2, 0): 0, (2, 1): 11, (2, 2): 14}


In [9]:
# Second action: start from all-zero plans and fill in only the non-zero entries.
purchase = gen_purchs_dic(env)
purchase.update({(1, 0): 0.5, (1, 1): 6, (1, 2): 13, (2, 2): 13.7})

demand_complience = gen_dem_dic(env)
demand_complience.update({(0, 0): 0.2, (2, 0): 25.7})

back_o_complience = gen_dem_dic(env)
back_o_complience[2, 1] = 0.7

# `routes` is reused from the first action.
X = [routes, purchase, demand_complience, back_o_complience]

# NOTE(review): the reward recorded for this step (about 2.0e9) dwarfs the previous one;
# possibly a penalization cost was applied -- confirm this action is valid.
state, reward, done, _ = env.step(
    action=X,
    validate_action=True,
    warnings=True,
)

print(f'The computated cost of the action is {reward}')
print(f'Episode finished: {done} \n')

# Banner with the new time step, then the state and the step's d and q dictionaries.
banner = f'######################################## Time step {env.t} ########################################'
print(banner)
print(f's_{env.t}: {print_state(env)}')
for tag, value in (('d', env.d), ('q', env.q)):
    print(f'{tag}_{env.t}: {value}')

# Disabled debug aid: shows env.historic_data['q'][1,0] next to env.q[1,0].
# x = env.historic_data['q'][1,0]
# print(f'Historic of a.q is: {x}')
# print(f'A.q is {env.q[1,0]}')


The computated cost of the action is 2000006891.5
Episode finished: False 

######################################## Time step 2 ########################################
s_2: State: 
Product 0: age 1: 0.3; BL: 0.8
Product 1: age 1: 6; BL: 19.3
Product 2: age 1: 1.0; BL: 0.0

d_2: {0: 2.0, 1: 0.0, 2: 0.0}
q_2: {(1, 0): 2, (1, 1): 0, (1, 2): 0, (2, 0): 0, (2, 1): 0, (2, 2): 0}
