# OR Suite 

Reinforcement learning (RL) is a natural model for problems involving real-time sequential decision making, including inventory control, resource allocation, ridesharing systems, and ambulance routing. In these models, an agent interacts with a system that has stochastic transitions and rewards, and aims to control the system by maximizing its cumulative reward across the trajectory. Reinforcement learning has been shown in practice to be an effective technique for learning complex control policies.

# Step 1: Package Installation
First we import the necessary packages

In [None]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

# Run Simulation with One Supplier

# Step 2: Pick problem parameters for the environment

Here we use the inventory control with multiple suppliers environment as outlined in `or_suite/envs/inventory_control_multiple_suppliers/multiple_suppliers_env.py`.  The package has default specifications for all of the environments in the file `or_suite/envs/env_configs.py`, and so we use the default for the inventory control problem.

In addition, we need to specify the number of episodes for learning, and the number of iterations (in order to plot average results with confidence intervals).


In [None]:
# Environment configuration for the single-supplier run.
# 'demand_dist' ignores its argument and draws one Poisson sample with mean 5.
CONFIG = dict(
    lead_times=[5],
    demand_dist=lambda x: np.random.poisson(5),
    supplier_costs=[10],
    hold_cost=1,
    backorder_cost=100,
    max_inventory=1000,
    max_order=50,
    epLen=500,
    starting_state=None,
    neg_inventory=False,
)

# Override the episode length declared above with a shorter horizon.
CONFIG.update(epLen=100)
epLen = CONFIG['epLen']

# Number of episodes and number of iterations used by the simulation settings.
nEps, numIters = 2, 10


# Step 3: Pick simulation parameters

Next we need to specify parameters for the simulation. This includes setting a seed, the frequency to record the metrics, directory path for saving the data files, a deBug mode which prints the trajectory, etc.

In [None]:
# Simulation settings handed to the or_suite.utils run helpers.
DEFAULT_SETTINGS = dict(
    seed=1,
    recFreq=1,
    # NOTE(review): this path looks like a leftover from the ambulance notebook;
    # the run cell overwrites 'dirPath' per agent before any experiment starts.
    dirPath='../data/ambulance/',
    deBug=False,
    nEps=nEps,
    numIters=numIters,
    saveTrajectory=True,
    epLen=CONFIG['epLen'],
    render=False,
    pickle=False,
)

# Build the environment from CONFIG; the Monitor-wrapped copy is the one
# passed to the stable-baselines ('SB PPO') code path.
env = gym.make('MultipleSuppliers-v0', config=CONFIG)
mon_env = Monitor(env)

# Step 4: Pick list of algorithms

We have several heuristics implemented for each of the environments defined, in addition to a `Random` policy, and some `RL discretization based` algorithms. 

The `Random` agent chooses random amounts to order from each supplier between 0 and the `max_order` value.

The `TBS` agent uses an order-up-to-amount, $S$, and for the supplier with the largest lead time, orders $S$ minus the current inventory. For the other suppliers, different values are used, which are stored in an array, $r$, which has the length of the number of suppliers minus 1. 

In [None]:
# Agents to compare, keyed by the label the run cell uses for output
# directories and plot legends.
# The stable-baselines PPO agent is currently disabled:
# 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)
agents = {}
agents['Random'] = or_suite.agents.rl.random.randomAgent()
# Initial (r, S) for TBS: r is empty with a single supplier, S starts at 0.
# Presumably both are replaced during parameter tuning in the run cell.
agents['TBS'] = or_suite.agents.inventory_control_multiple_suppliers.base_surge.base_surgeAgent([], 0)

# Step 5: Run Simulations

Run the different heuristics in the environment

In [None]:
# Data directories and labels collected for the line and radar plots.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Candidate TBS parameters, one [r, S] pair per entry. With a single supplier
# r is always empty, so only S is swept over 0..max_order.
max_order = CONFIG['max_order']
param_list = [[[], S] for S in range(max_order + 1)]

# NOTE(review): every supplier section of this notebook writes to the same
# '../data/inventory_control_<agent>' directories, so later runs replace
# earlier results.
for label, algo in agents.items():
    print(label)
    data_path = '../data/inventory_control_' + str(label)
    DEFAULT_SETTINGS['dirPath'] = data_path + '/'

    # Dispatch on the agent label: stable-baselines agents use the monitored
    # env, TBS is tuned over param_list, everything else runs as-is.
    if label == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, algo, DEFAULT_SETTINGS)
    elif label == 'TBS':
        or_suite.utils.run_single_algo_tune(env, algo, param_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(env, algo, DEFAULT_SETTINGS)

    path_list_line.append(data_path)
    algo_list_line.append(str(label))

    # The radar plot leaves out the stable-baselines agent.
    if label == 'SB PPO':
        continue
    path_list_radar.append(data_path)
    algo_list_radar.append(str(label))

# Step 6: Generate Figures

Create a chart to compare the different heuristic functions

In [None]:
# NOTE(review): the same figure file names are used by every supplier section
# of this notebook, so each section overwrites the previous section's PDFs.
fig_path = '../figures/'

# Line plot comparing all agents that were run.
fig_name = 'inventory_line_plot.pdf'
or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    int(nEps / 40) + 1,
)

# Radar plot for the non-stable-baselines agents; no extra metrics supplied.
additional_metric = dict()
fig_name = 'inventory_radar_plot.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar, algo_list_radar, fig_path, fig_name, additional_metric
)

# Run Simulation with 2 Suppliers


In [None]:
# Start from the package's "modified" multiple-suppliers default, used here
# for the two-supplier run. Copy it first: assigning the package object
# directly and then setting keys would mutate the shared default inside
# or_suite.envs.env_configs for everything else that reads it in this session.
CONFIG = copy.copy(or_suite.envs.env_configs.inventory_control_multiple_suppliers_modified_config)

# Overrides for this experiment only.
CONFIG['epLen'] = 500
CONFIG['neg_inventory'] = False

epLen = CONFIG['epLen']
nEps = 2
numIters = 10
print(epLen)


In [None]:
# Simulation settings handed to the or_suite.utils run helpers.
DEFAULT_SETTINGS = dict(
    seed=1,
    recFreq=1,
    # NOTE(review): this path looks like a leftover from the ambulance notebook;
    # the run cell overwrites 'dirPath' per agent before any experiment starts.
    dirPath='../data/ambulance/',
    deBug=False,
    nEps=nEps,
    numIters=numIters,
    saveTrajectory=True,
    epLen=CONFIG['epLen'],
    render=False,
    pickle=False,
)

# Build the environment from CONFIG; the Monitor-wrapped copy is the one
# passed to the stable-baselines ('SB PPO') code path.
env = gym.make('MultipleSuppliers-v0', config=CONFIG)
mon_env = Monitor(env)

In [None]:
# Agents to compare, keyed by the label the run cell uses for output
# directories and plot legends.
# The stable-baselines PPO agent is currently disabled:
# 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)
agents = {}
agents['Random'] = or_suite.agents.rl.random.randomAgent()
# Initial (r, S) for TBS: one r value for the two-supplier case, S starts at 0.
# Presumably both are replaced during parameter tuning in the run cell.
agents['TBS'] = or_suite.agents.inventory_control_multiple_suppliers.base_surge.base_surgeAgent([14], 0)

In [None]:
# Data directories and labels collected for the line and radar plots.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Candidate TBS parameters, one [r, S] pair per entry: the full grid of
# r in 0..max_order (outer) by S in 0..max_order (inner).
max_order = CONFIG['max_order']
grid = range(max_order + 1)
param_list = [[[r], S] for r in grid for S in grid]

# NOTE(review): every supplier section of this notebook writes to the same
# '../data/inventory_control_<agent>' directories, so later runs replace
# earlier results.
for label, algo in agents.items():
    print(label)
    data_path = '../data/inventory_control_' + str(label)
    DEFAULT_SETTINGS['dirPath'] = data_path + '/'

    # Dispatch on the agent label: stable-baselines agents use the monitored
    # env, TBS is tuned over param_list, everything else runs as-is.
    if label == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, algo, DEFAULT_SETTINGS)
    elif label == 'TBS':
        or_suite.utils.run_single_algo_tune(env, algo, param_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(env, algo, DEFAULT_SETTINGS)

    path_list_line.append(data_path)
    algo_list_line.append(str(label))

    # The radar plot leaves out the stable-baselines agent.
    if label == 'SB PPO':
        continue
    path_list_radar.append(data_path)
    algo_list_radar.append(str(label))

In [None]:
# NOTE(review): the same figure file names are used by every supplier section
# of this notebook, so each section overwrites the previous section's PDFs.
fig_path = '../figures/'

# Line plot comparing all agents that were run.
fig_name = 'inventory_line_plot.pdf'
or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    int(nEps / 40) + 1,
)

# Radar plot for the non-stable-baselines agents; no extra metrics supplied.
additional_metric = dict()
fig_name = 'inventory_radar_plot.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar, algo_list_radar, fig_path, fig_name, additional_metric
)

# TODO: Import figures and display


# Run with 3 Suppliers

In [None]:
# Environment configuration for the three-supplier run; 'lead_times' and
# 'supplier_costs' each carry one entry per supplier.
# 'demand_dist' ignores its argument and draws one Poisson sample with mean 17.
CONFIG = dict(
    lead_times=[5, 7, 11],
    demand_dist=lambda x: np.random.poisson(17),
    supplier_costs=[100, 85, 73],
    hold_cost=1,
    backorder_cost=200,
    max_inventory=1000,
    max_order=20,
    epLen=500,
    starting_state=None,
    neg_inventory=False,
)

# Override the episode length declared above with a shorter horizon.
CONFIG.update(epLen=100)
epLen = CONFIG['epLen']

# Number of episodes and number of iterations used by the simulation settings.
nEps, numIters = 2, 10

In [None]:
# Simulation settings handed to the or_suite.utils run helpers.
DEFAULT_SETTINGS = dict(
    seed=1,
    recFreq=1,
    # NOTE(review): this path looks like a leftover from the ambulance notebook;
    # the run cell overwrites 'dirPath' per agent before any experiment starts.
    dirPath='../data/ambulance/',
    deBug=False,
    nEps=nEps,
    numIters=numIters,
    saveTrajectory=True,
    epLen=CONFIG['epLen'],
    render=False,
    pickle=False,
)

# Build the environment from CONFIG; the Monitor-wrapped copy is the one
# passed to the stable-baselines ('SB PPO') code path.
env = gym.make('MultipleSuppliers-v0', config=CONFIG)
mon_env = Monitor(env)

In [None]:
# Agents to compare, keyed by the label the run cell uses for output
# directories and plot legends.
# The stable-baselines PPO agent is currently disabled:
# 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)
agents = {}
agents['Random'] = or_suite.agents.rl.random.randomAgent()
# Initial (r, S) for TBS: two r values for the three-supplier case, S starts
# at 0. Presumably both are replaced during parameter tuning in the run cell.
agents['TBS'] = or_suite.agents.inventory_control_multiple_suppliers.base_surge.base_surgeAgent([14, 14], 0)

In [None]:
# Data directories and labels collected for the line and radar plots.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Candidate TBS parameters, one [r, S] pair per entry: the full grid with S
# outermost, then r1, then r2, each over 0..max_order.
max_order = CONFIG['max_order']
grid = range(max_order + 1)
param_list = [[[r1, r2], S] for S in grid for r1 in grid for r2 in grid]

# NOTE(review): every supplier section of this notebook writes to the same
# '../data/inventory_control_<agent>' directories, so later runs replace
# earlier results.
for label, algo in agents.items():
    print(label)
    data_path = '../data/inventory_control_' + str(label)
    DEFAULT_SETTINGS['dirPath'] = data_path + '/'

    # Dispatch on the agent label: stable-baselines agents use the monitored
    # env, TBS is tuned over param_list, everything else runs as-is.
    if label == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, algo, DEFAULT_SETTINGS)
    elif label == 'TBS':
        or_suite.utils.run_single_algo_tune(env, algo, param_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(env, algo, DEFAULT_SETTINGS)

    path_list_line.append(data_path)
    algo_list_line.append(str(label))

    # The radar plot leaves out the stable-baselines agent.
    if label == 'SB PPO':
        continue
    path_list_radar.append(data_path)
    algo_list_radar.append(str(label))

In [None]:
# NOTE(review): the same figure file names are used by every supplier section
# of this notebook, so each section overwrites the previous section's PDFs.
fig_path = '../figures/'

# Line plot comparing all agents that were run.
fig_name = 'inventory_line_plot.pdf'
or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    int(nEps / 40) + 1,
)

# Radar plot for the non-stable-baselines agents; no extra metrics supplied.
additional_metric = dict()
fig_name = 'inventory_radar_plot.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar, algo_list_radar, fig_path, fig_name, additional_metric
)