In [1]:
import or_suite
import numpy as np
import gym

# One Supplier

In [2]:
# Configuration for the single-supplier instance of the environment.
CONFIG1 = {
    'L': [3],
    'c': [100],
    'lambda': 10,
    'h': 1,
    'b': 19,
    'max_inventory': 500,
    'max_order': 100,
    'epLen': 500,
}

In [3]:
# Build the single-supplier environment from CONFIG1.
env1 = gym.make(
    'MultipleSuppliers-v0',
    config=CONFIG1,
)

In [4]:
# Show the initial state and the action / observation spaces, one per line.
print(env1.state, env1.action_space, env1.observation_space, sep='\n')

[0 0 0 0]
MultiDiscrete([101])
MultiDiscrete([101 101 101 500])


In [5]:
# Test the step function: take one step with the action [15] and
# print the new state, the reward, and the done flag, one per line.
action = [15]
newState, reward, done, info = env1.step(action)
print(newState, reward, done, sep='\n')

[ 0  0 15  0]
0.0
False


In [6]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env1.step(action)
print(newState, reward, done, sep='\n')

[ 0 15 15  0]
-1500.0
False


In [7]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env1.step(action)
print(newState, reward, done, sep='\n')

[15 15 15  0]
-1500.0
False


In [20]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env1.step(action)
print(newState, reward, done, sep='\n')

[15 15 15 82]
-1575.0
False


# Two Suppliers

In [2]:
# Use the package's default configuration for the multiple-suppliers
# inventory control environment.
CONFIG = (
    or_suite.envs.env_configs
    .inventory_control_multiple_suppliers_default_config
)

In [3]:
# Create an instance of the environment from the default configuration.
env = gym.make(
    'MultipleSuppliers-v0',
    config=CONFIG,
)

In [4]:
# Show the initial state and the action / observation spaces, one per line.
print(env.state, env.action_space, env.observation_space, sep='\n')

[0 0 0 0 0 0 0]
MultiDiscrete([21 21])
MultiDiscrete([  21   21   21   21   21   21 1000])


In [5]:
# Test the step function: take one step with the action [1, 15] and
# print the new state, the reward, and the done flag, one per line.
action = [1, 15]
newState, reward, done, info = env.step(action)
print(newState, reward, done, sep='\n')

[ 0  0  0  0  1 15  0]
0.0
False


In [8]:
# Apply the same action again; besides the state, reward, and done flag,
# also print the 'demand' entry reported in the step's info dict.
newState, reward, done, info = env.step(action)
print(newState, reward, done, info['demand'], sep='\n')

[ 0  1  1  1  1 15 19]
-113.0
False
9


In [7]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env.step(action)
print(newState, reward, done, sep='\n')

[ 0  0  1  1  1 15  6]
-100.0
False


In [8]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env.step(action)
print(newState, reward, done, sep='\n')

[ 0  1  1  1  1 15  9]
-106.0
False


In [9]:
from stable_baselines3.common.env_checker import check_env
# Run Stable-Baselines3's environment checker on the two-supplier environment,
# skipping the render check.
check_env(env, skip_render_check=True)

# Four Suppliers

In [None]:
# Configuration for the four-supplier instance of the environment:
# 'L' and 'c' each carry one entry per supplier.
CONFIG4 = {
    'L': [3, 2, 5, 1],
    'c': [100, 200, 50, 1000],
    'lambda': 10,
    'h': 1,
    'b': 19,
    'max_inventory': 500,
    'max_order': 100,
    'epLen': 500,
}

In [None]:
# Build the four-supplier environment from CONFIG4.
env4 = gym.make(
    'MultipleSuppliers-v0',
    config=CONFIG4,
)

In [None]:
# Show the initial state and the action / observation spaces, one per line.
print(env4.state, env4.action_space, env4.observation_space, sep='\n')

In [None]:
# Test the step function: take one step with the action [15, 2, 7, 8] and
# print the new state, the reward, and the done flag, one per line.
action = [15, 2, 7, 8]
newState, reward, done, info = env4.step(action)
print(newState, reward, done, sep='\n')

In [None]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env4.step(action)
print(newState, reward, done, sep='\n')

In [None]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env4.step(action)
print(newState, reward, done, sep='\n')

In [None]:
# Apply the same action again and print the new state, reward, and done flag.
newState, reward, done, info = env4.step(action)
print(newState, reward, done, sep='\n')

# Vaccine Allotment Code Demonstration

Reinforcement learning (RL) is a natural model for problems involving real-time sequential decision making. In these models, a principal interacts with a system having stochastic transitions and rewards and aims to control the system online (by exploring available actions using real-time feedback) or offline (by exploiting known properties of the system).

This project revolves around providing a unified landscape on scaling reinforcement learning algorithms to operations research domains.

In this notebook we walk through running experiments and generating plots for the `vaccine allotment` problem, which models a population of size $P$ split into four risk classes, with a discrete state space $\mathcal{S} = \{0, 1, 2, \ldots, P\}^{11}$ and a discrete action space consisting of "priority orders" that determine how we allot vaccines to the four risk classes. A valid priority order is one of two options: 
1. an empty list -- interpreted as no priority order, meaning we vaccinate the population randomly
2. a permutation of the numbers $\{1,2,3,4\}$ -- interpreted as the order in which we vaccinate the risk classes

### Step 1: Import Required Packages

The main package for ORSuite is contained in `or_suite`.  However, some additional packages may be required for specific environments / algorithms.  Here, we include `stable baselines`, a package containing implementations of state-of-the-art deep RL algorithms, and `matplotlib` for the plotting.

In [10]:
import or_suite
import gym
import matplotlib.pyplot as plt
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
import numpy as np
import pickle

### Step 2: Pick problem parameters for the environment

Here we use the vaccine allotment environment, registered as `Vaccine-v0`.  The package has default specifications for all of the environments in the file `or_suite/envs/env_configs.py`, and so we use one of the defaults for the vaccine problem (`vaccine_default_config1`).

In addition, we need to specify the number of episodes for learning, and the number of iterations (in order to plot average results with confidence intervals).

In [11]:
# Default vaccine configuration shipped with the package, and the episode
# length it specifies.
DEFAULT_CONFIG = or_suite.envs.env_configs.vaccine_default_config1
epLen = DEFAULT_CONFIG['epLen']

# Number of episodes for learning, and number of iterations (repetitions used
# to plot average results with confidence intervals).
nEps, numIters = 200, 5

### Step 3: Pick simulation parameters

Next we need to specify parameters for the simulation.  This includes setting a seed, the frequency to record the metrics, directory path for saving the data files, a deBug mode which prints the trajectory, etc.

In [12]:
# Simulation settings handed to the or_suite experiment runners.
DEFAULT_SETTINGS = {'seed': 1,                      # random seed for the simulation
                    'recFreq': 1,                   # frequency at which metrics are recorded
                    'dirPath': '../data/vaccine/',  # output directory (overwritten per agent in the run loop)
                    'deBug': False,                 # when True, prints the trajectory
                    'nEps': nEps,
                    'numIters': numIters,
                    'render': False,
                    'saveTrajectory': True,
                    # Take the episode length from the environment configuration
                    # instead of hard-coding 5, so the simulation horizon matches
                    # the environment and the `n_steps=epLen` used for the PPO agent.
                    'epLen': epLen,
                    'pickle': False}

# Build the vaccine environment from the default configuration and wrap it in
# a Stable-Baselines3 Monitor.
vaccine_env = gym.make('Vaccine-v0', config=DEFAULT_CONFIG)
mon_env = Monitor(vaccine_env)

### Step 4: Pick list of algorithms

We have several heuristics implemented for each of the environments defined, in addition to a `random` policy, and some `RL discretization based` algorithms.  Here we pick the `random` policy and a PPO algorithm implemented in `stable baselines`, just to test.

In [13]:
# Agents to compare, keyed by the display name used for data paths and plots.
agents = {}
agents['Random'] = or_suite.agents.rl.random.randomAgent()
# PPO from Stable-Baselines3, with the rollout length set to the episode length.
agents['SB PPO'] = PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)

We recommend using a `batch_size` that is a multiple of `n_steps * n_envs`.
Info: (n_steps=4 and n_envs=1)


### Step 5: Run simulations

In [14]:
# Optional: run only the random agent (left disabled; the loop in the next cell runs every agent).
# or_suite.utils.run_single_algo(mon_env, agents['Random'], DEFAULT_SETTINGS)

In [None]:
# Run every agent in `agents`, pointing `dirPath` at a per-agent directory first.
for agent, algorithm in agents.items():
    print(agent)
    DEFAULT_SETTINGS['dirPath'] = '../data/vaccine_metric_test_' + str(agent) + '/'
    # The Stable-Baselines3 PPO agent goes through the SB-specific runner;
    # every other agent uses the generic runner.
    run_experiment = (or_suite.utils.run_single_sb_algo if agent == 'SB PPO'
                      else or_suite.utils.run_single_algo)
    run_experiment(mon_env, algorithm, DEFAULT_SETTINGS)

Random
**************************************************
Running experiment
**************************************************
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.

 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event n

 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event number 400.0
 We've reached vaccination event number 500.0
 We've reached vaccination event number 600.0
 We've reached vaccination event number 700.0
 We've reached vaccination event number 100.0
 We've reached vaccination event number 200.0
 We've reached vaccination event number 300.0
 We've reached vaccination event n

### Step 6: Generate figures

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so none of it runs.
'''
# Plots for just the random agent

path_list_line = []
path_list_radar = []
algo_list_line = []
algo_list_radar = []

#print(str(agents['Random']))
path_list_line.append('../data/vaccine_metric_test_'+'Random')
algo_list_line.append("Random")
path_list_radar.append('../data/vaccine_metric_test_'+'Random'+'/')
algo_list_radar.append("Random")
'''

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so none of it runs.
'''
# Plots for just the random agent

fig_path = '../figures/'
fig_name = 'test_vaccine_metric.pdf'

or_suite.plots.plot_line_plots(path_list_line, algo_list_line, fig_path, fig_name, int(nEps / 40) + 1)
'''

In [None]:
# Disabled radar-plot call (with an optional extra 'MRT' metric), kept for reference.
#additional_metric = {'MRT': lambda traj : or_suite.utils.mean_response_time(traj, lambda x, y : np.abs(x-y))}

#or_suite.plots.plot_radar_plots(path_list_radar, algo_list_radar, fig_path, fig_name, None)

In [None]:
import os
# Display the current working directory; the '../data/' and '../figures/'
# paths used in this notebook are relative to it.
os.getcwd()

In [None]:
# Collect, for every agent, the data directory and display name used by the plots.
data_prefix = '../data/vaccine_metric_test_'

for agent in agents:
    print(str(agent))

path_list_line = [data_prefix + str(agent) for agent in agents]
algo_list_line = [str(agent) for agent in agents]

# 'SB PPO' is left out of the radar plot inputs; radar paths carry a trailing '/'.
path_list_radar = [data_prefix + str(agent) + '/' for agent in agents if agent != 'SB PPO']
algo_list_radar = [str(agent) for agent in agents if agent != 'SB PPO']

# Output location and file names for the two figures.
fig_path = '../figures/'
fig_name1 = 'test_vaccine_metric_line.pdf'
fig_name2 = 'test_vaccine_metric_radar.pdf'

or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name1,
    int(nEps / 40) + 1,
)

In [None]:
# No extra metric is passed to the radar plot.
additional_metric = None

or_suite.plots.plot_radar_plots(
    path_list_radar,
    algo_list_radar,
    fig_path,
    fig_name2,
    additional_metric,
)