In [None]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

## Set Global Parameters for Experiments

In [None]:
# Experiment-wide knobs read by the configuration cells that follow.
numIters = 10          # forwarded as DEFAULT_SETTINGS['numIters']
nEps = 2               # forwarded as DEFAULT_SETTINGS['nEps']
epLen = 10             # episode length, copied into CONFIG['epLen']
neg_inventory = False  # copied into CONFIG['neg_inventory']

### Two Suppliers

In [None]:
# Start from or_suite's multiple-suppliers preset, but work on a private
# (shallow) copy: assigning keys on the module-level dict itself would change
# the library's defaults for everything else running in this kernel.
CONFIG = copy.copy(or_suite.envs.env_configs.inventory_control_multiple_suppliers_modified_config)
CONFIG['neg_inventory'] = neg_inventory
# Each call draws a Poisson(10) sample from numpy's global RNG; the argument is ignored.
CONFIG['demand_dist'] = lambda x: np.random.poisson(10)
# Alternatives kept for reference:
# CONFIG['demand_dist'] = lambda x: 10
# CONFIG = or_suite.envs.env_configs.oil_environment_default_config
CONFIG['epLen'] = epLen
print(epLen)


In [None]:
# Settings dictionary handed to the or_suite.utils runners in the experiment loop.
DEFAULT_SETTINGS = {'seed': 1,
                    'recFreq': 1,
                    # Placeholder only: the experiment loop assigns a per-agent
                    # '../data/inventory_control_<agent>/' directory before every
                    # run. (Previously this pointed at the ambulance experiment's
                    # data directory, a copy-paste leftover.)
                    'dirPath': '../data/inventory_control/',
                    'deBug': False,
                    'nEps': nEps,
                    'numIters': numIters,
                    'saveTrajectory': True,
                    'epLen': CONFIG['epLen'],
                    'render': False,
                    'pickle': False
                    }

# Environment built from CONFIG; the Monitor-wrapped handle is the one passed
# to run_single_sb_algo for the (currently disabled) 'SB PPO' agent.
env = gym.make('MultipleSuppliers-v0', config=CONFIG)
mon_env = Monitor(env)

In [None]:
# Policies compared in the two-supplier experiment, keyed by display name.
# Disabled entries, kept for reference:
#   'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)
#   'Random': or_suite.agents.rl.random.randomAgent()
tbs_module = or_suite.agents.inventory_control_multiple_suppliers.base_surge
agents = {
    'ZeroTen': tbs_module.base_surgeAgent([0], 10),
    'TenZero': tbs_module.base_surgeAgent([10], 0),
    # 'TBS' is the entry the experiment loop passes to run_single_algo_tune
    # together with param_list.
    'TBS': tbs_module.base_surgeAgent([0], 0),
}

In [None]:
# Result directories and labels collected for the line and radar plots.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Tuning grid: every entry is [[r], S], with r and S each running over 0..max_order.
max_order = CONFIG['max_order']
order_levels = range(max_order + 1)
param_list = [[[r], S] for r in order_levels for S in order_levels]
#param_list = [[[10], 0], [[0],10]]

for agent_name, agent_obj in agents.items():
    print(agent_name)
    results_dir = '../data/inventory_control_' + str(agent_name)
    DEFAULT_SETTINGS['dirPath'] = results_dir + '/'
    uses_stable_baselines = agent_name == 'SB PPO'

    # Pick the runner that matches the kind of agent.
    if uses_stable_baselines:
        or_suite.utils.run_single_sb_algo(mon_env, agent_obj, DEFAULT_SETTINGS)
    elif agent_name == 'TBS':
        or_suite.utils.run_single_algo_tune(env, agent_obj, param_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(env, agent_obj, DEFAULT_SETTINGS)

    # Every agent goes on the line plot; 'SB PPO' is left off the radar plot.
    path_list_line.append(results_dir)
    algo_list_line.append(str(agent_name))
    if not uses_stable_baselines:
        path_list_radar.append(results_dir)
        algo_list_radar.append(str(agent_name))

In [None]:
# Produce the comparison figures from the result directories gathered by the
# experiment loop.
fig_path = '../figures/'
additional_metric = {}

fig_name = ''.join(['inventory', '_line_plot', '.pdf'])
or_suite.plots.plot_line_plots(
    path_list_line, algo_list_line, fig_path, fig_name, int(nEps / 40) + 1
)

fig_name = ''.join(['inventory', '_radar_plot', '.pdf'])
or_suite.plots.plot_radar_plots(
    path_list_radar, algo_list_radar, fig_path, fig_name, additional_metric
)

# TODO: Import figures and display


###  One Supplier

In [None]:
# Single-supplier setup: the environment configuration is spelled out here
# instead of being taken from an or_suite preset.
epLen = 100
nEps = 2
numIters = 10

CONFIG = dict(
    lead_times=[5],
    # Each call draws a Poisson(10) sample; the argument is ignored.
    demand_dist=lambda x: np.random.poisson(10),
    supplier_costs=[1],
    hold_cost=1,
    backorder_cost=10,
    max_inventory=1000,
    max_order=200,
    epLen=epLen,
    starting_state=None,
    neg_inventory=neg_inventory,
)

In [None]:
# Settings dictionary handed to the or_suite.utils runners in the experiment loop.
DEFAULT_SETTINGS = {'seed': 1,
                    'recFreq': 1,
                    # Placeholder only: the experiment loop assigns a per-agent
                    # '../data/inventory_control_<agent>/' directory before every
                    # run. (Previously this pointed at the ambulance experiment's
                    # data directory, a copy-paste leftover.)
                    'dirPath': '../data/inventory_control/',
                    'deBug': False,
                    'nEps': nEps,
                    'numIters': numIters,
                    'saveTrajectory': True,
                    'epLen': CONFIG['epLen'],
                    'render': False,
                    'pickle': False
                    }

# Environment built from the single-supplier CONFIG; the Monitor-wrapped handle
# is the one passed to run_single_sb_algo for the (currently disabled) 'SB PPO' agent.
env = gym.make('MultipleSuppliers-v0', config=CONFIG)
mon_env = Monitor(env)

In [None]:
# Policies compared in the single-supplier experiment, keyed by display name.
# Disabled entry, kept for reference:
#   'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen)
agents = {}
agents['Random'] = or_suite.agents.rl.random.randomAgent()
# 'TBS' is the entry the experiment loop passes to run_single_algo_tune
# together with param_list.
agents['TBS'] = or_suite.agents.inventory_control_multiple_suppliers.base_surge.base_surgeAgent([], 0)

In [None]:
# Result directories and labels collected for the line and radar plots.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Tuning grid: every entry is [[], S] (r is an empty list here), with S running
# over 0..max_order.
max_order = CONFIG['max_order']
param_list = [[[], S] for S in range(max_order + 1)]

# NOTE(review): these are the same '../data/inventory_control_<agent>' directories
# that the Two Suppliers section writes to, so its 'TBS' results are presumably
# overwritten here - confirm that this is intended.
for agent_name, agent_obj in agents.items():
    print(agent_name)
    results_dir = '../data/inventory_control_' + str(agent_name)
    DEFAULT_SETTINGS['dirPath'] = results_dir + '/'
    uses_stable_baselines = agent_name == 'SB PPO'

    # Pick the runner that matches the kind of agent.
    if uses_stable_baselines:
        or_suite.utils.run_single_sb_algo(mon_env, agent_obj, DEFAULT_SETTINGS)
    elif agent_name == 'TBS':
        or_suite.utils.run_single_algo_tune(env, agent_obj, param_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(env, agent_obj, DEFAULT_SETTINGS)

    # Every agent goes on the line plot; 'SB PPO' is left off the radar plot.
    path_list_line.append(results_dir)
    algo_list_line.append(str(agent_name))
    if not uses_stable_baselines:
        path_list_radar.append(results_dir)
        algo_list_radar.append(str(agent_name))

In [None]:
# Produce the comparison figures for the single-supplier experiment.
fig_path = '../figures/'

# Use file names distinct from the Two Suppliers section: both sections share
# fig_path, and reusing 'inventory_line_plot.pdf' / 'inventory_radar_plot.pdf'
# would presumably overwrite the figures saved earlier in the notebook.
fig_name = 'inventory_one_supplier'+'_line_plot'+'.pdf'
or_suite.plots.plot_line_plots(path_list_line, algo_list_line, fig_path, fig_name, int(nEps / 40)+1)

additional_metric = {}
fig_name = 'inventory_one_supplier'+'_radar_plot'+'.pdf'
or_suite.plots.plot_radar_plots(path_list_radar, algo_list_radar,
                                fig_path, fig_name,
                                additional_metric
                                )