In [1]:
import or_gym
import gym
from gym import spaces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Identifier of the OR-Gym environment used by every later cell.
env_name = 'InvManagement-v2'
# Build one environment through or_gym's factory; later cells inspect this
# object directly (env.graph, env.action_log).
env = or_gym.make(env_name)

In [3]:
def plot_network(env):
    """Draw the supply network stored in ``env.graph`` as a layered diagram.

    Levels are discovered by walking upstream from the retailer (node 1):
    every level is the set of predecessors of the level before it. Each level
    is plotted as one column, nodes are labelled with their actual graph ids,
    and every edge whose two endpoints were placed is drawn in the colour of
    its source node's level.

    Parameters
    ----------
    env : object
        Environment exposing ``graph.edges()`` and
        ``graph.predecessors(node)`` (presumably a networkx ``DiGraph`` --
        confirm against the environment implementation).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    def upstream_of(nodes):
        # Sorted unique predecessors of every node in `nodes`. Built from a
        # set so that an empty level yields an empty array instead of the
        # ValueError np.hstack raises on an empty list.
        found = {p for node in nodes for p in env.graph.predecessors(node)}
        return np.array(sorted(found), dtype=int)

    # Set node levels. Only the retailer and the nodes upstream of it are
    # placed; edges to any other node (e.g. node 0, which the original code
    # left out as a commented 'market' level) are skipped when drawing.
    levels = {'retailer': np.array([1])}
    levels['distributor'] = upstream_of(levels['retailer'])
    levels['manufacturer'] = upstream_of(levels['distributor'])
    levels['raw_materials'] = upstream_of(levels['manufacturer'])

    # Index into the colour cycle used for edges leaving each level.
    level_col = {'retailer': 0,
                 'distributor': 1,
                 'manufacturer': 2,
                 'raw_materials': 3}

    max_density = max(len(nodes) for nodes in levels.values())
    node_coords = {}  # graph node id -> (x, y) position in the plot
    node_color = {}   # graph node id -> colour of edges leaving that node
    fig, ax = plt.subplots(figsize=(12, 8))
    for col, (level, nodes) in enumerate(levels.items()):
        count = len(nodes)
        # A lone node is centred vertically; otherwise spread the nodes evenly.
        if count == 1:
            node_y = np.array([max_density / 2])
        else:
            node_y = np.linspace(0, max_density, count)
        ax.scatter(np.repeat(col, count), node_y, label=level, s=50)
        for node, y in zip(nodes, node_y):
            # Key by the real graph id (not a running counter) so labels and
            # edges stay correct for any node numbering.
            node = int(node)
            # Braces around the id keep multi-digit ids fully subscripted.
            ax.annotate(r'$N_{{{}}}$'.format(node), xy=(col, y + 0.05))
            node_coords[node] = (col, y)
            node_color[node] = colors[level_col[level] % len(colors)]

    # Draw edges between placed nodes only.
    for source, sink in env.graph.edges():
        if source not in node_coords or sink not in node_coords:
            continue
        (x0, y0), (x1, y1) = node_coords[source], node_coords[sink]
        ax.plot([x0, x1], [y0, y1], color=node_color[source])

    ax.set_ylabel('Node')
    ax.set_yticks([0])
    ax.set_yticklabels([''])
    ax.set_xlabel('Level')
    ax.set_xticks(np.arange(len(levels)))
    ax.set_xticklabels(list(levels))
    plt.show()

# Build RL Model

In [4]:
import ray
from ray import tune
from ray.rllib import agents
# Start the local Ray runtime. ignore_reinit_error=True is intended to make
# re-running this cell harmless when Ray is already initialised (see the Ray
# documentation for ray.init).
ray.init(ignore_reinit_error=True)

2020-09-18 16:59:35,561	INFO resource_spec.py:212 -- Starting Ray with 0.63 GiB memory available for workers and up to 0.33 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-09-18 16:59:37,263	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


{'node_ip_address': '192.168.0.13',
 'raylet_ip_address': '192.168.0.13',
 'redis_address': '192.168.0.13:6379',
 'object_store_address': '/tmp/ray/session_2020-09-18_16-59-35_452827_6507/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-09-18_16-59-35_452827_6507/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-09-18_16-59-35_452827_6507'}

In [5]:
# Extra keyword configuration forwarded to the environment constructor.
env_config = {}


def _make_registered_env(rllib_env_config):
    """Environment creator registered with Ray Tune under ``env_name``.

    RLlib calls the creator with its own env-config object; that argument is
    accepted but, as in the original lambda, the notebook-level ``env_config``
    is what gets forwarded to the environment.

    The environment *class* is looked up with ``or_gym.utils.create_env``
    (the same call the trainer cell uses). The previous lambda called the
    notebook-level ``env``, which is an environment instance returned by
    ``or_gym.make`` and is not callable.
    """
    env_class = or_gym.utils.create_env(env_name)
    return env_class(env_name, env_config=env_config)


tune.register_env(env_name, _make_registered_env)

In [6]:
# RLlib trainer configuration; 'num_workers' is RLlib's rollout-worker count.
rl_config = {'num_workers': 1}
# Build a PPO trainer on whatever or_gym.utils.create_env returns for env_name.
# NOTE(review): the creator registered with tune.register_env under env_name is
# not what is passed here, so that registration (and env_config) is bypassed.
trainer = agents.ppo.PPOTrainer(rl_config, env=or_gym.utils.create_env(env_name))

2020-09-18 16:59:55,044	INFO trainer.py:585 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-09-18 16:59:55,045	INFO trainer.py:612 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


In [7]:
# Run a training step with the PPO trainer.
# NOTE(review): the recorded output of this cell is a RayTaskError(ValueError)
# raised inside the environment's _update_state: operands of shapes (6,) and
# (8,) could not be broadcast. The cells that follow investigate that mismatch.
trainer.train()

RayTaskError(ValueError): [36mray::RolloutWorker.par_iter_next()[39m (pid=6565, ip=192.168.0.13)
  File "python/ray/_raylet.pyx", line 446, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 400, in ray._raylet.execute_task.function_executor
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/util/iter.py", line 1125, in par_iter_next
    return next(self.local_it)
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 263, in gen_rollouts
    yield self.sample()
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 528, in sample
    batches = [self.input_reader.next()]
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/evaluation/sampler.py", line 59, in next
    batches = [self.get_data()]
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/evaluation/sampler.py", line 164, in get_data
    item = next(self.rollout_provider)
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/evaluation/sampler.py", line 518, in _env_runner
    base_env.send_actions(actions_to_send)
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/env/base_env.py", line 329, in send_actions
    self.vector_env.vector_step(action_vector)
  File "/home/cdhubbs/miniconda3/envs/alphadow/lib/python3.7/site-packages/ray/rllib/env/vector_env.py", line 136, in vector_step
    obs, r, done, info = self.envs[i].step(actions[i])
  File "/mnt/c/Users/u755275/GitHub/or-gym/or_gym/envs/supply_network/inventory_management.py", line 451, in step
    self._update_state()
  File "/mnt/c/Users/u755275/GitHub/or-gym/or_gym/envs/supply_network/inventory_management.py", line 339, in _update_state
    state[-m*t:] += self.action_log[:t].flatten()
ValueError: operands could not be broadcast together with shapes (6,) (8,) (6,)

In [9]:
# Inspect the shape of the environment's action log (recorded output:
# (30, 8)); its second dimension matches the 8-element operand reported in
# the broadcast error from trainer.train().
env.action_log.shape

(30, 8)

In [33]:
# Pair every edge after the first one yielded by env.graph.edges() with the
# action component at the matching position (edge k, k >= 1 -> action[k - 1]).
# NOTE(review): `action` is not defined in any visible cell; it must exist in
# the kernel before this cell is run.
{edge: action[pos] for pos, edge in enumerate(list(env.graph.edges())[1:])}

{(2, 1): 0.0,
 (3, 1): 0.0,
 (4, 2): 0.0,
 (4, 3): 0.927230954170227,
 (5, 3): 0.7759756445884705,
 (6, 4): 0.0,
 (6, 5): 0.0,
 (7, 6): 1.3788892030715942}