# World of Supply

A simulation environment for multi-echelon supply chain optimization problems. 

In [1]:
import numpy as np
from tqdm import tqdm as tqdm
import importlib
from pprint import pprint
import random
from IPython.display import display, HTML
from IPython.display import Image
from dataclasses import dataclass

# Core Simulation Logic and Rendering

In this section, we test the core simulator and renderer (without RL adapters and integrations).

In [9]:
import world_of_supply_environment as ws
importlib.reload(ws)
import world_of_supply_renderer as wsr
importlib.reload(wsr)

# --- Throughput benchmark ---
# Drive the simulator with the simple control policy; the tqdm progress bar
# reports the achieved simulation rate (steps/sec).
world = ws.WorldBuilder.create(80, 16)
policy = ws.SimpleControlPolicy()
for step in tqdm(range(10000)):
    control = policy.compute_control(world)
    world.act(control)

# --- Rendering test ---
# Start again from a freshly built world and policy so that the rendered
# sequence does not continue from the benchmark run above.
renderer = wsr.AsciiWorldRenderer()
frame_seq = []
world = ws.WorldBuilder.create(80, 16)
policy = ws.SimpleControlPolicy()
for epoch in range(1000):
    # Lightweight progress log: one line every 20 epochs
    if not epoch % 20:
        print(f"Rendering epoch {epoch}")
    # The frame is captured before the world is advanced for this epoch
    frame = renderer.render(world)
    frame_seq += [np.asarray(frame)]
    control = policy.compute_control(world)
    world.act(control)

# Display the collected frames as an image sequence
wsr.AsciiWorldRenderer.plot_sequence_images(frame_seq)

100%|██████████| 10000/10000 [00:01<00:00, 5227.18it/s]


Rendering epoch 0
Rendering epoch 20
Rendering epoch 40
Rendering epoch 60
Rendering epoch 80
Rendering epoch 100
Rendering epoch 120
Rendering epoch 140
Rendering epoch 160
Rendering epoch 180
Rendering epoch 200
Rendering epoch 220
Rendering epoch 240
Rendering epoch 260
Rendering epoch 280
Rendering epoch 300
Rendering epoch 320
Rendering epoch 340
Rendering epoch 360
Rendering epoch 380
Rendering epoch 400
Rendering epoch 420
Rendering epoch 440
Rendering epoch 460
Rendering epoch 480
Rendering epoch 500
Rendering epoch 520
Rendering epoch 540
Rendering epoch 560
Rendering epoch 580
Rendering epoch 600
Rendering epoch 620
Rendering epoch 640
Rendering epoch 660
Rendering epoch 680
Rendering epoch 700
Rendering epoch 720
Rendering epoch 740
Rendering epoch 760
Rendering epoch 780
Rendering epoch 800
Rendering epoch 820
Rendering epoch 840
Rendering epoch 860
Rendering epoch 880
Rendering epoch 900
Rendering epoch 920
Rendering epoch 940
Rendering epoch 960
Rendering epoch 980


<Figure size 1420x1712 with 0 Axes>

# Policy Training

In [None]:
# NOTE: the RLlib adapter module is aliased as `wsrl` (the alias the evaluation
# cell uses for the same module). Aliasing it as `wsr` would rebind the name
# that the core simulation cell uses for the renderer module.
import world_of_supply_rllib as wsrl
importlib.reload(wsrl)
import world_of_supply_rllib_training as wst
importlib.reload(wst)

# Policy training
# `trainer` is kept as a notebook-level variable: the evaluation cell below
# fetches policies from it via trainer.get_policy(...).
# Alternative run (baseline policy only), kept for reference:
#trainer = wst.play_baseline(n_iterations = 5)
trainer = wst.train_ppo(n_iterations = 10, competing_policies = ['baseline', 'ppo'])

# Policy Evaluation

In this section, we evaluate the trained policy.

### Rendering One Episode for the Trained Policy

In [104]:
import world_of_supply_renderer as wsren
importlib.reload(wsren)
import world_of_supply_rllib as wsrl
importlib.reload(wsrl)
import world_of_supply_rllib_training as wstr
importlib.reload(wstr)

# Parameters of the tracing simulation
policy_mode = 'trainer_ppo' # 'trainer_baseline', 'trainer_ppo', 'inline_baseline'
episod_duration = 500
frames_to_render = episod_duration  # number of trailing epochs captured as frames

# Create the environment
renderer = wsren.AsciiWorldRenderer()
frame_seq = []
env = wsrl.WorldOfSupplyEnv(wstr.env_config)
states = env.reset()
infos = None  # no per-facility infos exist until the first env.step() call

# Create or fetch the policy.
# The 'trainer_*' modes read `trainer`, which is assigned in the Policy Training cell.
if policy_mode == 'trainer_baseline':
    policy = trainer.get_policy('baseline')
elif policy_mode == 'trainer_ppo':
    policy = trainer.get_policy('ppo')
elif policy_mode == 'inline_baseline':
    policy = wsrl.SimplePolicy(env.observation_space, env.action_space, {
            'facility_types': env.facility_types, 
            'number_of_products': env.n_products()
        })
else:
    # Fail fast: without this branch an unrecognized mode would silently reuse
    # whatever `policy` object an earlier cell left in the kernel namespace.
    raise ValueError(
        f"Unknown policy_mode {policy_mode!r}; expected one of "
        "'trainer_baseline', 'trainer_ppo', 'inline_baseline'"
    )

# Simulation loop
for epoch in range(episod_duration):
    if epoch % 20 == 0:
        print(f"Rendering epoch {epoch}")

    # Compute one action per facility from its current observation
    action_dict = {}
    for facility_id, facility_state in states.items():
        # Pass the facility's info only when the previous step produced one
        extra_args = {}
        if infos is not None and facility_id in infos:
            extra_args['info'] = infos[facility_id]
        # compute_single_action returns a sequence; element 0 is used as the action
        action_dict[facility_id] = policy.compute_single_action(
            facility_state, state=[], **extra_args
        )[0]

    states, reward, dones, infos = env.step(action_dict)

    # Capture the last `frames_to_render` epochs. `>=` (not `>`) is required so
    # that exactly `frames_to_render` frames are collected; with `>` the first
    # epoch of the window was dropped (499 frames instead of 500 here).
    if epoch >= episod_duration - frames_to_render:
        frame = renderer.render(env.world)
        frame_seq.append(np.asarray(frame))

wsren.AsciiWorldRenderer.plot_sequence_images(frame_seq)

2020-04-14 10:28:41,535	INFO resource_spec.py:212 -- Starting Ray with 4.69 GiB memory available for workers and up to 2.37 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-04-14 10:28:41,863	INFO services.py:1148 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


Rendering epoch 0
Rendering epoch 20
Rendering epoch 40
Rendering epoch 60
Rendering epoch 80
Rendering epoch 100
Rendering epoch 120
Rendering epoch 140
Rendering epoch 160
Rendering epoch 180
Rendering epoch 200
Rendering epoch 220
Rendering epoch 240
Rendering epoch 260
Rendering epoch 280
Rendering epoch 300
Rendering epoch 320
Rendering epoch 340
Rendering epoch 360
Rendering epoch 380
Rendering epoch 400
Rendering epoch 420
Rendering epoch 440
Rendering epoch 460
Rendering epoch 480


<Figure size 1420x1712 with 0 Axes>