# World of Supply

A simulation environment for multi-echelon supply chain optimization problems. 

In [1]:
import numpy as np
from tqdm import tqdm as tqdm
import importlib
from IPython.display import display, HTML
from IPython.display import Image
from dataclasses import dataclass

# Core Simulation Logic

In [37]:
# Import the project modules and reload them, so that source edits made
# since the first import are picked up in this session.
import world_of_supply_environment as ws
importlib.reload(ws)
import world_of_supply_renderer as wsr
importlib.reload(wsr)

# Measure the simulation rate, steps/sec
# (the tqdm progress bar reports iterations per second for this loop).
# NOTE(review): the arguments (80, 16) look like the world dimensions —
# confirm against WorldBuilder.create.
world = ws.WorldBuilder.create(80, 16)
policy = ws.SimpleControlPolicy()
for i in tqdm(range(10000)):
    # One step: the policy computes a control from the current world,
    # then the world applies it.
    world.act(policy.compute_control(world))
    
# Test rendering
# `world` and `policy` are rebound to newly created objects here, so the
# rendering loop does not reuse the world advanced by the loop above.
renderer = wsr.AsciiWorldRenderer()
frame_seq = []
world = ws.WorldBuilder.create(80, 16)
policy = ws.SimpleControlPolicy()
for epoch in range(1000):
    # Print a progress message on every 20th epoch.
    if epoch % 20 == 0:
        print(f"Rendering epoch {epoch}")
    # Render the world *before* advancing it, and keep the frame as a
    # NumPy array.
    frame = renderer.render(world)
    frame_seq.append(np.asarray(frame))
    world.act(policy.compute_control(world))

# Hand all collected frames to the renderer's sequence-plotting helper.
# NOTE(review): presumably this displays the frames as an animation —
# confirm against AsciiWorldRenderer.plot_sequence_images.
wsr.AsciiWorldRenderer.plot_sequence_images(frame_seq)

100%|██████████| 10000/10000 [00:01<00:00, 5005.01it/s]


Rendering epoch 0
Rendering epoch 20
Rendering epoch 40
Rendering epoch 60
Rendering epoch 80
Rendering epoch 100
Rendering epoch 120
Rendering epoch 140
Rendering epoch 160
Rendering epoch 180
Rendering epoch 200
Rendering epoch 220
Rendering epoch 240
Rendering epoch 260
Rendering epoch 280
Rendering epoch 300
Rendering epoch 320
Rendering epoch 340
Rendering epoch 360
Rendering epoch 380
Rendering epoch 400
Rendering epoch 420
Rendering epoch 440
Rendering epoch 460
Rendering epoch 480
Rendering epoch 500
Rendering epoch 520
Rendering epoch 540
Rendering epoch 560
Rendering epoch 580
Rendering epoch 600
Rendering epoch 620
Rendering epoch 640
Rendering epoch 660
Rendering epoch 680
Rendering epoch 700
Rendering epoch 720
Rendering epoch 740
Rendering epoch 760
Rendering epoch 780
Rendering epoch 800
Rendering epoch 820
Rendering epoch 840
Rendering epoch 860
Rendering epoch 880
Rendering epoch 900
Rendering epoch 920
Rendering epoch 940
Rendering epoch 960
Rendering epoch 980


<Figure size 1420x1712 with 0 Axes>

In [None]:
# NOTE: this rebinds the `wsr` alias, which the simulation/rendering cell
# above uses for world_of_supply_renderer. After this cell runs, `wsr`
# refers to world_of_supply_rllib until that earlier cell is re-run.
import world_of_supply_rllib as wsr
importlib.reload(wsr)

# Build an environment with the config key 'episod_duration' set to 1000
# and reset it. The training cell below reads `env.observation_space` and
# `env.action_space` from this instance.
env = wsr.WorldOfSupplyEnv({'episod_duration': 1000})
env.reset()

In [None]:
# Import and reload the RLlib wrapper module so that source edits made
# since the first import are picked up. `wsr` here is world_of_supply_rllib.
import world_of_supply_rllib as wsr
importlib.reload(wsr)

import ray
# NOTE(review): `ddpg` and `tf` are not used anywhere in the visible cells.
import ray.rllib.agents.ddpg as ddpg
from ray.tune.logger import pretty_print
from ray.rllib.utils import try_import_tf

from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy

tf = try_import_tf()

# Shut Ray down before initializing it again.
# NOTE(review): presumably so this cell can be re-run in the same
# session — confirm.
ray.shutdown()
ray.init()

# Policy specifications keyed by policy ID. Each value is a tuple of
# (policy class, observation space, action space, per-policy config).
# All four entries use PPOTFPolicy, take their spaces from the `env`
# instance created in the previous cell, and set 'use_lstm' to True.
policies = {
        "raw_materials": (PPOTFPolicy, env.observation_space, env.action_space, {'use_lstm': True}),
        "toy_factory":   (PPOTFPolicy, env.observation_space, env.action_space, {'use_lstm': True}),
        "warehouse":     (PPOTFPolicy, env.observation_space, env.action_space, {'use_lstm': True}),
        "retailer":      (PPOTFPolicy, env.observation_space, env.action_space, {'use_lstm': True})
    }

def policy_mapping_fn(agent_id):
    """Return the ID of the policy that controls the given agent.

    The policy is chosen from the prefix of ``agent_id``:

    * ``SteelFactory*`` and ``LumberFactory*`` -> ``"raw_materials"``
    * ``ToyFactory*`` -> ``"toy_factory"``
    * ``Warehouse*`` -> ``"warehouse"``
    * ``Retailer*`` -> ``"retailer"``

    Args:
        agent_id: Agent identifier string.

    Returns:
        The policy ID string for the agent.

    Raises:
        ValueError: If ``agent_id`` starts with none of the known prefixes.
    """
    # Both raw-material producers share one policy; str.startswith accepts
    # a tuple of prefixes, so a single check covers them.
    if agent_id.startswith(("SteelFactory", "LumberFactory")):
        return "raw_materials"
    if agent_id.startswith("ToyFactory"):
        return "toy_factory"
    if agent_id.startswith("Warehouse"):
        return "warehouse"
    if agent_id.startswith("Retailer"):
        return "retailer"
    # Fail loudly instead of silently returning None for an unknown agent:
    # None is not a valid policy ID, so report the offending agent here.
    raise ValueError(f"No policy is mapped to agent {agent_id!r}")
        
# Build a PPO trainer over the WorldOfSupplyEnv class (the class itself is
# passed, not the `env` instance created earlier).
ppo_trainer = PPOTrainer(
        env = wsr.WorldOfSupplyEnv,
        config = {
            # Environment config; uses the same 'episod_duration' key and
            # value as the standalone `env` created in the previous cell.
            "env_config": {
                "episod_duration": 1000
            },
            "num_envs_per_worker": 2,
            "num_workers": 6,
            # Multi-agent setup: the policy specs and the function that
            # maps each agent ID to one of those policy IDs.
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": policy_mapping_fn
            }
        })


# Run 10 training iterations, printing the pretty-printed result of each
# `train()` call.
for i in range(10):
    print("== Iteration", i, "==")
    print(pretty_print(ppo_trainer.train()))