## Performance target: beat the classical baseline

### What is the classical baseline?

- The best performance achievable using classical methods (i.e., non-ML techniques)

In [2]:
import numpy as np

from inventory_env.inventory_env import InventoryEnv, classical_baseline_action

# Estimate the classical baseline: run the baseline policy for many episodes
# and average the total reward per episode.
# No seed is set on the environment here, so the printed average can differ
# from one run of this cell to the next.
env = InventoryEnv()
num_episodes = 100000
all_r = []  # total reward of each finished episode
for _ in range(num_episodes):
    obs = env.reset()
    ep_r = 0
    done = False
    # Step until the environment reports the episode is over.
    while not done:
        action = classical_baseline_action(obs)
        # The baseline's action is rounded with np.around before stepping.
        obs, r, done, _ = env.step(np.around(action))
        ep_r += r
    all_r.append(ep_r)
# Mean episode return across all rollouts.
baseline = sum(all_r) / num_episodes
print(baseline)

174360.77630784345


In [5]:
import numpy as np

from inventory_env.inventory_env import InventoryEnv, classical_baseline_action

# Repeat the unseeded baseline estimate with a fresh environment.
# Because the environment is not seeded, this run's average is not expected
# to match the previous run exactly.
env = InventoryEnv()
num_episodes = 100000
all_r = []  # total reward of each finished episode
for _ in range(num_episodes):
    obs = env.reset()
    ep_r = 0
    done = False
    # Step until the environment reports the episode is over.
    while not done:
        action = classical_baseline_action(obs)
        # The baseline's action is rounded with np.around before stepping.
        obs, r, done, _ = env.step(np.around(action))
        ep_r += r
    all_r.append(ep_r)
# Mean episode return across all rollouts.
baseline = sum(all_r) / num_episodes
print(baseline)

174583.1010589181


## Reproducible classical baseline using `env.seed()`

In [3]:
import numpy as np

from inventory_env.inventory_env import InventoryEnv, classical_baseline_action

# Estimate the classical baseline with a seeded environment so that the
# printed average is the same every time this cell is run.
env = InventoryEnv()
env.seed(seed=0)
num_episodes = 100000
all_r = []  # total reward of each finished episode
for _ in range(num_episodes):
    obs = env.reset()
    ep_r = 0
    done = False
    # Step until the environment reports the episode is over.
    while not done:
        action = classical_baseline_action(obs)
        # The baseline's action is rounded with np.around before stepping.
        obs, r, done, _ = env.step(np.around(action))
        ep_r += r
    all_r.append(ep_r)
# Mean episode return across all rollouts.
baseline = sum(all_r) / num_episodes
print(baseline)

175078.67432675502


In [4]:
import numpy as np

from inventory_env.inventory_env import InventoryEnv, classical_baseline_action

# Re-run the seeded baseline estimate from scratch (new environment, same
# seed) to confirm that the average is reproduced exactly.
env = InventoryEnv()
env.seed(seed=0)
num_episodes = 100000
all_r = []  # total reward of each finished episode
for _ in range(num_episodes):
    obs = env.reset()
    ep_r = 0
    done = False
    # Step until the environment reports the episode is over.
    while not done:
        action = classical_baseline_action(obs)
        # The baseline's action is rounded with np.around before stepping.
        obs, r, done, _ = env.step(np.around(action))
        ep_r += r
    all_r.append(ep_r)
# Mean episode return across all rollouts.
baseline = sum(all_r) / num_episodes
print(baseline)

175078.67432675502
