In [2]:
import itertools
import random
import numpy as np
import gym
import time
from gym import spaces
from typing import Tuple, List
import sys
if "../" not in sys.path:
  sys.path.append("../") 
from envs.broken_components import BrokenComponentsEnv
from envs.data_handler import DataHandler
from IPython.display import clear_output

In [3]:
# Ask the project's DataHandler for 5 (component, failure) pairs; these are
# the "broken components" the environment below is built from.
# NOTE(review): the method name suggests a random sample — if so, no seed is
# set in this notebook, so the pairs may differ between runs; confirm.
dh = DataHandler()
broken_components = dh.get_sample_component_failure_pairs(5)
# Bare expression so the notebook displays the selected pairs.
broken_components

[('Persistence Service', 'CF5'),
 ('Region Item Filter', 'CF2'),
 ('Past Sales Item Filter', 'CF1'),
 ('Buy Now Item Filter', 'CF5'),
 ('Query Service', 'CF1')]

## Environment

In [4]:
# Build the environment from the broken components selected above.
# reward_modus options noted by the author: 'raw', 'sqt', 'log10', 'cube'
# NOTE(review): option spellings are not verified here — confirm against
# BrokenComponentsEnv before relying on them.
env = BrokenComponentsEnv(
    broken_components,
    reward_modus='raw',
    reward_decrease=True,
    reward_decrease_factor=0.99,
)

### Env properties

In [5]:
# Number of actions exposed by the environment's action space.
n_actions = env.action_space.n
n_actions

5

In [6]:
# Number of states exposed by the environment's observation space.
# NOTE(review): presumably one state per subset of the broken components
# (2**5 = 32 for five components) — confirm against BrokenComponentsEnv.
n_states = env.observation_space.n
n_states

32

In [7]:
# Show the (component, failure) pair associated with each action.
env.action_space_names

array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
       ('Past Sales Item Filter', 'CF1'), ('Buy Now Item Filter', 'CF5'),
       ('Query Service', 'CF1')], dtype=object)

In [8]:
# Show the set of still-broken (component, failure) pairs behind each state.
# This displays the whole list, so the output is long.
env.observation_space_names

[array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Past Sales Item Filter', 'CF1'), ('Buy Now Item Filter', 'CF5'),
        ('Query Service', 'CF1')], dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Past Sales Item Filter', 'CF1'), ('Buy Now Item Filter', 'CF5')],
       dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Past Sales Item Filter', 'CF1'), ('Query Service', 'CF1')],
       dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Past Sales Item Filter', 'CF1')], dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Buy Now Item Filter', 'CF5'), ('Query Service', 'CF1')],
       dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'),
        ('Buy Now Item Filter', 'CF5')], dtype=object),
 array([('Persistence Service', 'CF5'), ('Region Item F

### Examples

#### Example 1

In [9]:
# Start a fresh episode and show the initial state.
# reward_modus options noted by the author: 'raw', 'sqt', 'log10', 'cubic'
# NOTE(review): option spellings are not verified here — confirm against
# BrokenComponentsEnv before relying on them.
env.reset(reward_modus='raw')
env.render()

# Take three random actions; after each one print the step result
# (state, reward, done) and render the environment again.
for _attempt in range(3):
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    print(state, reward, done)
    env.render()

Steps:  0
Action:  None
Successful:  None
State:  [('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'), ('Past Sales Item Filter', 'CF1'), ('Buy Now Item Filter', 'CF5'), ('Query Service', 'CF1')] 


4 1331.6662161 False
Steps:  1
Action:  ('Past Sales Item Filter', 'CF1')
Successful:  True
State:  [('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'), ('Buy Now Item Filter', 'CF5'), ('Query Service', 'CF1')] 


6 110.01007036404002 False
Steps:  2
Action:  ('Buy Now Item Filter', 'CF5')
Successful:  True
State:  [('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'), ('Query Service', 'CF1')] 


6 -18911.34293578398 False
Steps:  3
Action:  ('Past Sales Item Filter', 'CF1')
Successful:  False
State:  [('Persistence Service', 'CF5'), ('Region Item Filter', 'CF2'), ('Query Service', 'CF1')] 




#### Example 2

In [11]:
# Run one full episode with uniformly sampled actions, redrawing the
# environment in place after every step and summing up the rewards.
# reward_modus options noted by the author: 'raw', 'sqt', 'log10', 'cube'
# NOTE(review): option spellings are not verified here — confirm against
# BrokenComponentsEnv before relying on them.
env.reset('raw')
accumulated_reward = 0
episode_done = False

while not episode_done:
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    episode_done = done
    accumulated_reward += reward

    # Replace the previous frame instead of appending a new one.
    clear_output(wait=True)
    env.render()
    print('Reward: ', reward)
    # Pause so each step stays visible before the next redraw.
    time.sleep(1)

    if episode_done:
        print('FINISHED!')
        print('Acc Reward: ', accumulated_reward)

Steps:  11
Action:  ('Buy Now Item Filter', 'CF5')
Successful:  True
State:  [] 


Reward:  187.46707400727914
FINISHED!
Acc Reward:  -86307.59557388765
