In [1]:
import itertools
import random
import numpy as np
import gym
import time
from gym import spaces
from typing import Tuple, List
from broken_components import BrokenComponentsEnv, DATA_HANDLER
from IPython.display import clear_output

In [2]:
# Ask the data handler for 5 (component, failure) pairs; the resulting list is
# what the environment below is constructed from.
# NOTE(review): the method name suggests a sample is drawn — confirm whether it
# is random, since that would make the outputs below vary between runs.
broken_components = DATA_HANDLER.get_sample_component_failure_pairs(5)
broken_components

[('Comment Item Filter', 'CF5'),
 ('Region Item Filter', 'CF1'),
 ('Category Item Filter', 'CF2'),
 ('Past Sales Item Filter', 'CF5'),
 ('Reputation Service', 'CF1')]

## Environment

In [3]:
# Build the environment from the component/failure pairs obtained above.
# Possible values for reward_modus: 'raw', 'sqt', 'log10', 'cubic'.
# NOTE(review): 'sqt' may be a typo for 'sqrt' — confirm against BrokenComponentsEnv.
env = BrokenComponentsEnv(broken_components, reward_modus='raw')

### Env properties

In [4]:
# Number of discrete actions the environment exposes.
n_actions = env.action_space.n
n_actions

5

In [5]:
# Number of discrete observations (states) the environment exposes.
# NOTE(review): with 5 pairs this is 32 = 2**5, which would fit one state per
# subset of the pairs — confirm against the environment implementation.
n_spaces = env.observation_space.n
n_spaces

32

In [6]:
# Show the (component, failure) label attached to each action.
# NOTE(review): presumably ordered by action index — confirm.
env.action_space_names

array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
       ('Category Item Filter', 'CF2'), ('Past Sales Item Filter', 'CF5'),
       ('Reputation Service', 'CF1')], dtype=object)

In [7]:
# Show the label of each observation: a list of (component, failure) pairs.
# NOTE(review): these look like the pairs still broken in that state, indexed
# by state id — confirm.
env.observation_space_names

[array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Category Item Filter', 'CF2'), ('Past Sales Item Filter', 'CF5'),
        ('Reputation Service', 'CF1')], dtype=object),
 array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Category Item Filter', 'CF2'), ('Past Sales Item Filter', 'CF5')],
       dtype=object),
 array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Category Item Filter', 'CF2'), ('Reputation Service', 'CF1')],
       dtype=object),
 array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Category Item Filter', 'CF2')], dtype=object),
 array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Past Sales Item Filter', 'CF5'), ('Reputation Service', 'CF1')],
       dtype=object),
 array([('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'),
        ('Past Sales Item Filter', 'CF5')], dtype=object),
 array([('Comment Item Filter', 'CF5

### Examples

#### Example 1

In [8]:
# Example 1: reset the environment, show the initial state, then take three
# randomly sampled actions, printing each step result and re-rendering.
# Possible reward_modus values: 'raw', 'sqt', 'log10', 'cubic'.
env.reset(reward_modus='raw')
env.render()

for _attempt in range(3):
    # Same step / print / render sequence for every attempt.
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    print(state, reward, done)
    env.render()

Steps:  0
Action:  None
Successful:  None
State:  [('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'), ('Category Item Filter', 'CF2'), ('Past Sales Item Filter', 'CF5'), ('Reputation Service', 'CF1')] 


2 1373.772889 False
Steps:  1
Action:  ('Past Sales Item Filter', 'CF5')
Successful:  True
State:  [('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'), ('Category Item Filter', 'CF2'), ('Reputation Service', 'CF1')] 


2 0 False
Steps:  2
Action:  ('Past Sales Item Filter', 'CF5')
Successful:  False
State:  [('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'), ('Category Item Filter', 'CF2'), ('Reputation Service', 'CF1')] 


2 0 False
Steps:  3
Action:  ('Past Sales Item Filter', 'CF5')
Successful:  False
State:  [('Comment Item Filter', 'CF5'), ('Region Item Filter', 'CF1'), ('Category Item Filter', 'CF2'), ('Reputation Service', 'CF1')] 




#### Example 2

In [10]:
# Example 2: play one full episode with randomly sampled actions, redrawing
# the environment after every step and summing up the rewards.
# Possible reward_modus values: 'raw', 'sqt', 'log10', 'cubic'.
env.reset('raw')
accumulated_reward = 0
episode_done = False

while not episode_done:
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    accumulated_reward += reward
    episode_done = done

    # Replace the previous frame instead of appending below it, then pause
    # so each step stays visible for a second.
    clear_output(wait=True)
    env.render()
    time.sleep(1)

# The loop only exits once the environment reported done.
print('FINISHED!')
print('Acc Reward: ', accumulated_reward)

Steps:  25
Action:  ('Region Item Filter', 'CF1')
Successful:  True
State:  [] 


FINISHED!
Acc Reward:  1162.8913083500001
