In [5]:
import itertools
import random
import numpy as np
import gym
import time
from gym import spaces
from typing import Tuple, List
from envs.broken_components import BrokenComponentsEnv
from envs.data_handler import DataHandler
from IPython.display import clear_output

In [6]:
data_handler = DataHandler()
broken_components = data_handler.get_sample_component_failure_pairs(5)
broken_components

[('User Management Service', 'CF5'),
 ('Seller Reputation Item Filter', 'CF3'),
 ('Region Item Filter', 'CF3'),
 ('Availability Item Filter', 'CF2'),
 ('Comment Item Filter', 'CF2')]

## Environment

In [7]:
# possible reward_modus 'raw', 'sqt', 'log10', 'cubic'
env = BrokenComponentsEnv(broken_components, reward_modus='raw')

In [24]:
for _ in range(10):
    env.reset()
    for _ in range(100):
        action = env.action_space.sample()
        env.step(action)
        print(action)

1
0
3
2
2
2
0
0
1
4
2
0
0
1
1
4
4
4
1
3
2
3
2
2
0
0
4
4
0
3
4
3
3
4
1
1
0
4
4
2
0
1
4
2
4
4
3
0
1
2
0
4
2
4
3
0
3
1
2
2
0
1
4
3
1
4
0
1
1
2
3
4
4
0
2
1
1
3
2
2
4
0
1
2
3
3
1
1
3
1
4
3
2
1
0
3
0
3
1
1
4
3
2
1
3
2
4
1
4
2
2
4
3
1
3
0
0
0
3
2
2
4
0
4
0
0
2
1
2
0
3
4
3
2
2
2
1
4
3
3
2
0
4
0
0
1
3
1
4
3
4
4
1
2
0
2
0
2
0
4
3
1
1
2
3
0
1
4
4
1
4
0
0
3
3
3
3
2
0
4
3
3
2
1
3
1
4
0
4
3
3
1
4
2
4
1
1
4
1
1
0
2
2
3
4
1
3
0
3
2
2
0
4
1
1
2
0
4
2
2
0
0
0
4
4
0
1
0
1
1
2
2
3
2
3
0
0
3
4
0
4
2
2
1
1
2
2
2
2
1
1
1
0
0
2
4
2
0
2
1
1
1
4
0
1
2
2
1
4
3
3
4
3
2
1
0
2
4
2
4
4
0
1
4
1
3
1
2
1
3
0
2
0
3
4
1
2
4
1
4
4
1
0
2
2
2
0
1
4
1
2
1
3
1
2
1
1
1
4
3
2
2
1
1
0
1
3
2
2
3
4
3
0
4
2
0
1
3
2
0
0
4
0
1
1
2
3
0
2
2
4
0
3
3
0
4
4
3
4
2
0
3
2
3
0
3
1
1
4
1
4
1
4
0
0
0
0
0
4
4
1
2
1
1
1
0
0
0
0
2
3
2
0
3
0
2
0
2
0
4
2
4
4
4
4
3
2
2
2
1
4
0
2
4
3
1
0
4
4
2
0
2
1
3
3
0
4
1
1
2
1
4
2
0
3
0
0
1
0
3
1
2
4
2
0
2
4
2
0
4
4
2
4
1
2
1
1
1
2
4
3
1
1
0
3
4
2
2
1
4
4
4
0
0
1
4
0
1
0
1
3
2
3
2
4
0
0
4
0
2
1
0
3
4
0
0
3
2
4
0


### Env properties

In [9]:
n_actions = env.action_space.n
n_actions

5

In [10]:
n_spaces = env.observation_space.n
n_spaces

32

In [11]:
env.action_space_names

array([('User Management Service', 'CF5'),
       ('Seller Reputation Item Filter', 'CF3'),
       ('Region Item Filter', 'CF3'), ('Availability Item Filter', 'CF2'),
       ('Comment Item Filter', 'CF2')], dtype=object)

In [12]:
env.observation_space_names

[array([('User Management Service', 'CF5'),
        ('Seller Reputation Item Filter', 'CF3'),
        ('Region Item Filter', 'CF3'), ('Availability Item Filter', 'CF2'),
        ('Comment Item Filter', 'CF2')], dtype=object),
 array([('User Management Service', 'CF5'),
        ('Seller Reputation Item Filter', 'CF3'),
        ('Region Item Filter', 'CF3'), ('Availability Item Filter', 'CF2')],
       dtype=object),
 array([('User Management Service', 'CF5'),
        ('Seller Reputation Item Filter', 'CF3'),
        ('Region Item Filter', 'CF3'), ('Comment Item Filter', 'CF2')],
       dtype=object),
 array([('User Management Service', 'CF5'),
        ('Seller Reputation Item Filter', 'CF3'),
        ('Region Item Filter', 'CF3')], dtype=object),
 array([('User Management Service', 'CF5'),
        ('Seller Reputation Item Filter', 'CF3'),
        ('Availability Item Filter', 'CF2'),
        ('Comment Item Filter', 'CF2')], dtype=object),
 array([('User Management Service', 'CF5'),
     

### Examples

#### Example 1

In [13]:
env.reset(reward_modus='raw') # possible reward_modus 'raw', 'sqt', 'log10', 'cubic'
env.render()

action = env.action_space.sample()
state, reward, done, _ = env.step(action)
print(state, reward, done)
env.render()

action = env.action_space.sample()
state, reward, done, _ = env.step(action)
print(state, reward, done)
env.render()

action = env.action_space.sample()
state, reward, done, _ = env.step(action)
print(state, reward, done)
env.render()

Steps:  0
Action:  None
Successful:  None
State:  [('User Management Service', 'CF5'), ('Seller Reputation Item Filter', 'CF3'), ('Region Item Filter', 'CF3'), ('Availability Item Filter', 'CF2'), ('Comment Item Filter', 'CF2')] 


4 151.0260728 False
Steps:  1
Action:  ('Region Item Filter', 'CF3')
Successful:  True
State:  [('User Management Service', 'CF5'), ('Seller Reputation Item Filter', 'CF3'), ('Availability Item Filter', 'CF2'), ('Comment Item Filter', 'CF2')] 


6 8.0 False
Steps:  2
Action:  ('Availability Item Filter', 'CF2')
Successful:  True
State:  [('User Management Service', 'CF5'), ('Seller Reputation Item Filter', 'CF3'), ('Comment Item Filter', 'CF2')] 


6 0 False
Steps:  3
Action:  ('Region Item Filter', 'CF3')
Successful:  False
State:  [('User Management Service', 'CF5'), ('Seller Reputation Item Filter', 'CF3'), ('Comment Item Filter', 'CF2')] 




#### Example 2

In [14]:
env.reset('raw') # possible reward_modus 'raw', 'sqt', 'log10', 'cubic'
accumulated_reward = 0
episode_done = False

while(not episode_done):
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    accumulated_reward += reward
    
    clear_output(wait=True)
    env.render()
    time.sleep(1)
    
    
    if(done):
        print('FINISHED!')
        print('Acc Reward: ', accumulated_reward)
    episode_done = done

Steps:  7
Action:  ('Seller Reputation Item Filter', 'CF3')
Successful:  True
State:  [] 


FINISHED!
Acc Reward:  1899.1198108000003
