In [1]:
cd ..

/Users/hao/workspace/hpi_de/4th_Semester/Online Learning/rl-4-self-repair


In [18]:
import random
import numpy as np
import pandas as pd
from envs.broken_components import BrokenComponentsEnv
from envs.data_handler import DataHandler

In [28]:
class EnvApproximator():
    """Approximate a failure matrix by interacting with a repair environment.

    The approximator repeatedly executes actions on the wrapped environment
    and keeps a signed count matrix ``failure_matrix_count`` of shape
    ``(n_actions, n_actions)``:

    * when an action yields ``reward == 0``, the vector returned as fourth
      element of ``env.step`` (with the entry of the action itself zeroed)
      is *added* to the row of that action;
    * when an action yields ``reward > 0``, that vector is *subtracted*
      from the row of that action.

    ``calc_failure_matrix`` turns the non-negative part of these counts
    into row-wise fractions.

    The environment is expected to expose ``action_space.n``,
    ``action_space.sample()``, ``step(action)`` returning a 4-tuple and
    ``reset()``; ``action_space_names`` is only needed for ``as_df=True``.
    """

    def __init__(self, env):
        """Store ``env`` and allocate an all-zero count matrix."""
        self.env = env
        n_actions = env.action_space.n
        self.failure_matrix_count = np.zeros((n_actions, n_actions))

    def __all_actions(self):
        """Return a fresh list with every action index of the environment."""
        return list(range(self.env.action_space.n))

    def fit_by_episodes(self, num_episodes, improved_sampling=False, verbose=False):
        """Interact with the environment until ``num_episodes`` episodes finished.

        Args:
            num_episodes: number of completed episodes to collect.
            improved_sampling: if True, sample only among actions that have
                not yet succeeded in the current episode; otherwise use
                ``env.action_space.sample()``.
            verbose: if True, print action, reward and returned vector per step.
        """
        episode = 0
        temp_actions = self.__all_actions()
        while episode < num_episodes:
            temp_actions, episode = self.__update(
                temp_actions, improved_sampling, episode, verbose=verbose
            )

    def fit_by_steps(self, num_steps, improved_sampling=False, verbose=False):
        """Interact with the environment for exactly ``num_steps`` steps.

        Args:
            num_steps: number of environment steps to execute.
            improved_sampling: see ``fit_by_episodes``.
            verbose: if True, print action, reward and returned vector per step.
        """
        temp_actions = self.__all_actions()
        for _ in range(num_steps):
            temp_actions, _ = self.__update(temp_actions, improved_sampling, verbose=verbose)

    def __update(self, temp_actions, improved_sampling, episode=0, verbose=False):
        """Execute one environment step and update the count matrix.

        Args:
            temp_actions: list of candidate actions used when
                ``improved_sampling`` is True (modified in place on success).
            improved_sampling: whether to draw the action from ``temp_actions``.
            episode: running count of finished episodes.
            verbose: if True, print details of the step.

        Returns:
            Tuple ``(temp_actions, episode)`` with the possibly refreshed
            candidate list and the possibly incremented episode counter.
        """
        if improved_sampling:
            action = random.choice(temp_actions)
        else:
            action = self.env.action_space.sample()

        # Always step the environment owned by this instance, never a
        # notebook-level global that merely happens to be called ``env``.
        _, reward, done, state_vec = self.env.step(action)

        if verbose:
            print('Action: ', action)
            print('Reward: ', reward)
            print('Broken Comp:', state_vec, '\n')

        # Work on a private copy so the object handed out by the environment
        # is never modified from here.
        broken = np.array(state_vec, dtype=float)

        if reward == 0:
            # The attempted action itself is not counted in its own row.
            broken[action] = 0
            self.failure_matrix_count[action] += broken
        elif reward > 0:
            self.failure_matrix_count[action] -= broken

            if improved_sampling:
                temp_actions.remove(action)

            if done:
                self.env.reset()
                episode += 1
                if improved_sampling:
                    temp_actions = self.__all_actions()

        return temp_actions, episode

    def calc_failure_matrix(self, as_df=False):
        """Return the row-normalised, non-negative part of the count matrix.

        Negative counts are clipped to zero; every row with a positive sum
        is divided by that sum, rows summing to zero stay all-zero.

        Args:
            as_df: if True, return a ``pandas.DataFrame`` whose index and
                columns are the first tuple elements of
                ``env.action_space_names``; otherwise return an ndarray.
        """
        clipped_counts = np.clip(self.failure_matrix_count, a_min=0, a_max=None)
        row_sums = clipped_counts.sum(axis=1, keepdims=True)
        # Safe division: rows whose sum is zero keep the zeros from ``out``.
        failure_matrix_percentage = np.divide(
            clipped_counts,
            row_sums,
            out=np.zeros_like(clipped_counts),
            where=row_sums != 0,
        )

        if as_df:
            component_names = [tup[0] for tup in self.env.action_space_names]
            return pd.DataFrame(failure_matrix_percentage, columns=component_names, index=component_names)
        return failure_matrix_percentage

    def reset(self):
        """Reset the environment and zero the count matrix."""
        self.env.reset()
        self.failure_matrix_count = np.zeros((self.env.action_space.n, self.env.action_space.n))

## Init

In [29]:
# Draw 5 sample pairs from the data handler; the displayed result is a list
# of (component id, failure name) tuples that parameterise the environment.
dh = DataHandler()
broken_components = dh.get_sample_component_failure_pairs(5)
broken_components

[('_SFCdyucdEeet0YmmfbMwkw', 'CF3'),
 ('_SFLn5ecdEeet0YmmfbMwkw', 'CF3'),
 ('_SFOroOcdEeet0YmmfbMwkw', 'CF2'),
 ('_SFE6cOcdEeet0YmmfbMwkw', 'CF3'),
 ('_SEu7uucdEeet0YmmfbMwkw', 'CF3')]

In [30]:
# Build the environment from the sampled component/failure pairs.
env = BrokenComponentsEnv(broken_components)

In [31]:
# Wrap the environment; the count matrix starts as all zeros.
env_approximator = EnvApproximator(env)

## Make approximation

### by number of steps
* improved_sampling=False uses the action sampling function of gym.env
* improved_sampling=True uses our own improved action sampling function,  
which excludes actions that already succeeded (a component that has been repaired cannot be repaired again)

In [32]:
# Reset environment and counts, then run exactly 5 steps, printing each one.
env_approximator.reset()
env_approximator.fit_by_steps(5, improved_sampling=True, verbose=True)

Action:  0
Reward:  0
Broken Comp: [1. 1. 1. 1. 1.] 

Action:  0
Reward:  0
Broken Comp: [1. 1. 1. 1. 1.] 

Action:  1
Reward:  25.0
Broken Comp: [1. 0. 1. 1. 1.] 

Action:  3
Reward:  16.0
Broken Comp: [1. 0. 1. 0. 1.] 

Action:  2
Reward:  6.0
Broken Comp: [1. 0. 0. 0. 1.] 



### by number of episodes
* improved_sampling=False uses the action sampling function of gym.env
* improved_sampling=True uses our own improved action sampling function,  
which excludes actions that already succeeded (a component that has been repaired cannot be repaired again)

In [33]:
# Reset environment and counts, then run until 5 episodes have finished.
env_approximator.reset()
env_approximator.fit_by_episodes(5, improved_sampling=True, verbose=True)

Action:  3
Reward:  16.0
Broken Comp: [1. 1. 1. 0. 1.] 

Action:  2
Reward:  6.0
Broken Comp: [1. 1. 0. 0. 1.] 

Action:  1
Reward:  25.0
Broken Comp: [1. 0. 0. 0. 1.] 

Action:  4
Reward:  9.0
Broken Comp: [1. 0. 0. 0. 0.] 

Action:  0
Reward:  12.0
Broken Comp: [0. 0. 0. 0. 0.] 

Action:  2
Reward:  6.0
Broken Comp: [1. 1. 0. 1. 1.] 

Action:  3
Reward:  16.0
Broken Comp: [1. 1. 0. 0. 1.] 

Action:  0
Reward:  0
Broken Comp: [1. 1. 0. 0. 1.] 

Action:  4
Reward:  9.0
Broken Comp: [1. 1. 0. 0. 0.] 

Action:  0
Reward:  12.0
Broken Comp: [0. 1. 0. 0. 0.] 

Action:  1
Reward:  25.0
Broken Comp: [0. 0. 0. 0. 0.] 

Action:  4
Reward:  9.0
Broken Comp: [1. 1. 1. 1. 0.] 

Action:  3
Reward:  16.0
Broken Comp: [1. 1. 1. 0. 0.] 

Action:  1
Reward:  25.0
Broken Comp: [1. 0. 1. 0. 0.] 

Action:  0
Reward:  0
Broken Comp: [1. 0. 1. 0. 0.] 

Action:  0
Reward:  0
Broken Comp: [1. 0. 1. 0. 0.] 

Action:  2
Reward:  6.0
Broken Comp: [1. 0. 0. 0. 0.] 

Action:  0
Reward:  12.0
Broken Comp: [0. 0. 0

#### show failure count matrix and percentage failure matrix

In [34]:
# Raw signed counts: one row per attempted action (added on reward 0,
# subtracted on positive reward).
env_approximator.failure_matrix_count

array([[ 0.,  1.,  3.,  1.,  3.],
       [-4.,  0., -3., -1., -2.],
       [-5., -2.,  0., -2., -3.],
       [-5., -4., -3.,  0., -3.],
       [-5., -3., -2., -2.,  0.]])

In [35]:
# Negative counts clipped to zero, then each row divided by its sum.
env_approximator.calc_failure_matrix()

array([[0.   , 0.125, 0.375, 0.125, 0.375],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ]])

In [36]:
# Same matrix as a DataFrame labelled with the component names.
env_approximator.calc_failure_matrix(as_df=True)

Unnamed: 0,_SFCdyucdEeet0YmmfbMwkw,_SFLn5ecdEeet0YmmfbMwkw,_SFOroOcdEeet0YmmfbMwkw,_SFE6cOcdEeet0YmmfbMwkw,_SEu7uucdEeet0YmmfbMwkw
_SFCdyucdEeet0YmmfbMwkw,0.0,0.125,0.375,0.125,0.375
_SFLn5ecdEeet0YmmfbMwkw,0.0,0.0,0.0,0.0,0.0
_SFOroOcdEeet0YmmfbMwkw,0.0,0.0,0.0,0.0,0.0
_SFE6cOcdEeet0YmmfbMwkw,0.0,0.0,0.0,0.0,0.0
_SEu7uucdEeet0YmmfbMwkw,0.0,0.0,0.0,0.0,0.0
