In [14]:
import gym
import random
import numpy as np
import itertools
from collections import namedtuple, deque
import pandas as pd


import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

from machine import Machine
from GymMachEnv import MachineEnv

Consider a full factorial design

In [2]:
params = {'max_kl' : [1e-06,1e-05,1e-04,1e-03],
          'gamma' : [0.8,0.9,0.95,0.99]}

import itertools
X1 = [0,1,2,3]
X2 = [0,1,2,3]
full_fact = []

number=1
for combination in itertools.product(X1,X2):
    full_fact.append(combination)
print(full_fact)

[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3), (3, 0), (3, 1), (3, 2), (3, 3)]


In [3]:
full_fact_params = [(params['max_kl'][x[0]],params['gamma'][x[1]]) for x in full_fact]
full_fact_params

[(1e-06, 0.8),
 (1e-06, 0.9),
 (1e-06, 0.95),
 (1e-06, 0.99),
 (1e-05, 0.8),
 (1e-05, 0.9),
 (1e-05, 0.95),
 (1e-05, 0.99),
 (0.0001, 0.8),
 (0.0001, 0.9),
 (0.0001, 0.95),
 (0.0001, 0.99),
 (0.001, 0.8),
 (0.001, 0.9),
 (0.001, 0.95),
 (0.001, 0.99)]

In [4]:
def train(params_array,folder_name):
    trial = 0
    for sets in params_array:
        print("trial ",trial)
#         if trial in [0,1,2]: 
#             trial+=1
#             continue
        evaluate_and_test(sets,trial,folder_name)
        trial+=1

In [5]:
Transition = namedtuple('Transition', ('state', 'next_state', 'action', 'reward', 'mask'))

class Memory(object):
    def __init__(self):
        self.memory = deque()

    def push(self, state, next_state, action, reward, mask):
        self.memory.append(Transition(state, next_state, action, reward, mask))

    def sample(self):
        memory = self.memory
        return Transition(*zip(*memory)) 

    def __len__(self):
        return len(self.memory)

In [6]:
def flat_grad(grads):
    grad_flatten = []
    for grad in grads:
        grad_flatten.append(grad.view(-1))
    grad_flatten = torch.cat(grad_flatten)
    return grad_flatten

def flat_hessian(hessians):
    hessians_flatten = []
    for hessian in hessians:
        hessians_flatten.append(hessian.contiguous().view(-1))
    hessians_flatten = torch.cat(hessians_flatten).data
    return hessians_flatten

def flat_params(model):
    params = []
    for param in model.parameters():
        params.append(param.data.view(-1))
    params_flatten = torch.cat(params)
    return params_flatten

def update_model(model, new_params):
    index = 0
    for params in model.parameters():
        params_length = len(params.view(-1))
        new_param = new_params[index: index + params_length]
        new_param = new_param.view(params.size())
        params.data.copy_(new_param)
        index += params_length

def kl_divergence(policy, old_policy):
    kl = old_policy * torch.log(old_policy / policy)

    kl = kl.sum(1, keepdim=True)
    return kl

def fisher_vector_product(net, states, p, cg_damp=0.1):
    policy = net(states)
    old_policy = net(states).detach()
    kl = kl_divergence(policy, old_policy)
    kl = kl.mean()
    kl_grad = torch.autograd.grad(kl, net.parameters(), create_graph=True) # create_graph is True if we need higher order derivative products
    kl_grad = flat_grad(kl_grad)

    kl_grad_p = (kl_grad * p.detach()).sum()
    kl_hessian_p = torch.autograd.grad(kl_grad_p, net.parameters())

        
    kl_hessian_p = flat_hessian(kl_hessian_p)

    return kl_hessian_p + cg_damp * p.detach()


def conjugate_gradient(net, states, loss_grad, n_step=10, residual_tol=1e-10):
    x = torch.zeros(loss_grad.size())
    r = loss_grad.clone()
    p = loss_grad.clone()
    r_dot_r = torch.dot(r, r)

    for i in range(n_step):
        A_dot_p = fisher_vector_product(net, states, p)
        alpha = r_dot_r / torch.dot(p, A_dot_p)
        x += alpha * p
        r -= alpha * A_dot_p
        new_r_dot_r = torch.dot(r,r)
        betta = new_r_dot_r / r_dot_r
        p = r + betta * p
        r_dot_r = new_r_dot_r
        if r_dot_r < residual_tol:
            break
    return x

def check_nan(x):
    return int(torch.isnan(x).sum())

def compute_avg_return(environment, policy, num_episodes):
    total_return = 0.0
    for _ in range(num_episodes):
        state = torch.Tensor(environment.reset())
        state = state.unsqueeze(0)
        episode_return = 0.0 
        while not environment.done:
            action = policy.get_action(state)
            next_state, reward, done, _ = environment.step(action)
            next_state = torch.Tensor(next_state)
            next_state = next_state.unsqueeze(0)
            state = next_state
            episode_return += reward
        total_return += episode_return   
    avg_return = total_return / num_episodes
    return avg_return# Evaluate the agent's policy once before training.

In [7]:
class TRPO(nn.Module):
    def __init__(self, num_inputs, num_outputs,params):
        super(TRPO, self).__init__()
        self.t = 0
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs

        self.fc_1 = nn.Linear(num_inputs, 128)
        self.fc_2 = nn.Linear(128, 64)
        self.fc_3 = nn.Linear(64, num_outputs)

        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)

    def forward(self, input):
        x = torch.relu(self.fc_1(input))
        x = torch.relu(self.fc_2(x))
#         policy = F.softmax(self.fc_3(x),dim=-1)
        x = F.log_softmax(self.fc_3(x),dim=-1)

        return torch.exp(x)
    
    def check(self,input):
        print("Input: ", input)
        x = torch.relu(self.fc_1(input))
        x = torch.relu(self.fc_2(x))
#         policy = F.softmax(self.fc_3(x),dim=-1)
#         print("Logit:",self.fc_3(x))
#         #x = F.log_softmax(self.fc_3(x),dim=-1)
#         print(vars(self.fc_1))
#         print(vars(self.fc_2))
#         print(vars(self.fc_3))

    @classmethod
    def train_model(cls, net, transitions,parameters):
        states, actions, rewards, masks = transitions.state, transitions.action, transitions.reward, transitions.mask

        states = torch.stack(states)
        actions = torch.stack(actions)
        rewards = torch.Tensor(rewards)
        masks = torch.Tensor(masks)

        returns = torch.zeros_like(rewards)

        running_return = 0
        for t in reversed(range(len(rewards))):
            running_return = rewards[t] + parameters[1] * running_return * masks[t]
            returns[t] = running_return

        policy = net(states)
        policy = policy.view(-1, net.num_outputs)
        policy_action = (policy * actions.detach()).sum(dim=1)

        old_policy = net(states).detach()
        old_policy = old_policy.view(-1, net.num_outputs)
        old_policy_action = (old_policy * actions.detach()).sum(dim=1)

        surrogate_loss = ((policy_action / old_policy_action) * returns).mean()

        surrogate_loss_grad = torch.autograd.grad(surrogate_loss, net.parameters())
        surrogate_loss_grad = flat_grad(surrogate_loss_grad)

        step_dir = conjugate_gradient(net, states, surrogate_loss_grad.data)

        params = flat_params(net)
        shs = (step_dir * fisher_vector_product(net, states, step_dir)).sum(0, keepdim=True)
        
        
        step_size = torch.sqrt((2 * parameters[0]) / shs)[0]
            
        full_step = step_size * step_dir

        fraction = 1.0
        
        #store = []
        
        for _ in range(10):
            new_params = params + fraction * full_step
            update_model(net, new_params)
            policy = net(states)
            policy = policy.view(-1, net.num_outputs)
            policy_action = (policy * actions.detach()).sum(dim=1)
            surrogate_loss = ((policy_action / old_policy_action) * returns).mean()

            kl = kl_divergence(policy, old_policy)
            kl = kl.mean()

            if kl < parameters[0]:
                break
            fraction = fraction * 0.5

        return -surrogate_loss

    def get_action(self, input):
        try:
            policy = Categorical(self.forward(input))

        
        except:
            print(self.forward(input))
            print("Nan occured, terminating")
            return -1
        
        action = policy.sample()
        
        return action

In [8]:
def running_score_cal(arr,window=50):
    array = np.array(arr)
    df = pd.DataFrame(array[:,0])
    run_score = df.rolling(50,min_periods=1).mean().values.flatten().tolist()
    return run_score[-1]

In [16]:
def evaluate_and_test(params,trial_number,folder_name):
    
    log_interval = 10 
    machine = Machine()
    machine.curr_state = 0
    env = MachineEnv(machine)
    
    machine2 = Machine()
    machine2.curr_state = 0
    env2 = MachineEnv(machine2)
    
    lost_arr = []

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n

    net = TRPO(num_inputs, num_actions,params)

    net.train()

    best_score = 0
    running_score = 0
    Total_score = 0
    steps = 0
    loss = 0
    maintenance_count = 0
    score_repo = []
    eval_repo = []

    for e in range(10000):
        #print("point 1")
        done = False
        memory = Memory()

        score = 0
        
        
        state = env.reset()
        state = torch.Tensor(state)
        state = state.unsqueeze(0)

        while not done:
            steps += 1

            action = net.get_action(state)

            if action == -1: break

            if action == 1: maintenance_count+=1

            next_state, reward, done, _ = env.step(action)

            next_state = torch.Tensor(next_state)
            next_state = next_state.unsqueeze(0)

            mask = 0 if done else 1

            action_one_hot = torch.zeros(2)
            action_one_hot[action] = 1
            memory.push(state, next_state, action_one_hot, reward, mask)

            score += reward
            state = next_state
 

        if action == -1: #exploding gradient or nan occurs
            net.check(state)
            np.savetxt(f'{folder_name}/trial_{trial_number}.txt', np.array(score_repo), delimiter=',',fmt='%s')
            break

        loss = TRPO.train_model(net, memory.sample(),params)
        lost_arr.append(loss.item())

        Total_score+=score
        #print("Point 2")
        if e % log_interval == 0:
            score_repo.append([Total_score/log_interval,e])
            eval_repo.append(compute_avg_return(env2, net, 20))
            
            print('{} episode | Average score: {:.2f} | Average steps per episode: {}| Average number of maintenance: {}'.format(
                e, Total_score/log_interval,steps/log_interval, (maintenance_count/log_interval)))
            Total_score = 0
            maintenance_count=0
            steps = 0
            
            running_score = np.mean(np.array(score_repo)[-50:,0])
            
            if best_score < running_score and e > 100: #only start recording the highest point after 100 episodes
                best_score = running_score
                torch.save(net,f'{folder_name}/trial_{trial_number}_TRPO_agent.pt')
                
    #np.savetxt(f'{folder_name}/trial_{trial_number}.txt', np.array(score_repo), delimiter=',',fmt='%s')
    #np.savetxt(f'{folder_name}/loss_{trial_number}.txt', np.array(lost_arr), delimiter=',',fmt='%s')
    np.savetxt(f'{folder_name}/trial_{trial_number}.txt', np.array(eval_repo), delimiter=',',fmt='%s')
    torch.save(net,f'{folder_name}/final_trial_{trial_number}_TRPO_agent.pt')
    
    return 

In [17]:
evaluate_and_test([1e-5,0.99],0,'test')

0 episode | Average score: 270.00 | Average steps per episode: 0.9| Average number of maintenance: 0.5
10 episode | Average score: 21130.00 | Average steps per episode: 44.0| Average number of maintenance: 17.8
20 episode | Average score: 25590.00 | Average steps per episode: 49.9| Average number of maintenance: 19.1
30 episode | Average score: 15410.00 | Average steps per episode: 32.1| Average number of maintenance: 13.3
40 episode | Average score: 18700.00 | Average steps per episode: 43.4| Average number of maintenance: 16.9
50 episode | Average score: 28220.00 | Average steps per episode: 55.1| Average number of maintenance: 20.9
60 episode | Average score: 26070.00 | Average steps per episode: 51.5| Average number of maintenance: 19.4
70 episode | Average score: 21490.00 | Average steps per episode: 43.0| Average number of maintenance: 15.2
80 episode | Average score: 25160.00 | Average steps per episode: 54.2| Average number of maintenance: 21.4
90 episode | Average score: 29890

760 episode | Average score: 19370.00 | Average steps per episode: 44.9| Average number of maintenance: 18.6
770 episode | Average score: 22380.00 | Average steps per episode: 47.0| Average number of maintenance: 21.3
780 episode | Average score: 23880.00 | Average steps per episode: 49.3| Average number of maintenance: 20.6
790 episode | Average score: 20260.00 | Average steps per episode: 44.9| Average number of maintenance: 18.4
800 episode | Average score: 28120.00 | Average steps per episode: 54.9| Average number of maintenance: 23.1
810 episode | Average score: 24650.00 | Average steps per episode: 52.4| Average number of maintenance: 24.2
820 episode | Average score: 19300.00 | Average steps per episode: 42.7| Average number of maintenance: 19.7
830 episode | Average score: 21470.00 | Average steps per episode: 46.2| Average number of maintenance: 19.2
840 episode | Average score: 18220.00 | Average steps per episode: 36.0| Average number of maintenance: 14.9
850 episode | Avera

1510 episode | Average score: 23230.00 | Average steps per episode: 43.7| Average number of maintenance: 15.0
1520 episode | Average score: 16930.00 | Average steps per episode: 30.1| Average number of maintenance: 10.1
1530 episode | Average score: 18010.00 | Average steps per episode: 40.1| Average number of maintenance: 15.2
1540 episode | Average score: 8580.00 | Average steps per episode: 20.7| Average number of maintenance: 5.8
1550 episode | Average score: 15320.00 | Average steps per episode: 34.4| Average number of maintenance: 11.7
1560 episode | Average score: 25230.00 | Average steps per episode: 52.6| Average number of maintenance: 21.9
1570 episode | Average score: 26510.00 | Average steps per episode: 53.5| Average number of maintenance: 19.8
1580 episode | Average score: 19310.00 | Average steps per episode: 39.4| Average number of maintenance: 15.8
1590 episode | Average score: 16660.00 | Average steps per episode: 31.8| Average number of maintenance: 10.0
1600 episode

2260 episode | Average score: 13470.00 | Average steps per episode: 27.2| Average number of maintenance: 10.6
2270 episode | Average score: 10640.00 | Average steps per episode: 22.0| Average number of maintenance: 7.0
2280 episode | Average score: 23470.00 | Average steps per episode: 46.3| Average number of maintenance: 18.4
2290 episode | Average score: 23720.00 | Average steps per episode: 43.4| Average number of maintenance: 15.6
2300 episode | Average score: 23280.00 | Average steps per episode: 43.7| Average number of maintenance: 14.9
2310 episode | Average score: 16870.00 | Average steps per episode: 36.3| Average number of maintenance: 14.0
2320 episode | Average score: 19500.00 | Average steps per episode: 41.4| Average number of maintenance: 15.0
2330 episode | Average score: 26270.00 | Average steps per episode: 50.0| Average number of maintenance: 17.5
2340 episode | Average score: 14300.00 | Average steps per episode: 33.7| Average number of maintenance: 13.4
2350 episod

3010 episode | Average score: 21030.00 | Average steps per episode: 46.1| Average number of maintenance: 18.5
3020 episode | Average score: 24550.00 | Average steps per episode: 48.7| Average number of maintenance: 19.4
3030 episode | Average score: 19130.00 | Average steps per episode: 38.6| Average number of maintenance: 13.9
3040 episode | Average score: 20430.00 | Average steps per episode: 45.7| Average number of maintenance: 17.6
3050 episode | Average score: 13720.00 | Average steps per episode: 26.5| Average number of maintenance: 10.8
3060 episode | Average score: 21210.00 | Average steps per episode: 44.3| Average number of maintenance: 15.9
3070 episode | Average score: 8380.00 | Average steps per episode: 25.6| Average number of maintenance: 10.5
3080 episode | Average score: 23250.00 | Average steps per episode: 46.5| Average number of maintenance: 17.5
3090 episode | Average score: 25190.00 | Average steps per episode: 55.9| Average number of maintenance: 24.4
3100 episod

3760 episode | Average score: 19410.00 | Average steps per episode: 43.8| Average number of maintenance: 17.5
3770 episode | Average score: 30440.00 | Average steps per episode: 62.2| Average number of maintenance: 25.2
3780 episode | Average score: 10990.00 | Average steps per episode: 22.7| Average number of maintenance: 8.8
3790 episode | Average score: 32540.00 | Average steps per episode: 67.5| Average number of maintenance: 28.0
3800 episode | Average score: 14570.00 | Average steps per episode: 33.8| Average number of maintenance: 13.2
3810 episode | Average score: 25320.00 | Average steps per episode: 51.0| Average number of maintenance: 20.1
3820 episode | Average score: 26740.00 | Average steps per episode: 51.5| Average number of maintenance: 20.1
3830 episode | Average score: 22600.00 | Average steps per episode: 46.7| Average number of maintenance: 19.6
3840 episode | Average score: 25630.00 | Average steps per episode: 52.1| Average number of maintenance: 21.6
3850 episod

4510 episode | Average score: 17790.00 | Average steps per episode: 34.8| Average number of maintenance: 13.5
4520 episode | Average score: 24280.00 | Average steps per episode: 50.3| Average number of maintenance: 18.9
4530 episode | Average score: 18020.00 | Average steps per episode: 38.6| Average number of maintenance: 16.1
4540 episode | Average score: 25480.00 | Average steps per episode: 52.5| Average number of maintenance: 21.2
4550 episode | Average score: 28000.00 | Average steps per episode: 55.2| Average number of maintenance: 22.3
4560 episode | Average score: 31370.00 | Average steps per episode: 60.5| Average number of maintenance: 24.5
4570 episode | Average score: 20110.00 | Average steps per episode: 40.3| Average number of maintenance: 14.8
4580 episode | Average score: 26200.00 | Average steps per episode: 53.2| Average number of maintenance: 21.8
4590 episode | Average score: 26660.00 | Average steps per episode: 50.1| Average number of maintenance: 19.0
4600 episo

5260 episode | Average score: 25720.00 | Average steps per episode: 52.8| Average number of maintenance: 20.2
5270 episode | Average score: 30250.00 | Average steps per episode: 60.1| Average number of maintenance: 23.4
5280 episode | Average score: 25520.00 | Average steps per episode: 46.9| Average number of maintenance: 17.2
5290 episode | Average score: 20880.00 | Average steps per episode: 40.4| Average number of maintenance: 16.9
5300 episode | Average score: 27200.00 | Average steps per episode: 51.8| Average number of maintenance: 18.3
5310 episode | Average score: 20520.00 | Average steps per episode: 39.9| Average number of maintenance: 14.9
5320 episode | Average score: 33870.00 | Average steps per episode: 70.0| Average number of maintenance: 28.2
5330 episode | Average score: 28920.00 | Average steps per episode: 54.1| Average number of maintenance: 20.6
5340 episode | Average score: 25900.00 | Average steps per episode: 54.4| Average number of maintenance: 22.2
5350 episo

6010 episode | Average score: 26510.00 | Average steps per episode: 48.4| Average number of maintenance: 16.3
6020 episode | Average score: 38450.00 | Average steps per episode: 65.6| Average number of maintenance: 23.2
6030 episode | Average score: 29880.00 | Average steps per episode: 57.6| Average number of maintenance: 20.2
6040 episode | Average score: 30520.00 | Average steps per episode: 55.0| Average number of maintenance: 20.2
6050 episode | Average score: 21560.00 | Average steps per episode: 44.5| Average number of maintenance: 16.6
6060 episode | Average score: 28520.00 | Average steps per episode: 52.7| Average number of maintenance: 18.5
6070 episode | Average score: 22160.00 | Average steps per episode: 45.4| Average number of maintenance: 16.4
6080 episode | Average score: 34300.00 | Average steps per episode: 64.4| Average number of maintenance: 22.4
6090 episode | Average score: 28520.00 | Average steps per episode: 50.7| Average number of maintenance: 16.5
6100 episo

6760 episode | Average score: 18630.00 | Average steps per episode: 37.2| Average number of maintenance: 15.8
6770 episode | Average score: 23540.00 | Average steps per episode: 46.2| Average number of maintenance: 19.9
6780 episode | Average score: 13900.00 | Average steps per episode: 34.4| Average number of maintenance: 16.0
6790 episode | Average score: 24050.00 | Average steps per episode: 45.0| Average number of maintenance: 18.0
6800 episode | Average score: 22000.00 | Average steps per episode: 46.2| Average number of maintenance: 20.3
6810 episode | Average score: 26470.00 | Average steps per episode: 49.7| Average number of maintenance: 19.4
6820 episode | Average score: 35220.00 | Average steps per episode: 67.9| Average number of maintenance: 29.2
6830 episode | Average score: 36590.00 | Average steps per episode: 71.8| Average number of maintenance: 30.3
6840 episode | Average score: 15010.00 | Average steps per episode: 35.5| Average number of maintenance: 14.9
6850 episo

7510 episode | Average score: 23250.00 | Average steps per episode: 45.9| Average number of maintenance: 16.4
7520 episode | Average score: 30630.00 | Average steps per episode: 55.7| Average number of maintenance: 21.4
7530 episode | Average score: 34180.00 | Average steps per episode: 62.2| Average number of maintenance: 22.9
7540 episode | Average score: 29240.00 | Average steps per episode: 55.2| Average number of maintenance: 22.1
7550 episode | Average score: 29450.00 | Average steps per episode: 50.3| Average number of maintenance: 16.9
7560 episode | Average score: 36250.00 | Average steps per episode: 65.8| Average number of maintenance: 24.4
7570 episode | Average score: 26330.00 | Average steps per episode: 47.3| Average number of maintenance: 18.3
7580 episode | Average score: 20590.00 | Average steps per episode: 39.9| Average number of maintenance: 15.2
7590 episode | Average score: 19370.00 | Average steps per episode: 39.4| Average number of maintenance: 15.6
7600 episo

8260 episode | Average score: 41350.00 | Average steps per episode: 71.7| Average number of maintenance: 24.0
8270 episode | Average score: 25860.00 | Average steps per episode: 46.6| Average number of maintenance: 15.7
8280 episode | Average score: 21680.00 | Average steps per episode: 42.3| Average number of maintenance: 13.9
8290 episode | Average score: 23760.00 | Average steps per episode: 43.1| Average number of maintenance: 13.9
8300 episode | Average score: 26100.00 | Average steps per episode: 48.2| Average number of maintenance: 15.6
8310 episode | Average score: 34870.00 | Average steps per episode: 63.6| Average number of maintenance: 20.7
8320 episode | Average score: 24360.00 | Average steps per episode: 45.0| Average number of maintenance: 15.6
8330 episode | Average score: 22780.00 | Average steps per episode: 43.4| Average number of maintenance: 15.6
8340 episode | Average score: 23220.00 | Average steps per episode: 41.0| Average number of maintenance: 13.2
8350 episo

9010 episode | Average score: 21830.00 | Average steps per episode: 40.3| Average number of maintenance: 13.9
9020 episode | Average score: 16000.00 | Average steps per episode: 32.4| Average number of maintenance: 11.9
9030 episode | Average score: 43400.00 | Average steps per episode: 78.2| Average number of maintenance: 27.5
9040 episode | Average score: 29790.00 | Average steps per episode: 55.4| Average number of maintenance: 20.0
9050 episode | Average score: 23800.00 | Average steps per episode: 46.9| Average number of maintenance: 17.7
9060 episode | Average score: 28090.00 | Average steps per episode: 52.6| Average number of maintenance: 18.5
9070 episode | Average score: 17880.00 | Average steps per episode: 32.9| Average number of maintenance: 11.2
9080 episode | Average score: 26310.00 | Average steps per episode: 50.6| Average number of maintenance: 19.2
9090 episode | Average score: 39750.00 | Average steps per episode: 69.6| Average number of maintenance: 23.9
9100 episo

9760 episode | Average score: 31040.00 | Average steps per episode: 55.7| Average number of maintenance: 20.7
9770 episode | Average score: 21280.00 | Average steps per episode: 42.7| Average number of maintenance: 15.8
9780 episode | Average score: 30800.00 | Average steps per episode: 59.2| Average number of maintenance: 24.5
9790 episode | Average score: 32540.00 | Average steps per episode: 65.8| Average number of maintenance: 24.8
9800 episode | Average score: 19890.00 | Average steps per episode: 39.4| Average number of maintenance: 15.7
9810 episode | Average score: 30190.00 | Average steps per episode: 57.9| Average number of maintenance: 21.8
9820 episode | Average score: 18560.00 | Average steps per episode: 38.7| Average number of maintenance: 16.4
9830 episode | Average score: 22130.00 | Average steps per episode: 53.5| Average number of maintenance: 23.9
9840 episode | Average score: 29390.00 | Average steps per episode: 57.5| Average number of maintenance: 22.0
9850 episo

In [None]:
folder_name = 'run_2'
train(full_fact_params,folder_name)

trial  0
0 episode | Average score: 4890.00 | Average steps per episode: 10.0| Average number of maintenance: 5.6
10 episode | Average score: 20790.00 | Average steps per episode: 62.0| Average number of maintenance: 36.8
20 episode | Average score: 22130.00 | Average steps per episode: 57.6| Average number of maintenance: 35.3
30 episode | Average score: 24700.00 | Average steps per episode: 67.5| Average number of maintenance: 37.9
40 episode | Average score: 30830.00 | Average steps per episode: 69.1| Average number of maintenance: 36.7
50 episode | Average score: 28560.00 | Average steps per episode: 63.6| Average number of maintenance: 33.0
60 episode | Average score: 16110.00 | Average steps per episode: 41.5| Average number of maintenance: 23.8
70 episode | Average score: 15690.00 | Average steps per episode: 42.6| Average number of maintenance: 22.9
80 episode | Average score: 25180.00 | Average steps per episode: 61.0| Average number of maintenance: 33.5
90 episode | Average s

760 episode | Average score: 21030.00 | Average steps per episode: 42.9| Average number of maintenance: 15.9
770 episode | Average score: 14790.00 | Average steps per episode: 31.2| Average number of maintenance: 11.5
780 episode | Average score: 26830.00 | Average steps per episode: 50.5| Average number of maintenance: 16.8
790 episode | Average score: 18930.00 | Average steps per episode: 36.5| Average number of maintenance: 9.8
800 episode | Average score: 22150.00 | Average steps per episode: 43.7| Average number of maintenance: 14.8
810 episode | Average score: 9770.00 | Average steps per episode: 19.4| Average number of maintenance: 6.4
820 episode | Average score: 23950.00 | Average steps per episode: 43.5| Average number of maintenance: 15.1
830 episode | Average score: 22020.00 | Average steps per episode: 43.6| Average number of maintenance: 16.1
840 episode | Average score: 13320.00 | Average steps per episode: 26.4| Average number of maintenance: 7.8
850 episode | Average s

1520 episode | Average score: 10490.00 | Average steps per episode: 21.1| Average number of maintenance: 5.0
1530 episode | Average score: 12440.00 | Average steps per episode: 24.8| Average number of maintenance: 6.8
1540 episode | Average score: 18690.00 | Average steps per episode: 34.3| Average number of maintenance: 8.7
1550 episode | Average score: 17830.00 | Average steps per episode: 29.6| Average number of maintenance: 7.7
1560 episode | Average score: 9370.00 | Average steps per episode: 20.8| Average number of maintenance: 5.4
1570 episode | Average score: 11430.00 | Average steps per episode: 24.2| Average number of maintenance: 6.0
1580 episode | Average score: 12600.00 | Average steps per episode: 23.9| Average number of maintenance: 6.1
1590 episode | Average score: 6740.00 | Average steps per episode: 16.1| Average number of maintenance: 3.2
1600 episode | Average score: 11370.00 | Average steps per episode: 24.0| Average number of maintenance: 7.2
1610 episode | Averag

2280 episode | Average score: 10720.00 | Average steps per episode: 20.0| Average number of maintenance: 4.1
2290 episode | Average score: 8360.00 | Average steps per episode: 16.0| Average number of maintenance: 3.3
2300 episode | Average score: 17620.00 | Average steps per episode: 33.2| Average number of maintenance: 8.3
2310 episode | Average score: 18350.00 | Average steps per episode: 31.9| Average number of maintenance: 7.5
2320 episode | Average score: 7690.00 | Average steps per episode: 15.9| Average number of maintenance: 3.4
2330 episode | Average score: 17730.00 | Average steps per episode: 33.5| Average number of maintenance: 8.3
2340 episode | Average score: 13860.00 | Average steps per episode: 25.3| Average number of maintenance: 6.3
2350 episode | Average score: 15110.00 | Average steps per episode: 29.1| Average number of maintenance: 7.2
2360 episode | Average score: 11940.00 | Average steps per episode: 24.0| Average number of maintenance: 4.9
2370 episode | Averag

3040 episode | Average score: 9650.00 | Average steps per episode: 17.6| Average number of maintenance: 2.2
3050 episode | Average score: 15360.00 | Average steps per episode: 27.7| Average number of maintenance: 6.2
3060 episode | Average score: 7380.00 | Average steps per episode: 15.8| Average number of maintenance: 3.9
3070 episode | Average score: 11170.00 | Average steps per episode: 20.6| Average number of maintenance: 3.7
3080 episode | Average score: 16140.00 | Average steps per episode: 29.6| Average number of maintenance: 7.0
3090 episode | Average score: 13680.00 | Average steps per episode: 23.4| Average number of maintenance: 3.5
3100 episode | Average score: 18060.00 | Average steps per episode: 31.6| Average number of maintenance: 7.3
3110 episode | Average score: 27570.00 | Average steps per episode: 46.6| Average number of maintenance: 10.6
3120 episode | Average score: 8620.00 | Average steps per episode: 17.7| Average number of maintenance: 2.3
3130 episode | Averag

3800 episode | Average score: 9520.00 | Average steps per episode: 19.3| Average number of maintenance: 3.0
3810 episode | Average score: 12630.00 | Average steps per episode: 22.6| Average number of maintenance: 4.3
3820 episode | Average score: 6540.00 | Average steps per episode: 15.6| Average number of maintenance: 3.1
3830 episode | Average score: 10640.00 | Average steps per episode: 20.4| Average number of maintenance: 3.9
3840 episode | Average score: 4430.00 | Average steps per episode: 12.1| Average number of maintenance: 2.2
3850 episode | Average score: 22820.00 | Average steps per episode: 40.4| Average number of maintenance: 8.8
3860 episode | Average score: 7790.00 | Average steps per episode: 15.4| Average number of maintenance: 3.1
3870 episode | Average score: 12820.00 | Average steps per episode: 24.6| Average number of maintenance: 5.7
3880 episode | Average score: 8230.00 | Average steps per episode: 18.0| Average number of maintenance: 3.2
3890 episode | Average s

4560 episode | Average score: 7700.00 | Average steps per episode: 15.6| Average number of maintenance: 2.8
4570 episode | Average score: 9320.00 | Average steps per episode: 19.3| Average number of maintenance: 3.3
4580 episode | Average score: 13310.00 | Average steps per episode: 23.7| Average number of maintenance: 4.5
4590 episode | Average score: 15360.00 | Average steps per episode: 28.9| Average number of maintenance: 6.1
4600 episode | Average score: 7000.00 | Average steps per episode: 15.1| Average number of maintenance: 3.1
4610 episode | Average score: 16240.00 | Average steps per episode: 28.1| Average number of maintenance: 4.4
4620 episode | Average score: 14380.00 | Average steps per episode: 28.1| Average number of maintenance: 5.5
4630 episode | Average score: 14710.00 | Average steps per episode: 27.2| Average number of maintenance: 6.6
4640 episode | Average score: 10460.00 | Average steps per episode: 18.8| Average number of maintenance: 3.1
4650 episode | Average

5320 episode | Average score: 17880.00 | Average steps per episode: 29.8| Average number of maintenance: 6.6
5330 episode | Average score: 9970.00 | Average steps per episode: 19.0| Average number of maintenance: 4.4
5340 episode | Average score: 8120.00 | Average steps per episode: 16.6| Average number of maintenance: 2.7
5350 episode | Average score: 9220.00 | Average steps per episode: 19.5| Average number of maintenance: 4.1
5360 episode | Average score: 12390.00 | Average steps per episode: 22.6| Average number of maintenance: 4.4
5370 episode | Average score: 14120.00 | Average steps per episode: 22.4| Average number of maintenance: 3.2
5380 episode | Average score: 11340.00 | Average steps per episode: 20.2| Average number of maintenance: 3.2
5390 episode | Average score: 11610.00 | Average steps per episode: 19.9| Average number of maintenance: 3.4
5400 episode | Average score: 10000.00 | Average steps per episode: 19.6| Average number of maintenance: 2.7
5410 episode | Average

6080 episode | Average score: 16680.00 | Average steps per episode: 30.3| Average number of maintenance: 6.3
6090 episode | Average score: 9870.00 | Average steps per episode: 20.2| Average number of maintenance: 3.3
6100 episode | Average score: 5840.00 | Average steps per episode: 13.8| Average number of maintenance: 2.6
6110 episode | Average score: 11410.00 | Average steps per episode: 22.4| Average number of maintenance: 5.3
6120 episode | Average score: 13770.00 | Average steps per episode: 27.3| Average number of maintenance: 5.7
6130 episode | Average score: 10830.00 | Average steps per episode: 22.1| Average number of maintenance: 4.3
6140 episode | Average score: 12750.00 | Average steps per episode: 25.5| Average number of maintenance: 4.6
6150 episode | Average score: 8410.00 | Average steps per episode: 18.0| Average number of maintenance: 4.0
6160 episode | Average score: 8070.00 | Average steps per episode: 17.1| Average number of maintenance: 3.0
6170 episode | Average 

6840 episode | Average score: 9040.00 | Average steps per episode: 18.0| Average number of maintenance: 2.9
6850 episode | Average score: 18150.00 | Average steps per episode: 32.0| Average number of maintenance: 6.5
6860 episode | Average score: 14420.00 | Average steps per episode: 26.4| Average number of maintenance: 4.9
6870 episode | Average score: 9740.00 | Average steps per episode: 17.6| Average number of maintenance: 2.9
6880 episode | Average score: 9850.00 | Average steps per episode: 19.0| Average number of maintenance: 2.8
6890 episode | Average score: 15480.00 | Average steps per episode: 27.8| Average number of maintenance: 5.8
6900 episode | Average score: 9910.00 | Average steps per episode: 18.1| Average number of maintenance: 2.7
6910 episode | Average score: 14380.00 | Average steps per episode: 28.4| Average number of maintenance: 5.5
6920 episode | Average score: 9290.00 | Average steps per episode: 20.0| Average number of maintenance: 3.7
6930 episode | Average s

7600 episode | Average score: 9630.00 | Average steps per episode: 20.9| Average number of maintenance: 4.2
7610 episode | Average score: 10460.00 | Average steps per episode: 19.9| Average number of maintenance: 3.7
7620 episode | Average score: 18720.00 | Average steps per episode: 33.6| Average number of maintenance: 7.2
7630 episode | Average score: 20740.00 | Average steps per episode: 34.2| Average number of maintenance: 8.0
7640 episode | Average score: 29990.00 | Average steps per episode: 47.4| Average number of maintenance: 11.6
7650 episode | Average score: 8350.00 | Average steps per episode: 16.8| Average number of maintenance: 3.5
7660 episode | Average score: 21220.00 | Average steps per episode: 40.8| Average number of maintenance: 9.6
7670 episode | Average score: 10050.00 | Average steps per episode: 20.8| Average number of maintenance: 3.7
7680 episode | Average score: 11740.00 | Average steps per episode: 21.0| Average number of maintenance: 4.2
7690 episode | Avera

8360 episode | Average score: 6050.00 | Average steps per episode: 14.3| Average number of maintenance: 3.1
8370 episode | Average score: 12340.00 | Average steps per episode: 23.7| Average number of maintenance: 5.2
8380 episode | Average score: 8070.00 | Average steps per episode: 17.9| Average number of maintenance: 3.8
8390 episode | Average score: 25700.00 | Average steps per episode: 45.6| Average number of maintenance: 11.7
8400 episode | Average score: 11460.00 | Average steps per episode: 20.8| Average number of maintenance: 3.8
8410 episode | Average score: 20120.00 | Average steps per episode: 38.7| Average number of maintenance: 9.0
8420 episode | Average score: 14860.00 | Average steps per episode: 26.1| Average number of maintenance: 3.7
8430 episode | Average score: 8320.00 | Average steps per episode: 18.5| Average number of maintenance: 4.6
8440 episode | Average score: 15680.00 | Average steps per episode: 29.4| Average number of maintenance: 6.8
8450 episode | Averag

9120 episode | Average score: 14700.00 | Average steps per episode: 27.2| Average number of maintenance: 5.5
9130 episode | Average score: 23830.00 | Average steps per episode: 42.7| Average number of maintenance: 10.9
9140 episode | Average score: 11480.00 | Average steps per episode: 22.9| Average number of maintenance: 4.1
9150 episode | Average score: 9750.00 | Average steps per episode: 18.0| Average number of maintenance: 4.0
9160 episode | Average score: 10930.00 | Average steps per episode: 21.8| Average number of maintenance: 4.2
9170 episode | Average score: 10070.00 | Average steps per episode: 19.7| Average number of maintenance: 4.1
9180 episode | Average score: 25880.00 | Average steps per episode: 43.9| Average number of maintenance: 8.8
9190 episode | Average score: 16120.00 | Average steps per episode: 26.9| Average number of maintenance: 5.5
9200 episode | Average score: 20460.00 | Average steps per episode: 32.9| Average number of maintenance: 6.9
9210 episode | Aver

In [12]:
default_params = [1e-4,0.95]
best_params = [1e-5,0.99]

evaluate_and_test(best_params,0,'best')


0 episode | Average score: 1220.00 | Average steps per episode: 3.5| Average number of maintenance: 1.9
10 episode | Average score: 11470.00 | Average steps per episode: 28.3| Average number of maintenance: 13.1
20 episode | Average score: 13630.00 | Average steps per episode: 31.6| Average number of maintenance: 14.8
30 episode | Average score: 26010.00 | Average steps per episode: 52.8| Average number of maintenance: 24.0
40 episode | Average score: 36650.00 | Average steps per episode: 73.1| Average number of maintenance: 33.2
50 episode | Average score: 16740.00 | Average steps per episode: 35.1| Average number of maintenance: 15.3
60 episode | Average score: 33350.00 | Average steps per episode: 71.0| Average number of maintenance: 34.8
70 episode | Average score: 23840.00 | Average steps per episode: 48.6| Average number of maintenance: 19.8
80 episode | Average score: 23000.00 | Average steps per episode: 51.7| Average number of maintenance: 23.4
90 episode | Average score: 1982

760 episode | Average score: 15260.00 | Average steps per episode: 35.8| Average number of maintenance: 15.8
770 episode | Average score: 18700.00 | Average steps per episode: 40.4| Average number of maintenance: 16.7
780 episode | Average score: 30900.00 | Average steps per episode: 61.5| Average number of maintenance: 25.7
790 episode | Average score: 23140.00 | Average steps per episode: 48.8| Average number of maintenance: 21.0
800 episode | Average score: 17650.00 | Average steps per episode: 36.5| Average number of maintenance: 14.2
810 episode | Average score: 18420.00 | Average steps per episode: 39.5| Average number of maintenance: 17.3
820 episode | Average score: 19290.00 | Average steps per episode: 40.1| Average number of maintenance: 17.2
830 episode | Average score: 21670.00 | Average steps per episode: 45.9| Average number of maintenance: 19.9
840 episode | Average score: 22870.00 | Average steps per episode: 48.2| Average number of maintenance: 19.6
850 episode | Avera

1510 episode | Average score: 20840.00 | Average steps per episode: 43.1| Average number of maintenance: 17.3
1520 episode | Average score: 28340.00 | Average steps per episode: 56.6| Average number of maintenance: 21.8
1530 episode | Average score: 15860.00 | Average steps per episode: 35.2| Average number of maintenance: 14.7
1540 episode | Average score: 7270.00 | Average steps per episode: 17.3| Average number of maintenance: 7.1
1550 episode | Average score: 22630.00 | Average steps per episode: 45.2| Average number of maintenance: 17.4
1560 episode | Average score: 23070.00 | Average steps per episode: 50.4| Average number of maintenance: 20.8
1570 episode | Average score: 21350.00 | Average steps per episode: 44.4| Average number of maintenance: 18.3
1580 episode | Average score: 23160.00 | Average steps per episode: 47.8| Average number of maintenance: 20.7
1590 episode | Average score: 12830.00 | Average steps per episode: 25.4| Average number of maintenance: 9.2
1600 episode 

2260 episode | Average score: 21560.00 | Average steps per episode: 41.3| Average number of maintenance: 15.6
2270 episode | Average score: 23890.00 | Average steps per episode: 45.9| Average number of maintenance: 17.6
2280 episode | Average score: 26170.00 | Average steps per episode: 52.9| Average number of maintenance: 19.0
2290 episode | Average score: 16190.00 | Average steps per episode: 38.9| Average number of maintenance: 15.5
2300 episode | Average score: 16680.00 | Average steps per episode: 36.3| Average number of maintenance: 15.5
2310 episode | Average score: 10190.00 | Average steps per episode: 22.8| Average number of maintenance: 7.9
2320 episode | Average score: 21280.00 | Average steps per episode: 42.4| Average number of maintenance: 15.0
2330 episode | Average score: 18990.00 | Average steps per episode: 38.1| Average number of maintenance: 13.7
2340 episode | Average score: 21320.00 | Average steps per episode: 39.4| Average number of maintenance: 13.4
2350 episod

3010 episode | Average score: 19180.00 | Average steps per episode: 43.4| Average number of maintenance: 19.5
3020 episode | Average score: 29240.00 | Average steps per episode: 60.3| Average number of maintenance: 27.0
3030 episode | Average score: 24010.00 | Average steps per episode: 51.4| Average number of maintenance: 23.5
3040 episode | Average score: 20210.00 | Average steps per episode: 42.0| Average number of maintenance: 17.2
3050 episode | Average score: 19540.00 | Average steps per episode: 38.3| Average number of maintenance: 15.0
3060 episode | Average score: 23940.00 | Average steps per episode: 51.9| Average number of maintenance: 22.9
3070 episode | Average score: 16240.00 | Average steps per episode: 33.8| Average number of maintenance: 13.7
3080 episode | Average score: 15800.00 | Average steps per episode: 33.4| Average number of maintenance: 14.0
3090 episode | Average score: 17190.00 | Average steps per episode: 36.1| Average number of maintenance: 15.1
3100 episo

3760 episode | Average score: 22250.00 | Average steps per episode: 43.2| Average number of maintenance: 16.7
3770 episode | Average score: 21420.00 | Average steps per episode: 43.4| Average number of maintenance: 16.5
3780 episode | Average score: 19150.00 | Average steps per episode: 37.5| Average number of maintenance: 13.5
3790 episode | Average score: 19510.00 | Average steps per episode: 37.8| Average number of maintenance: 12.5
3800 episode | Average score: 24090.00 | Average steps per episode: 45.1| Average number of maintenance: 16.5
3810 episode | Average score: 23220.00 | Average steps per episode: 48.5| Average number of maintenance: 18.0
3820 episode | Average score: 24350.00 | Average steps per episode: 49.4| Average number of maintenance: 18.1
3830 episode | Average score: 15010.00 | Average steps per episode: 35.3| Average number of maintenance: 14.6
3840 episode | Average score: 13270.00 | Average steps per episode: 27.4| Average number of maintenance: 10.2
3850 episo

4510 episode | Average score: 29120.00 | Average steps per episode: 53.4| Average number of maintenance: 20.4
4520 episode | Average score: 23400.00 | Average steps per episode: 44.6| Average number of maintenance: 15.4
4530 episode | Average score: 14500.00 | Average steps per episode: 26.8| Average number of maintenance: 10.0
4540 episode | Average score: 12720.00 | Average steps per episode: 26.0| Average number of maintenance: 9.3
4550 episode | Average score: 23670.00 | Average steps per episode: 43.6| Average number of maintenance: 15.4
4560 episode | Average score: 20600.00 | Average steps per episode: 42.6| Average number of maintenance: 16.1
4570 episode | Average score: 28070.00 | Average steps per episode: 52.2| Average number of maintenance: 20.1
4580 episode | Average score: 16570.00 | Average steps per episode: 32.1| Average number of maintenance: 11.8
4590 episode | Average score: 23630.00 | Average steps per episode: 41.0| Average number of maintenance: 13.8
4600 episod

5260 episode | Average score: 24370.00 | Average steps per episode: 43.0| Average number of maintenance: 14.4
5270 episode | Average score: 26290.00 | Average steps per episode: 47.1| Average number of maintenance: 16.2
5280 episode | Average score: 19690.00 | Average steps per episode: 35.7| Average number of maintenance: 12.8
5290 episode | Average score: 28920.00 | Average steps per episode: 56.9| Average number of maintenance: 20.5
5300 episode | Average score: 14530.00 | Average steps per episode: 27.9| Average number of maintenance: 9.8
5310 episode | Average score: 26500.00 | Average steps per episode: 52.4| Average number of maintenance: 19.4
5320 episode | Average score: 30070.00 | Average steps per episode: 60.2| Average number of maintenance: 23.6
5330 episode | Average score: 23620.00 | Average steps per episode: 48.0| Average number of maintenance: 18.3
5340 episode | Average score: 15580.00 | Average steps per episode: 30.8| Average number of maintenance: 11.6
5350 episod

6010 episode | Average score: 11740.00 | Average steps per episode: 26.1| Average number of maintenance: 10.2
6020 episode | Average score: 34230.00 | Average steps per episode: 64.8| Average number of maintenance: 22.7
6030 episode | Average score: 19070.00 | Average steps per episode: 36.6| Average number of maintenance: 14.0
6040 episode | Average score: 29010.00 | Average steps per episode: 55.2| Average number of maintenance: 18.8
6050 episode | Average score: 16860.00 | Average steps per episode: 32.5| Average number of maintenance: 11.2
6060 episode | Average score: 16290.00 | Average steps per episode: 32.8| Average number of maintenance: 11.2
6070 episode | Average score: 27360.00 | Average steps per episode: 51.5| Average number of maintenance: 17.7
6080 episode | Average score: 22740.00 | Average steps per episode: 43.0| Average number of maintenance: 15.9
6090 episode | Average score: 23140.00 | Average steps per episode: 44.5| Average number of maintenance: 16.0
6100 episo

6760 episode | Average score: 25410.00 | Average steps per episode: 48.6| Average number of maintenance: 18.3
6770 episode | Average score: 28150.00 | Average steps per episode: 52.6| Average number of maintenance: 20.4
6780 episode | Average score: 22160.00 | Average steps per episode: 43.8| Average number of maintenance: 16.3
6790 episode | Average score: 36460.00 | Average steps per episode: 62.2| Average number of maintenance: 22.0
6800 episode | Average score: 31320.00 | Average steps per episode: 59.4| Average number of maintenance: 21.8
6810 episode | Average score: 40020.00 | Average steps per episode: 68.6| Average number of maintenance: 24.6
6820 episode | Average score: 22920.00 | Average steps per episode: 41.8| Average number of maintenance: 14.4
6830 episode | Average score: 32490.00 | Average steps per episode: 58.6| Average number of maintenance: 21.2
6840 episode | Average score: 41100.00 | Average steps per episode: 75.4| Average number of maintenance: 27.2
6850 episo

7510 episode | Average score: 30990.00 | Average steps per episode: 54.8| Average number of maintenance: 18.4
7520 episode | Average score: 27870.00 | Average steps per episode: 53.8| Average number of maintenance: 18.3
7530 episode | Average score: 22900.00 | Average steps per episode: 45.9| Average number of maintenance: 17.4
7540 episode | Average score: 23820.00 | Average steps per episode: 48.8| Average number of maintenance: 18.1
7550 episode | Average score: 24120.00 | Average steps per episode: 44.4| Average number of maintenance: 15.1
7560 episode | Average score: 19030.00 | Average steps per episode: 39.2| Average number of maintenance: 15.9
7570 episode | Average score: 24700.00 | Average steps per episode: 48.0| Average number of maintenance: 18.2
7580 episode | Average score: 23600.00 | Average steps per episode: 44.8| Average number of maintenance: 15.5
7590 episode | Average score: 20220.00 | Average steps per episode: 40.8| Average number of maintenance: 15.2
7600 episo

8260 episode | Average score: 28150.00 | Average steps per episode: 54.1| Average number of maintenance: 19.1
8270 episode | Average score: 35480.00 | Average steps per episode: 65.2| Average number of maintenance: 21.5
8280 episode | Average score: 14570.00 | Average steps per episode: 32.8| Average number of maintenance: 10.3
8290 episode | Average score: 31570.00 | Average steps per episode: 57.8| Average number of maintenance: 18.4
8300 episode | Average score: 18880.00 | Average steps per episode: 33.7| Average number of maintenance: 10.7
8310 episode | Average score: 34140.00 | Average steps per episode: 58.3| Average number of maintenance: 18.9
8320 episode | Average score: 19770.00 | Average steps per episode: 37.7| Average number of maintenance: 13.9
8330 episode | Average score: 16510.00 | Average steps per episode: 31.7| Average number of maintenance: 12.0
8340 episode | Average score: 22380.00 | Average steps per episode: 42.7| Average number of maintenance: 16.0
8350 episo

9010 episode | Average score: 28710.00 | Average steps per episode: 51.5| Average number of maintenance: 19.2
9020 episode | Average score: 19810.00 | Average steps per episode: 38.5| Average number of maintenance: 13.4
9030 episode | Average score: 23680.00 | Average steps per episode: 46.2| Average number of maintenance: 16.6
9040 episode | Average score: 26980.00 | Average steps per episode: 48.7| Average number of maintenance: 17.4
9050 episode | Average score: 26750.00 | Average steps per episode: 53.6| Average number of maintenance: 20.6
9060 episode | Average score: 31380.00 | Average steps per episode: 63.1| Average number of maintenance: 24.7
9070 episode | Average score: 23840.00 | Average steps per episode: 44.4| Average number of maintenance: 16.8
9080 episode | Average score: 33320.00 | Average steps per episode: 63.6| Average number of maintenance: 23.4
9090 episode | Average score: 28140.00 | Average steps per episode: 52.6| Average number of maintenance: 19.9
9100 episo

9760 episode | Average score: 18630.00 | Average steps per episode: 34.0| Average number of maintenance: 11.0
9770 episode | Average score: 27080.00 | Average steps per episode: 52.8| Average number of maintenance: 17.0
9780 episode | Average score: 30610.00 | Average steps per episode: 51.9| Average number of maintenance: 15.6
9790 episode | Average score: 43700.00 | Average steps per episode: 78.8| Average number of maintenance: 27.7
9800 episode | Average score: 29720.00 | Average steps per episode: 53.3| Average number of maintenance: 19.1
9810 episode | Average score: 37420.00 | Average steps per episode: 65.5| Average number of maintenance: 23.1
9820 episode | Average score: 26940.00 | Average steps per episode: 51.4| Average number of maintenance: 18.2
9830 episode | Average score: 14770.00 | Average steps per episode: 28.2| Average number of maintenance: 9.4
9840 episode | Average score: 26020.00 | Average steps per episode: 48.4| Average number of maintenance: 16.5
9850 episod