In [1]:
# -*- coding: utf-8 -*-
# <nbformat>4</nbformat>
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.distributions import Categorical
import numpy as np
import gym
from rubiks_cube import RubiksCube, RubiksAction

In [2]:
class Actor(nn.Module):
    def __init__(self, state_space_dim, action_space_dim,
                 actor_lr=0.01):
        """
        Actor agent. This is the agent that outputs a probability distribution over actions
        for a given input state
        :param env: The environment the agent will try to solve.
        :return: The instantiated Actor agent object.
        """
        super().__init__()
        self.action_space_dim = action_space_dim
        self.state_space_dim = state_space_dim
        self.lin1 = nn.Linear(self.state_space_dim, 24, bias=False)
        self.lin2 = nn.Linear(24, self.action_space_dim, bias=False)     
        self.optimizer = optim.Adam(self.parameters(), lr=actor_lr)
        
    def forward(self, x):
        """
        forward computation pass. Perform forward computation from input to output
        :param x: the state input.
        :return: The computed vector output of shape (self.state_space_dim,1).
        """
        x = self.lin1(x)
        x = self.lin2(x)
        x = nn.Softmax()(x)
        return x
    
    def update_actor(self, 
                 prev_rewards, 
                 prev_logs_probs, 
                 values_critic, 
                 terminal_state=False):
        """
        the update_actor function. This is the function that updates the weights of the actor
        agent so as to raise the probability to pick up actions that will improve, on average,
        the current expected cumulated reward with respect to the current policy i.e 
        the current probability distribution over actions 
        :param prev_rewards: torch.Tensor of shape (episode_length, 1), rewards cumulated over the episode.
        :param values_critic: list of torch.Tensor, expected cumulated rewards with respect to the states encountered during the episode, under the current critic agent.
        :param terminal_state (default: False): boolean, says if last cumulated state is terminal or not.
        :return: Nothing. Update of the actor's weights is done inplace.
        """
        sa_value_samples = []
        discount_factor = 0.9
        vfn_last = 0 if terminal_state else float(values_critic[-1])
        sa_value_sample = vfn_last
        # Discount future rewards back to the present using discount_factor
        discountable_rewards = reversed(list(prev_rewards[:-1]))
        for reward in discountable_rewards:
            sa_value_sample = reward + discount_factor * sa_value_sample
            sa_value_samples.append(sa_value_sample)
        sa_value_samples = sa_value_samples[::-1]
        sa_value_samples = torch.FloatTensor(sa_value_samples)
        
        values_critic = torch.cat(values_critic[:-1])
        prev_logs_probs = torch.cat(list(prev_logs_probs[:-1]))
        assert len(sa_value_samples) == len(values_critic) == len(prev_logs_probs)
        advantages = Variable(sa_value_samples) - values_critic
        loss = prev_logs_probs * advantages
        loss = sum(loss)*-1

        self.optimizer.zero_grad()
        loss.backward(retain_graph=True)
        self.optimizer.step()

class Critic(nn.Module):
    def __init__(self, state_space_dim, action_space_dim,
                 critic_lr=0.01):
        """
        Critic agent. This is the agent that outputs a scalar reprensenting the expected cumulated reward
        with respect to the input state and the current policy. To be combined with the Actor agent.
        :param env: The environment the agent will try to solve.
        :return: The instantiated Critic agent object.
        """
        super().__init__()
        self.state_space_dim = state_space_dim
        self.lin1 = nn.Linear(self.state_space_dim, 24, bias=False)
        self.lin2 = nn.Linear(24, 1, bias=False)
        self.optimizer = optim.Adam(self.parameters(), lr=critic_lr)
    
    def forward(self, x):
        """
        Forward computation pass. Perform forward computation from input to output.
        :param x: the state input.
        :return: The computed vector output.
        """
        x = self.lin1(x)
        x = nn.ReLU()(x)
        x = self.lin2(x)
        return x

    def update_critic(self,
                 prev_rewards,
                 values_critic,
                 terminal_state=False):
        """
        the update_critic function. This is the function that updates the weights of the critic agent so as
        to reduce the distance between predicted expected rewards under current policy and actually observed
        cumulated rewards under current policy for a given state.
        :param prev_rewards: torch.Tensor of shape (episode_length, 1), rewards cumulated over the episode.
        : param prev_logs_probs: torch.Tensor of shape (episode_length, 1), logarithm over probabilities of picked-up actions over the episode.
        :param values_critic: list of torch.Tensor, expected cumulated rewards with respect to the states encountered during the episode, under the current critic agent.
        :param terminal_state (default: False): boolean, says if last cumulated state is terminal or not.
        :return: Nothing. Update of the actor's weights is done inplace.
        """
        if len(prev_rewards) != len(values_critic):
            excep_msg = 'prev_rewards len : {} and values_critic len'
            excep_msg += ': {} should be equal : {} != {}'
            len1, len2 = len(prev_rewards), len(values_critic)
            raise Exception(excep_msg.format(l1,l2,l1,l2))
        better_values = []
        discount_factor = 0.9
        better_value_last =  0 if terminal_state else values_critic[-1]
        better_value = float(better_value_last)
        # Discount future rewards back to the present using discount_factor
        discountable_rewards = reversed(list(prev_rewards[:-1]))
        for reward in discountable_rewards:
            better_value = reward + discount_factor * better_value
            better_values.append(better_value)
        better_values = better_values[::-1]
        better_values = torch.FloatTensor(better_values)
        
        values_critic = torch.cat(values_critic[:-1])
        assert len(better_values) == len(values_critic)
        loss = (Variable(better_values) - values_critic) ** 2
        loss = sum(loss)
    
        self.optimizer.zero_grad()
        loss.backward(retain_graph=True)
        self.optimizer.step()

        
        
class AC(nn.Module):
    # not usual actor critic, advantage actor critic
    def __init__(self, state_space_dim, action_space_dim,
                 actor_lr=0.01, critic_lr=0.01):
        super().__init__()
        self.prev_rewards = None
        self.prev_logs_probs = None
        self.prev_states = None
        self.critic = Critic(state_space_dim, action_space_dim,
                             critic_lr=critic_lr)
        self.actor = Actor(state_space_dim, action_space_dim,
                           actor_lr=actor_lr)
        
    def get_action(self, current_state):
        """
        get_action function. From current probability distribution under the policy, samples a 
        possible action and computes logarithm over probability of sampled action.
        pytorch maintains a dynamic graph over computations involving to torch.Variable type variables. 
        Thus it will know how to backpropagate the gradient to the parameters involved in the making
        of the log values.
        :param current_state: torch.FloatTensor, the input state.
        :return: sampled_action, torch.autograd.variable.Variable, the action sampled.
        :return: log_prob_action, torch.autograd.variable.Variable, logarithm over sampled action's proba.
        """
        probability_distrib_actions = self.actor.forward(Variable(current_state))
        c = Categorical(probability_distrib_actions)
        sampled_action = c.sample()
        log_prob_action = c.log_prob(sampled_action)
        return sampled_action, log_prob_action
    
    def remember_logs_probs(self, log_prob):
        """
        Remember_logs_probs function. Adds an element to the list of logarithms over probabilities
        of past sampled actions.
        :param log_prob: a logarithm over probability of a sampled action.
        :return: Nothing, Update of the list of logarithms over probas is done inplace.
        """
        if self.prev_logs_probs is None:
            self.prev_logs_probs = log_prob
        else:
            self.prev_logs_probs = torch.cat([self.prev_logs_probs, log_prob])
            
    def forget_logs_probs(self):
        """
        forget_logs_probs function. deletes the list of logarithms over probabilities of past sampled actions.
        :param : Nothing. Deletion is done inplace.
        :return: Nothing. Deletion is done inplace.
        """
        self.prev_logs_probs = None
    
    def remember_rewards(self, reward):
        """
        remember_rewards function. Adds an element to the list of rewards from interactions with the environment.
        :param reward: float, a reward value.
        :return: Nothing, Update of the list of rewards is done inplace.
        """
        reward = torch.Tensor([float(reward)]) if not isinstance(reward, torch.Tensor) else reward
        if self.prev_rewards is None:
            self.prev_rewards = reward
        else:
            self.prev_rewards = torch.cat([self.prev_rewards, reward])
            
    def forget_rewards(self):
        """
        forget_rewards function. deletes the maintained list of rewards.
        :param : Nothing. Deletion is done inplace.
        :return: Nothing. Deletion is done inplace.
        """
        self.prev_rewards = None
    
    def remember_states(self, state):
        """
        remember_states function. Adds an element to the list of states gotten from interacting with the environment.
        :param state: torch.FloatTensor, the input state.
        :return: Nothing, Update of the list of states is done inplace
        """
        self.prev_states = [state] if self.prev_states is None \
            else self.prev_states + [state]
            
    def forget_states(self):
        """
        forget_states function. Deletes the maintained list of states.
        :param : Nothing. Deletion is done inplace.
        :return: Nothing. Deletion is done inplace.
        """
        self.prev_states = None
    
    def train(self, terminal_state):
        """
        Train the actor_critic agent. Trains first the actor agent and then the critic agent. Then forgets
        about rewars, states and logarithms over probabilities used for training.
        :param terminal_state: boolean, says if last cumulated state is terminal or not.
        :return: Nothing, Training is done inplace.
        """
        values_critic = [self.critic.forward(Variable(state)) for state in self.prev_states]
        self.actor.update_actor(self.prev_rewards, 
                            self.prev_logs_probs, 
                            values_critic,
                            terminal_state=terminal_state)
        self.critic.update_critic(self.prev_rewards, 
                             values_critic,
                             terminal_state=terminal_state)
        self.forget_logs_probs()
        self.forget_rewards()
        self.forget_states()

In [3]:
def run_rl(actor_critic, actions, rubiks_cube,
           nb_episodes=10_000, max_length_episode=100):
    """
    run_rl function. Function used to run the environment. Iterates episodes, a training session
    is triggered everytime an episode has ended. Cumulated reward over each episode is computed.
    A running reward of cumulated rewards over episodes is computed and trigger solved environment
    warning message when goes over a particular thresold.
    :param actor_critic: AC object, actor critic agent used to solve the environment.
    :param nb_episodes: int, max number of episodes to train on.
    :return: Nothing, running is done inplace.
    """
    nb_steps_before_remember = 10  # This variable is never used --> see commented line below unsure if keep it
    for episode in range(nb_episodes):
        cumul_reward = 0
        rubiks_cube = RubiksCube(shuffle=True)
        initial_state = rubiks_cube.cube.reshape(-1)
        done = False
        current_state = torch.from_numpy(initial_state).type(torch.FloatTensor)
        
        for step in range(1, max_length_episode):
            sampled_action, log_prob_action = actor_critic.get_action(current_state)
            reward, done = rubiks_cube.step(RubiksAction(actions[int(sampled_action)]))
            state = rubiks_cube.cube.reshape(-1)
            actor_critic.remember_logs_probs(log_prob_action)
            actor_critic.remember_rewards(reward)
            actor_critic.remember_states(current_state)
            current_state = torch.from_numpy(state).type(torch.FloatTensor)
            print("Episode : {} | Step : {} | Reward : {}".format(episode, step, reward))
            if done:
                if len(actor_critic.prev_rewards) <= 1:
                    pass
                else:
                    actor_critic.train(done)#send terminal state
                break   

In [5]:
if __name__ == '__main__':
    sides = ['r', 'l', 'u', 'd', 'f', 'b']
    directions = ['d', 'i']
    actions = []
    for side in sides:
        for dir in directions:
            actions.append(side+dir)
            
    rubiks_cube = RubiksCube(shuffle=True)
    print('Is resolved: {0}'.format(rubiks_cube.is_resolved()))
    
    input_state_dim = int(np.prod(np.array(rubiks_cube.cube.shape)))
    actor_critic = AC(input_state_dim, len(actions), 
                      actor_lr=0.01, critic_lr=0.01)
    run_rl(actor_critic, actions, rubiks_cube, 
           nb_episodes=100, max_length_episode=100)

Cube initialized!
Is resolved: False
Cube initialized!
Episode : 0 | Step : 1 | Reward : 13
Episode : 0 | Step : 2 | Reward : 14
Episode : 0 | Step : 3 | Reward : 14
Episode : 0 | Step : 4 | Reward : 14
Episode : 0 | Step : 5 | Reward : 14
Episode : 0 | Step : 6 | Reward : 13
Episode : 0 | Step : 7 | Reward : 18
Episode : 0 | Step : 8 | Reward : 15
Episode : 0 | Step : 9 | Reward : 14
Episode : 0 | Step : 10 | Reward : 17
Episode : 0 | Step : 11 | Reward : 17
Episode : 0 | Step : 12 | Reward : 17
Episode : 0 | Step : 13 | Reward : 14
Episode : 0 | Step : 14 | Reward : 15
Episode : 0 | Step : 15 | Reward : 16
Episode : 0 | Step : 16 | Reward : 17
Episode : 0 | Step : 17 | Reward : 16
Episode : 0 | Step : 18 | Reward : 15
Episode : 0 | Step : 19 | Reward : 13
Episode : 0 | Step : 20 | Reward : 11
Episode : 0 | Step : 21 | Reward : 11
Episode : 0 | Step : 22 | Reward : 11
Episode : 0 | Step : 23 | Reward : 13
Episode : 0 | Step : 24 | Reward : 11
Episode : 0 | Step : 25 | Reward : 13
Epis



Episode : 1 | Step : 1 | Reward : 15
Episode : 1 | Step : 2 | Reward : 17
Episode : 1 | Step : 3 | Reward : 15
Episode : 1 | Step : 4 | Reward : 13
Episode : 1 | Step : 5 | Reward : 10
Episode : 1 | Step : 6 | Reward : 11
Episode : 1 | Step : 7 | Reward : 12
Episode : 1 | Step : 8 | Reward : 13
Episode : 1 | Step : 9 | Reward : 13
Episode : 1 | Step : 10 | Reward : 14
Episode : 1 | Step : 11 | Reward : 12
Episode : 1 | Step : 12 | Reward : 11
Episode : 1 | Step : 13 | Reward : 13
Episode : 1 | Step : 14 | Reward : 11
Episode : 1 | Step : 15 | Reward : 13
Episode : 1 | Step : 16 | Reward : 12
Episode : 1 | Step : 17 | Reward : 12
Episode : 1 | Step : 18 | Reward : 12
Episode : 1 | Step : 19 | Reward : 13
Episode : 1 | Step : 20 | Reward : 17
Episode : 1 | Step : 21 | Reward : 17
Episode : 1 | Step : 22 | Reward : 16
Episode : 1 | Step : 23 | Reward : 14
Episode : 1 | Step : 24 | Reward : 19
Episode : 1 | Step : 25 | Reward : 18
Episode : 1 | Step : 26 | Reward : 15
Episode : 1 | Step : 

Episode : 4 | Step : 1 | Reward : 13
Episode : 4 | Step : 2 | Reward : 13
Episode : 4 | Step : 3 | Reward : 14
Episode : 4 | Step : 4 | Reward : 12
Episode : 4 | Step : 5 | Reward : 15
Episode : 4 | Step : 6 | Reward : 17
Episode : 4 | Step : 7 | Reward : 13
Episode : 4 | Step : 8 | Reward : 10
Episode : 4 | Step : 9 | Reward : 11
Episode : 4 | Step : 10 | Reward : 11
Episode : 4 | Step : 11 | Reward : 16
Episode : 4 | Step : 12 | Reward : 15
Episode : 4 | Step : 13 | Reward : 18
Episode : 4 | Step : 14 | Reward : 17
Episode : 4 | Step : 15 | Reward : 18
Episode : 4 | Step : 16 | Reward : 14
Episode : 4 | Step : 17 | Reward : 14
Episode : 4 | Step : 18 | Reward : 16
Episode : 4 | Step : 19 | Reward : 18
Episode : 4 | Step : 20 | Reward : 16
Episode : 4 | Step : 21 | Reward : 16
Episode : 4 | Step : 22 | Reward : 13
Episode : 4 | Step : 23 | Reward : 15
Episode : 4 | Step : 24 | Reward : 16
Episode : 4 | Step : 25 | Reward : 16
Episode : 4 | Step : 26 | Reward : 12
Episode : 4 | Step : 

Episode : 7 | Step : 1 | Reward : 15
Episode : 7 | Step : 2 | Reward : 13
Episode : 7 | Step : 3 | Reward : 12
Episode : 7 | Step : 4 | Reward : 10
Episode : 7 | Step : 5 | Reward : 11
Episode : 7 | Step : 6 | Reward : 9
Episode : 7 | Step : 7 | Reward : 9
Episode : 7 | Step : 8 | Reward : 9
Episode : 7 | Step : 9 | Reward : 11
Episode : 7 | Step : 10 | Reward : 9
Episode : 7 | Step : 11 | Reward : 9
Episode : 7 | Step : 12 | Reward : 9
Episode : 7 | Step : 13 | Reward : 13
Episode : 7 | Step : 14 | Reward : 16
Episode : 7 | Step : 15 | Reward : 16
Episode : 7 | Step : 16 | Reward : 13
Episode : 7 | Step : 17 | Reward : 13
Episode : 7 | Step : 18 | Reward : 14
Episode : 7 | Step : 19 | Reward : 13
Episode : 7 | Step : 20 | Reward : 14
Episode : 7 | Step : 21 | Reward : 14
Episode : 7 | Step : 22 | Reward : 12
Episode : 7 | Step : 23 | Reward : 14
Episode : 7 | Step : 24 | Reward : 12
Episode : 7 | Step : 25 | Reward : 12
Episode : 7 | Step : 26 | Reward : 12
Episode : 7 | Step : 27 | R

Episode : 10 | Step : 1 | Reward : 18
Episode : 10 | Step : 2 | Reward : 19
Episode : 10 | Step : 3 | Reward : 18
Episode : 10 | Step : 4 | Reward : 20
Episode : 10 | Step : 5 | Reward : 17
Episode : 10 | Step : 6 | Reward : 14
Episode : 10 | Step : 7 | Reward : 12
Episode : 10 | Step : 8 | Reward : 13
Episode : 10 | Step : 9 | Reward : 14
Episode : 10 | Step : 10 | Reward : 15
Episode : 10 | Step : 11 | Reward : 15
Episode : 10 | Step : 12 | Reward : 16
Episode : 10 | Step : 13 | Reward : 15
Episode : 10 | Step : 14 | Reward : 15
Episode : 10 | Step : 15 | Reward : 15
Episode : 10 | Step : 16 | Reward : 18
Episode : 10 | Step : 17 | Reward : 15
Episode : 10 | Step : 18 | Reward : 15
Episode : 10 | Step : 19 | Reward : 14
Episode : 10 | Step : 20 | Reward : 17
Episode : 10 | Step : 21 | Reward : 17
Episode : 10 | Step : 22 | Reward : 15
Episode : 10 | Step : 23 | Reward : 17
Episode : 10 | Step : 24 | Reward : 15
Episode : 10 | Step : 25 | Reward : 11
Episode : 10 | Step : 26 | Reward 

Episode : 13 | Step : 1 | Reward : 15
Episode : 13 | Step : 2 | Reward : 14
Episode : 13 | Step : 3 | Reward : 13
Episode : 13 | Step : 4 | Reward : 15
Episode : 13 | Step : 5 | Reward : 15
Episode : 13 | Step : 6 | Reward : 17
Episode : 13 | Step : 7 | Reward : 15
Episode : 13 | Step : 8 | Reward : 14
Episode : 13 | Step : 9 | Reward : 16
Episode : 13 | Step : 10 | Reward : 12
Episode : 13 | Step : 11 | Reward : 12
Episode : 13 | Step : 12 | Reward : 12
Episode : 13 | Step : 13 | Reward : 12
Episode : 13 | Step : 14 | Reward : 15
Episode : 13 | Step : 15 | Reward : 14
Episode : 13 | Step : 16 | Reward : 13
Episode : 13 | Step : 17 | Reward : 15
Episode : 13 | Step : 18 | Reward : 11
Episode : 13 | Step : 19 | Reward : 13
Episode : 13 | Step : 20 | Reward : 11
Episode : 13 | Step : 21 | Reward : 11
Episode : 13 | Step : 22 | Reward : 10
Episode : 13 | Step : 23 | Reward : 9
Episode : 13 | Step : 24 | Reward : 14
Episode : 13 | Step : 25 | Reward : 15
Episode : 13 | Step : 26 | Reward :

Episode : 15 | Step : 50 | Reward : 13
Episode : 15 | Step : 51 | Reward : 12
Episode : 15 | Step : 52 | Reward : 10
Episode : 15 | Step : 53 | Reward : 13
Episode : 15 | Step : 54 | Reward : 14
Episode : 15 | Step : 55 | Reward : 16
Episode : 15 | Step : 56 | Reward : 14
Episode : 15 | Step : 57 | Reward : 16
Episode : 15 | Step : 58 | Reward : 15
Episode : 15 | Step : 59 | Reward : 13
Episode : 15 | Step : 60 | Reward : 15
Episode : 15 | Step : 61 | Reward : 19
Episode : 15 | Step : 62 | Reward : 15
Episode : 15 | Step : 63 | Reward : 14
Episode : 15 | Step : 64 | Reward : 14
Episode : 15 | Step : 65 | Reward : 14
Episode : 15 | Step : 66 | Reward : 14
Episode : 15 | Step : 67 | Reward : 17
Episode : 15 | Step : 68 | Reward : 16
Episode : 15 | Step : 69 | Reward : 20
Episode : 15 | Step : 70 | Reward : 18
Episode : 15 | Step : 71 | Reward : 14
Episode : 15 | Step : 72 | Reward : 13
Episode : 15 | Step : 73 | Reward : 14
Episode : 15 | Step : 74 | Reward : 14
Episode : 15 | Step : 75 

Episode : 18 | Step : 39 | Reward : 15
Episode : 18 | Step : 40 | Reward : 18
Episode : 18 | Step : 41 | Reward : 16
Episode : 18 | Step : 42 | Reward : 16
Episode : 18 | Step : 43 | Reward : 15
Episode : 18 | Step : 44 | Reward : 13
Episode : 18 | Step : 45 | Reward : 13
Episode : 18 | Step : 46 | Reward : 13
Episode : 18 | Step : 47 | Reward : 12
Episode : 18 | Step : 48 | Reward : 15
Episode : 18 | Step : 49 | Reward : 16
Episode : 18 | Step : 50 | Reward : 10
Episode : 18 | Step : 51 | Reward : 11
Episode : 18 | Step : 52 | Reward : 14
Episode : 18 | Step : 53 | Reward : 17
Episode : 18 | Step : 54 | Reward : 15
Episode : 18 | Step : 55 | Reward : 12
Episode : 18 | Step : 56 | Reward : 11
Episode : 18 | Step : 57 | Reward : 11
Episode : 18 | Step : 58 | Reward : 14
Episode : 18 | Step : 59 | Reward : 16
Episode : 18 | Step : 60 | Reward : 12
Episode : 18 | Step : 61 | Reward : 8
Episode : 18 | Step : 62 | Reward : 12
Episode : 18 | Step : 63 | Reward : 14
Episode : 18 | Step : 64 |

Episode : 21 | Step : 6 | Reward : 12
Episode : 21 | Step : 7 | Reward : 13
Episode : 21 | Step : 8 | Reward : 14
Episode : 21 | Step : 9 | Reward : 13
Episode : 21 | Step : 10 | Reward : 14
Episode : 21 | Step : 11 | Reward : 16
Episode : 21 | Step : 12 | Reward : 14
Episode : 21 | Step : 13 | Reward : 16
Episode : 21 | Step : 14 | Reward : 18
Episode : 21 | Step : 15 | Reward : 18
Episode : 21 | Step : 16 | Reward : 16
Episode : 21 | Step : 17 | Reward : 18
Episode : 21 | Step : 18 | Reward : 15
Episode : 21 | Step : 19 | Reward : 15
Episode : 21 | Step : 20 | Reward : 17
Episode : 21 | Step : 21 | Reward : 16
Episode : 21 | Step : 22 | Reward : 17
Episode : 21 | Step : 23 | Reward : 16
Episode : 21 | Step : 24 | Reward : 16
Episode : 21 | Step : 25 | Reward : 13
Episode : 21 | Step : 26 | Reward : 11
Episode : 21 | Step : 27 | Reward : 11
Episode : 21 | Step : 28 | Reward : 10
Episode : 21 | Step : 29 | Reward : 13
Episode : 21 | Step : 30 | Reward : 11
Episode : 21 | Step : 31 | Re

Episode : 23 | Step : 95 | Reward : 8
Episode : 23 | Step : 96 | Reward : 11
Episode : 23 | Step : 97 | Reward : 10
Episode : 23 | Step : 98 | Reward : 11
Episode : 23 | Step : 99 | Reward : 10
Cube initialized!
Episode : 24 | Step : 1 | Reward : 12
Episode : 24 | Step : 2 | Reward : 11
Episode : 24 | Step : 3 | Reward : 11
Episode : 24 | Step : 4 | Reward : 10
Episode : 24 | Step : 5 | Reward : 11
Episode : 24 | Step : 6 | Reward : 11
Episode : 24 | Step : 7 | Reward : 11
Episode : 24 | Step : 8 | Reward : 11
Episode : 24 | Step : 9 | Reward : 10
Episode : 24 | Step : 10 | Reward : 9
Episode : 24 | Step : 11 | Reward : 10
Episode : 24 | Step : 12 | Reward : 12
Episode : 24 | Step : 13 | Reward : 12
Episode : 24 | Step : 14 | Reward : 11
Episode : 24 | Step : 15 | Reward : 10
Episode : 24 | Step : 16 | Reward : 13
Episode : 24 | Step : 17 | Reward : 14
Episode : 24 | Step : 18 | Reward : 13
Episode : 24 | Step : 19 | Reward : 14
Episode : 24 | Step : 20 | Reward : 13
Episode : 24 | Ste

Episode : 26 | Step : 36 | Reward : 10
Episode : 26 | Step : 37 | Reward : 12
Episode : 26 | Step : 38 | Reward : 11
Episode : 26 | Step : 39 | Reward : 10
Episode : 26 | Step : 40 | Reward : 12
Episode : 26 | Step : 41 | Reward : 12
Episode : 26 | Step : 42 | Reward : 14
Episode : 26 | Step : 43 | Reward : 12
Episode : 26 | Step : 44 | Reward : 11
Episode : 26 | Step : 45 | Reward : 12
Episode : 26 | Step : 46 | Reward : 13
Episode : 26 | Step : 47 | Reward : 12
Episode : 26 | Step : 48 | Reward : 13
Episode : 26 | Step : 49 | Reward : 12
Episode : 26 | Step : 50 | Reward : 13
Episode : 26 | Step : 51 | Reward : 17
Episode : 26 | Step : 52 | Reward : 14
Episode : 26 | Step : 53 | Reward : 17
Episode : 26 | Step : 54 | Reward : 13
Episode : 26 | Step : 55 | Reward : 12
Episode : 26 | Step : 56 | Reward : 14
Episode : 26 | Step : 57 | Reward : 14
Episode : 26 | Step : 58 | Reward : 13
Episode : 26 | Step : 59 | Reward : 16
Episode : 26 | Step : 60 | Reward : 13
Episode : 26 | Step : 61 

Episode : 29 | Step : 1 | Reward : 13
Episode : 29 | Step : 2 | Reward : 12
Episode : 29 | Step : 3 | Reward : 13
Episode : 29 | Step : 4 | Reward : 16
Episode : 29 | Step : 5 | Reward : 15
Episode : 29 | Step : 6 | Reward : 16
Episode : 29 | Step : 7 | Reward : 14
Episode : 29 | Step : 8 | Reward : 16
Episode : 29 | Step : 9 | Reward : 15
Episode : 29 | Step : 10 | Reward : 16
Episode : 29 | Step : 11 | Reward : 11
Episode : 29 | Step : 12 | Reward : 14
Episode : 29 | Step : 13 | Reward : 11
Episode : 29 | Step : 14 | Reward : 14
Episode : 29 | Step : 15 | Reward : 11
Episode : 29 | Step : 16 | Reward : 14
Episode : 29 | Step : 17 | Reward : 12
Episode : 29 | Step : 18 | Reward : 13
Episode : 29 | Step : 19 | Reward : 12
Episode : 29 | Step : 20 | Reward : 13
Episode : 29 | Step : 21 | Reward : 11
Episode : 29 | Step : 22 | Reward : 11
Episode : 29 | Step : 23 | Reward : 9
Episode : 29 | Step : 24 | Reward : 11
Episode : 29 | Step : 25 | Reward : 9
Episode : 29 | Step : 26 | Reward : 

Episode : 31 | Step : 89 | Reward : 9
Episode : 31 | Step : 90 | Reward : 9
Episode : 31 | Step : 91 | Reward : 9
Episode : 31 | Step : 92 | Reward : 11
Episode : 31 | Step : 93 | Reward : 10
Episode : 31 | Step : 94 | Reward : 10
Episode : 31 | Step : 95 | Reward : 10
Episode : 31 | Step : 96 | Reward : 8
Episode : 31 | Step : 97 | Reward : 7
Episode : 31 | Step : 98 | Reward : 9
Episode : 31 | Step : 99 | Reward : 13
Cube initialized!
Episode : 32 | Step : 1 | Reward : 19
Episode : 32 | Step : 2 | Reward : 19
Episode : 32 | Step : 3 | Reward : 18
Episode : 32 | Step : 4 | Reward : 15
Episode : 32 | Step : 5 | Reward : 13
Episode : 32 | Step : 6 | Reward : 11
Episode : 32 | Step : 7 | Reward : 11
Episode : 32 | Step : 8 | Reward : 15
Episode : 32 | Step : 9 | Reward : 19
Episode : 32 | Step : 10 | Reward : 17
Episode : 32 | Step : 11 | Reward : 20
Episode : 32 | Step : 12 | Reward : 17
Episode : 32 | Step : 13 | Reward : 17
Episode : 32 | Step : 14 | Reward : 13
Episode : 32 | Step : 

Episode : 34 | Step : 35 | Reward : 14
Episode : 34 | Step : 36 | Reward : 13
Episode : 34 | Step : 37 | Reward : 14
Episode : 34 | Step : 38 | Reward : 11
Episode : 34 | Step : 39 | Reward : 11
Episode : 34 | Step : 40 | Reward : 10
Episode : 34 | Step : 41 | Reward : 12
Episode : 34 | Step : 42 | Reward : 14
Episode : 34 | Step : 43 | Reward : 12
Episode : 34 | Step : 44 | Reward : 14
Episode : 34 | Step : 45 | Reward : 11
Episode : 34 | Step : 46 | Reward : 11
Episode : 34 | Step : 47 | Reward : 12
Episode : 34 | Step : 48 | Reward : 12
Episode : 34 | Step : 49 | Reward : 15
Episode : 34 | Step : 50 | Reward : 14
Episode : 34 | Step : 51 | Reward : 16
Episode : 34 | Step : 52 | Reward : 18
Episode : 34 | Step : 53 | Reward : 16
Episode : 34 | Step : 54 | Reward : 14
Episode : 34 | Step : 55 | Reward : 14
Episode : 34 | Step : 56 | Reward : 15
Episode : 34 | Step : 57 | Reward : 17
Episode : 34 | Step : 58 | Reward : 14
Episode : 34 | Step : 59 | Reward : 11
Episode : 34 | Step : 60 

Episode : 37 | Step : 19 | Reward : 12
Episode : 37 | Step : 20 | Reward : 11
Episode : 37 | Step : 21 | Reward : 12
Episode : 37 | Step : 22 | Reward : 13
Episode : 37 | Step : 23 | Reward : 12
Episode : 37 | Step : 24 | Reward : 13
Episode : 37 | Step : 25 | Reward : 11
Episode : 37 | Step : 26 | Reward : 15
Episode : 37 | Step : 27 | Reward : 11
Episode : 37 | Step : 28 | Reward : 11
Episode : 37 | Step : 29 | Reward : 13
Episode : 37 | Step : 30 | Reward : 12
Episode : 37 | Step : 31 | Reward : 14
Episode : 37 | Step : 32 | Reward : 16
Episode : 37 | Step : 33 | Reward : 13
Episode : 37 | Step : 34 | Reward : 13
Episode : 37 | Step : 35 | Reward : 13
Episode : 37 | Step : 36 | Reward : 14
Episode : 37 | Step : 37 | Reward : 16
Episode : 37 | Step : 38 | Reward : 15
Episode : 37 | Step : 39 | Reward : 12
Episode : 37 | Step : 40 | Reward : 11
Episode : 37 | Step : 41 | Reward : 10
Episode : 37 | Step : 42 | Reward : 14
Episode : 37 | Step : 43 | Reward : 15
Episode : 37 | Step : 44 

Episode : 39 | Step : 67 | Reward : 18
Episode : 39 | Step : 68 | Reward : 16
Episode : 39 | Step : 69 | Reward : 15
Episode : 39 | Step : 70 | Reward : 13
Episode : 39 | Step : 71 | Reward : 12
Episode : 39 | Step : 72 | Reward : 13
Episode : 39 | Step : 73 | Reward : 13
Episode : 39 | Step : 74 | Reward : 12
Episode : 39 | Step : 75 | Reward : 15
Episode : 39 | Step : 76 | Reward : 14
Episode : 39 | Step : 77 | Reward : 11
Episode : 39 | Step : 78 | Reward : 13
Episode : 39 | Step : 79 | Reward : 16
Episode : 39 | Step : 80 | Reward : 13
Episode : 39 | Step : 81 | Reward : 12
Episode : 39 | Step : 82 | Reward : 15
Episode : 39 | Step : 83 | Reward : 20
Episode : 39 | Step : 84 | Reward : 15
Episode : 39 | Step : 85 | Reward : 19
Episode : 39 | Step : 86 | Reward : 20
Episode : 39 | Step : 87 | Reward : 20
Episode : 39 | Step : 88 | Reward : 19
Episode : 39 | Step : 89 | Reward : 18
Episode : 39 | Step : 90 | Reward : 16
Episode : 39 | Step : 91 | Reward : 17
Episode : 39 | Step : 92 

Episode : 42 | Step : 72 | Reward : 16
Episode : 42 | Step : 73 | Reward : 15
Episode : 42 | Step : 74 | Reward : 16
Episode : 42 | Step : 75 | Reward : 14
Episode : 42 | Step : 76 | Reward : 13
Episode : 42 | Step : 77 | Reward : 14
Episode : 42 | Step : 78 | Reward : 18
Episode : 42 | Step : 79 | Reward : 14
Episode : 42 | Step : 80 | Reward : 13
Episode : 42 | Step : 81 | Reward : 14
Episode : 42 | Step : 82 | Reward : 13
Episode : 42 | Step : 83 | Reward : 10
Episode : 42 | Step : 84 | Reward : 9
Episode : 42 | Step : 85 | Reward : 13
Episode : 42 | Step : 86 | Reward : 13
Episode : 42 | Step : 87 | Reward : 10
Episode : 42 | Step : 88 | Reward : 10
Episode : 42 | Step : 89 | Reward : 11
Episode : 42 | Step : 90 | Reward : 12
Episode : 42 | Step : 91 | Reward : 17
Episode : 42 | Step : 92 | Reward : 12
Episode : 42 | Step : 93 | Reward : 13
Episode : 42 | Step : 94 | Reward : 11
Episode : 42 | Step : 95 | Reward : 13
Episode : 42 | Step : 96 | Reward : 15
Episode : 42 | Step : 97 |

Episode : 45 | Step : 44 | Reward : 15
Episode : 45 | Step : 45 | Reward : 17
Episode : 45 | Step : 46 | Reward : 15
Episode : 45 | Step : 47 | Reward : 13
Episode : 45 | Step : 48 | Reward : 17
Episode : 45 | Step : 49 | Reward : 12
Episode : 45 | Step : 50 | Reward : 14
Episode : 45 | Step : 51 | Reward : 13
Episode : 45 | Step : 52 | Reward : 12
Episode : 45 | Step : 53 | Reward : 14
Episode : 45 | Step : 54 | Reward : 14
Episode : 45 | Step : 55 | Reward : 15
Episode : 45 | Step : 56 | Reward : 14
Episode : 45 | Step : 57 | Reward : 14
Episode : 45 | Step : 58 | Reward : 14
Episode : 45 | Step : 59 | Reward : 14
Episode : 45 | Step : 60 | Reward : 16
Episode : 45 | Step : 61 | Reward : 14
Episode : 45 | Step : 62 | Reward : 14
Episode : 45 | Step : 63 | Reward : 15
Episode : 45 | Step : 64 | Reward : 13
Episode : 45 | Step : 65 | Reward : 15
Episode : 45 | Step : 66 | Reward : 17
Episode : 45 | Step : 67 | Reward : 16
Episode : 45 | Step : 68 | Reward : 19
Episode : 45 | Step : 69 

Episode : 48 | Step : 1 | Reward : 15
Episode : 48 | Step : 2 | Reward : 15
Episode : 48 | Step : 3 | Reward : 15
Episode : 48 | Step : 4 | Reward : 15
Episode : 48 | Step : 5 | Reward : 14
Episode : 48 | Step : 6 | Reward : 12
Episode : 48 | Step : 7 | Reward : 13
Episode : 48 | Step : 8 | Reward : 12
Episode : 48 | Step : 9 | Reward : 15
Episode : 48 | Step : 10 | Reward : 12
Episode : 48 | Step : 11 | Reward : 12
Episode : 48 | Step : 12 | Reward : 12
Episode : 48 | Step : 13 | Reward : 12
Episode : 48 | Step : 14 | Reward : 13
Episode : 48 | Step : 15 | Reward : 12
Episode : 48 | Step : 16 | Reward : 13
Episode : 48 | Step : 17 | Reward : 12
Episode : 48 | Step : 18 | Reward : 13
Episode : 48 | Step : 19 | Reward : 12
Episode : 48 | Step : 20 | Reward : 11
Episode : 48 | Step : 21 | Reward : 10
Episode : 48 | Step : 22 | Reward : 12
Episode : 48 | Step : 23 | Reward : 10
Episode : 48 | Step : 24 | Reward : 9
Episode : 48 | Step : 25 | Reward : 11
Episode : 48 | Step : 26 | Reward :

Episode : 50 | Step : 97 | Reward : 17
Episode : 50 | Step : 98 | Reward : 15
Episode : 50 | Step : 99 | Reward : 12
Cube initialized!
Episode : 51 | Step : 1 | Reward : 14
Episode : 51 | Step : 2 | Reward : 15
Episode : 51 | Step : 3 | Reward : 14
Episode : 51 | Step : 4 | Reward : 11
Episode : 51 | Step : 5 | Reward : 11
Episode : 51 | Step : 6 | Reward : 10
Episode : 51 | Step : 7 | Reward : 11
Episode : 51 | Step : 8 | Reward : 10
Episode : 51 | Step : 9 | Reward : 11
Episode : 51 | Step : 10 | Reward : 10
Episode : 51 | Step : 11 | Reward : 9
Episode : 51 | Step : 12 | Reward : 7
Episode : 51 | Step : 13 | Reward : 11
Episode : 51 | Step : 14 | Reward : 13
Episode : 51 | Step : 15 | Reward : 11
Episode : 51 | Step : 16 | Reward : 13
Episode : 51 | Step : 17 | Reward : 13
Episode : 51 | Step : 18 | Reward : 12
Episode : 51 | Step : 19 | Reward : 13
Episode : 51 | Step : 20 | Reward : 13
Episode : 51 | Step : 21 | Reward : 11
Episode : 51 | Step : 22 | Reward : 11
Episode : 51 | Ste

Episode : 53 | Step : 49 | Reward : 11
Episode : 53 | Step : 50 | Reward : 9
Episode : 53 | Step : 51 | Reward : 11
Episode : 53 | Step : 52 | Reward : 11
Episode : 53 | Step : 53 | Reward : 10
Episode : 53 | Step : 54 | Reward : 12
Episode : 53 | Step : 55 | Reward : 13
Episode : 53 | Step : 56 | Reward : 12
Episode : 53 | Step : 57 | Reward : 13
Episode : 53 | Step : 58 | Reward : 13
Episode : 53 | Step : 59 | Reward : 14
Episode : 53 | Step : 60 | Reward : 13
Episode : 53 | Step : 61 | Reward : 15
Episode : 53 | Step : 62 | Reward : 13
Episode : 53 | Step : 63 | Reward : 12
Episode : 53 | Step : 64 | Reward : 9
Episode : 53 | Step : 65 | Reward : 14
Episode : 53 | Step : 66 | Reward : 9
Episode : 53 | Step : 67 | Reward : 12
Episode : 53 | Step : 68 | Reward : 9
Episode : 53 | Step : 69 | Reward : 8
Episode : 53 | Step : 70 | Reward : 11
Episode : 53 | Step : 71 | Reward : 11
Episode : 53 | Step : 72 | Reward : 13
Episode : 53 | Step : 73 | Reward : 15
Episode : 53 | Step : 74 | Rew

Episode : 56 | Step : 17 | Reward : 9
Episode : 56 | Step : 18 | Reward : 10
Episode : 56 | Step : 19 | Reward : 10
Episode : 56 | Step : 20 | Reward : 12
Episode : 56 | Step : 21 | Reward : 12
Episode : 56 | Step : 22 | Reward : 14
Episode : 56 | Step : 23 | Reward : 11
Episode : 56 | Step : 24 | Reward : 12
Episode : 56 | Step : 25 | Reward : 10
Episode : 56 | Step : 26 | Reward : 12
Episode : 56 | Step : 27 | Reward : 14
Episode : 56 | Step : 28 | Reward : 16
Episode : 56 | Step : 29 | Reward : 18
Episode : 56 | Step : 30 | Reward : 15
Episode : 56 | Step : 31 | Reward : 18
Episode : 56 | Step : 32 | Reward : 16
Episode : 56 | Step : 33 | Reward : 15
Episode : 56 | Step : 34 | Reward : 13
Episode : 56 | Step : 35 | Reward : 15
Episode : 56 | Step : 36 | Reward : 16
Episode : 56 | Step : 37 | Reward : 17
Episode : 56 | Step : 38 | Reward : 16
Episode : 56 | Step : 39 | Reward : 15
Episode : 56 | Step : 40 | Reward : 15
Episode : 56 | Step : 41 | Reward : 15
Episode : 56 | Step : 42 |

Episode : 59 | Step : 1 | Reward : 14
Episode : 59 | Step : 2 | Reward : 14
Episode : 59 | Step : 3 | Reward : 14
Episode : 59 | Step : 4 | Reward : 15
Episode : 59 | Step : 5 | Reward : 19
Episode : 59 | Step : 6 | Reward : 18
Episode : 59 | Step : 7 | Reward : 18
Episode : 59 | Step : 8 | Reward : 15
Episode : 59 | Step : 9 | Reward : 15
Episode : 59 | Step : 10 | Reward : 17
Episode : 59 | Step : 11 | Reward : 16
Episode : 59 | Step : 12 | Reward : 19
Episode : 59 | Step : 13 | Reward : 16
Episode : 59 | Step : 14 | Reward : 19
Episode : 59 | Step : 15 | Reward : 18
Episode : 59 | Step : 16 | Reward : 15
Episode : 59 | Step : 17 | Reward : 11
Episode : 59 | Step : 18 | Reward : 10
Episode : 59 | Step : 19 | Reward : 11
Episode : 59 | Step : 20 | Reward : 9
Episode : 59 | Step : 21 | Reward : 11
Episode : 59 | Step : 22 | Reward : 15
Episode : 59 | Step : 23 | Reward : 17
Episode : 59 | Step : 24 | Reward : 12
Episode : 59 | Step : 25 | Reward : 17
Episode : 59 | Step : 26 | Reward :

Episode : 61 | Step : 89 | Reward : 12
Episode : 61 | Step : 90 | Reward : 11
Episode : 61 | Step : 91 | Reward : 12
Episode : 61 | Step : 92 | Reward : 13
Episode : 61 | Step : 93 | Reward : 17
Episode : 61 | Step : 94 | Reward : 16
Episode : 61 | Step : 95 | Reward : 12
Episode : 61 | Step : 96 | Reward : 11
Episode : 61 | Step : 97 | Reward : 16
Episode : 61 | Step : 98 | Reward : 14
Episode : 61 | Step : 99 | Reward : 15
Cube initialized!
Episode : 62 | Step : 1 | Reward : 11
Episode : 62 | Step : 2 | Reward : 9
Episode : 62 | Step : 3 | Reward : 10
Episode : 62 | Step : 4 | Reward : 11
Episode : 62 | Step : 5 | Reward : 12
Episode : 62 | Step : 6 | Reward : 14
Episode : 62 | Step : 7 | Reward : 12
Episode : 62 | Step : 8 | Reward : 14
Episode : 62 | Step : 9 | Reward : 12
Episode : 62 | Step : 10 | Reward : 11
Episode : 62 | Step : 11 | Reward : 8
Episode : 62 | Step : 12 | Reward : 11
Episode : 62 | Step : 13 | Reward : 9
Episode : 62 | Step : 14 | Reward : 13
Episode : 62 | Step

Episode : 64 | Step : 46 | Reward : 10
Episode : 64 | Step : 47 | Reward : 11
Episode : 64 | Step : 48 | Reward : 12
Episode : 64 | Step : 49 | Reward : 11
Episode : 64 | Step : 50 | Reward : 12
Episode : 64 | Step : 51 | Reward : 12
Episode : 64 | Step : 52 | Reward : 13
Episode : 64 | Step : 53 | Reward : 17
Episode : 64 | Step : 54 | Reward : 15
Episode : 64 | Step : 55 | Reward : 14
Episode : 64 | Step : 56 | Reward : 13
Episode : 64 | Step : 57 | Reward : 15
Episode : 64 | Step : 58 | Reward : 13
Episode : 64 | Step : 59 | Reward : 11
Episode : 64 | Step : 60 | Reward : 10
Episode : 64 | Step : 61 | Reward : 12
Episode : 64 | Step : 62 | Reward : 15
Episode : 64 | Step : 63 | Reward : 19
Episode : 64 | Step : 64 | Reward : 14
Episode : 64 | Step : 65 | Reward : 14
Episode : 64 | Step : 66 | Reward : 13
Episode : 64 | Step : 67 | Reward : 14
Episode : 64 | Step : 68 | Reward : 13
Episode : 64 | Step : 69 | Reward : 16
Episode : 64 | Step : 70 | Reward : 16
Episode : 64 | Step : 71 

Episode : 67 | Step : 12 | Reward : 16
Episode : 67 | Step : 13 | Reward : 14
Episode : 67 | Step : 14 | Reward : 16
Episode : 67 | Step : 15 | Reward : 13
Episode : 67 | Step : 16 | Reward : 12
Episode : 67 | Step : 17 | Reward : 12
Episode : 67 | Step : 18 | Reward : 15
Episode : 67 | Step : 19 | Reward : 15
Episode : 67 | Step : 20 | Reward : 12
Episode : 67 | Step : 21 | Reward : 12
Episode : 67 | Step : 22 | Reward : 12
Episode : 67 | Step : 23 | Reward : 12
Episode : 67 | Step : 24 | Reward : 12
Episode : 67 | Step : 25 | Reward : 12
Episode : 67 | Step : 26 | Reward : 12
Episode : 67 | Step : 27 | Reward : 15
Episode : 67 | Step : 28 | Reward : 15
Episode : 67 | Step : 29 | Reward : 16
Episode : 67 | Step : 30 | Reward : 19
Episode : 67 | Step : 31 | Reward : 16
Episode : 67 | Step : 32 | Reward : 15
Episode : 67 | Step : 33 | Reward : 17
Episode : 67 | Step : 34 | Reward : 18
Episode : 67 | Step : 35 | Reward : 13
Episode : 67 | Step : 36 | Reward : 13
Episode : 67 | Step : 37 

Episode : 69 | Step : 58 | Reward : 13
Episode : 69 | Step : 59 | Reward : 13
Episode : 69 | Step : 60 | Reward : 12
Episode : 69 | Step : 61 | Reward : 13
Episode : 69 | Step : 62 | Reward : 15
Episode : 69 | Step : 63 | Reward : 14
Episode : 69 | Step : 64 | Reward : 13
Episode : 69 | Step : 65 | Reward : 14
Episode : 69 | Step : 66 | Reward : 16
Episode : 69 | Step : 67 | Reward : 17
Episode : 69 | Step : 68 | Reward : 15
Episode : 69 | Step : 69 | Reward : 13
Episode : 69 | Step : 70 | Reward : 11
Episode : 69 | Step : 71 | Reward : 10
Episode : 69 | Step : 72 | Reward : 12
Episode : 69 | Step : 73 | Reward : 8
Episode : 69 | Step : 74 | Reward : 10
Episode : 69 | Step : 75 | Reward : 12
Episode : 69 | Step : 76 | Reward : 9
Episode : 69 | Step : 77 | Reward : 10
Episode : 69 | Step : 78 | Reward : 10
Episode : 69 | Step : 79 | Reward : 11
Episode : 69 | Step : 80 | Reward : 10
Episode : 69 | Step : 81 | Reward : 10
Episode : 69 | Step : 82 | Reward : 11
Episode : 69 | Step : 83 | 

Episode : 72 | Step : 28 | Reward : 13
Episode : 72 | Step : 29 | Reward : 13
Episode : 72 | Step : 30 | Reward : 15
Episode : 72 | Step : 31 | Reward : 12
Episode : 72 | Step : 32 | Reward : 12
Episode : 72 | Step : 33 | Reward : 18
Episode : 72 | Step : 34 | Reward : 14
Episode : 72 | Step : 35 | Reward : 12
Episode : 72 | Step : 36 | Reward : 12
Episode : 72 | Step : 37 | Reward : 15
Episode : 72 | Step : 38 | Reward : 12
Episode : 72 | Step : 39 | Reward : 13
Episode : 72 | Step : 40 | Reward : 11
Episode : 72 | Step : 41 | Reward : 13
Episode : 72 | Step : 42 | Reward : 15
Episode : 72 | Step : 43 | Reward : 13
Episode : 72 | Step : 44 | Reward : 15
Episode : 72 | Step : 45 | Reward : 14
Episode : 72 | Step : 46 | Reward : 10
Episode : 72 | Step : 47 | Reward : 11
Episode : 72 | Step : 48 | Reward : 11
Episode : 72 | Step : 49 | Reward : 11
Episode : 72 | Step : 50 | Reward : 8
Episode : 72 | Step : 51 | Reward : 8
Episode : 72 | Step : 52 | Reward : 11
Episode : 72 | Step : 53 | 

Episode : 75 | Step : 12 | Reward : 27
Episode : 75 | Step : 13 | Reward : 23
Episode : 75 | Step : 14 | Reward : 22
Episode : 75 | Step : 15 | Reward : 23
Episode : 75 | Step : 16 | Reward : 17
Episode : 75 | Step : 17 | Reward : 16
Episode : 75 | Step : 18 | Reward : 18
Episode : 75 | Step : 19 | Reward : 19
Episode : 75 | Step : 20 | Reward : 16
Episode : 75 | Step : 21 | Reward : 15
Episode : 75 | Step : 22 | Reward : 17
Episode : 75 | Step : 23 | Reward : 15
Episode : 75 | Step : 24 | Reward : 18
Episode : 75 | Step : 25 | Reward : 16
Episode : 75 | Step : 26 | Reward : 14
Episode : 75 | Step : 27 | Reward : 15
Episode : 75 | Step : 28 | Reward : 16
Episode : 75 | Step : 29 | Reward : 15
Episode : 75 | Step : 30 | Reward : 15
Episode : 75 | Step : 31 | Reward : 14
Episode : 75 | Step : 32 | Reward : 13
Episode : 75 | Step : 33 | Reward : 14
Episode : 75 | Step : 34 | Reward : 13
Episode : 75 | Step : 35 | Reward : 14
Episode : 75 | Step : 36 | Reward : 15
Episode : 75 | Step : 37 

Episode : 77 | Step : 41 | Reward : 9
Episode : 77 | Step : 42 | Reward : 9
Episode : 77 | Step : 43 | Reward : 10
Episode : 77 | Step : 44 | Reward : 8
Episode : 77 | Step : 45 | Reward : 12
Episode : 77 | Step : 46 | Reward : 13
Episode : 77 | Step : 47 | Reward : 12
Episode : 77 | Step : 48 | Reward : 9
Episode : 77 | Step : 49 | Reward : 13
Episode : 77 | Step : 50 | Reward : 16
Episode : 77 | Step : 51 | Reward : 14
Episode : 77 | Step : 52 | Reward : 13
Episode : 77 | Step : 53 | Reward : 16
Episode : 77 | Step : 54 | Reward : 19
Episode : 77 | Step : 55 | Reward : 18
Episode : 77 | Step : 56 | Reward : 19
Episode : 77 | Step : 57 | Reward : 14
Episode : 77 | Step : 58 | Reward : 13
Episode : 77 | Step : 59 | Reward : 12
Episode : 77 | Step : 60 | Reward : 13
Episode : 77 | Step : 61 | Reward : 13
Episode : 77 | Step : 62 | Reward : 13
Episode : 77 | Step : 63 | Reward : 14
Episode : 77 | Step : 64 | Reward : 13
Episode : 77 | Step : 65 | Reward : 12
Episode : 77 | Step : 66 | Re

Episode : 81 | Step : 1 | Reward : 14
Episode : 81 | Step : 2 | Reward : 14
Episode : 81 | Step : 3 | Reward : 13
Episode : 81 | Step : 4 | Reward : 15
Episode : 81 | Step : 5 | Reward : 15
Episode : 81 | Step : 6 | Reward : 21
Episode : 81 | Step : 7 | Reward : 19
Episode : 81 | Step : 8 | Reward : 19
Episode : 81 | Step : 9 | Reward : 19
Episode : 81 | Step : 10 | Reward : 19
Episode : 81 | Step : 11 | Reward : 17
Episode : 81 | Step : 12 | Reward : 15
Episode : 81 | Step : 13 | Reward : 11
Episode : 81 | Step : 14 | Reward : 12
Episode : 81 | Step : 15 | Reward : 15
Episode : 81 | Step : 16 | Reward : 16
Episode : 81 | Step : 17 | Reward : 17
Episode : 81 | Step : 18 | Reward : 14
Episode : 81 | Step : 19 | Reward : 11
Episode : 81 | Step : 20 | Reward : 10
Episode : 81 | Step : 21 | Reward : 16
Episode : 81 | Step : 22 | Reward : 19
Episode : 81 | Step : 23 | Reward : 18
Episode : 81 | Step : 24 | Reward : 19
Episode : 81 | Step : 25 | Reward : 18
Episode : 81 | Step : 26 | Reward 

Episode : 83 | Step : 44 | Reward : 15
Episode : 83 | Step : 45 | Reward : 15
Episode : 83 | Step : 46 | Reward : 15
Episode : 83 | Step : 47 | Reward : 20
Episode : 83 | Step : 48 | Reward : 16
Episode : 83 | Step : 49 | Reward : 12
Episode : 83 | Step : 50 | Reward : 10
Episode : 83 | Step : 51 | Reward : 11
Episode : 83 | Step : 52 | Reward : 10
Episode : 83 | Step : 53 | Reward : 9
Episode : 83 | Step : 54 | Reward : 9
Episode : 83 | Step : 55 | Reward : 10
Episode : 83 | Step : 56 | Reward : 10
Episode : 83 | Step : 57 | Reward : 11
Episode : 83 | Step : 58 | Reward : 12
Episode : 83 | Step : 59 | Reward : 13
Episode : 83 | Step : 60 | Reward : 16
Episode : 83 | Step : 61 | Reward : 15
Episode : 83 | Step : 62 | Reward : 13
Episode : 83 | Step : 63 | Reward : 13
Episode : 83 | Step : 64 | Reward : 14
Episode : 83 | Step : 65 | Reward : 15
Episode : 83 | Step : 66 | Reward : 14
Episode : 83 | Step : 67 | Reward : 15
Episode : 83 | Step : 68 | Reward : 13
Episode : 83 | Step : 69 | 

Episode : 85 | Step : 73 | Reward : 18
Episode : 85 | Step : 74 | Reward : 15
Episode : 85 | Step : 75 | Reward : 18
Episode : 85 | Step : 76 | Reward : 18
Episode : 85 | Step : 77 | Reward : 15
Episode : 85 | Step : 78 | Reward : 13
Episode : 85 | Step : 79 | Reward : 15
Episode : 85 | Step : 80 | Reward : 12
Episode : 85 | Step : 81 | Reward : 13
Episode : 85 | Step : 82 | Reward : 13
Episode : 85 | Step : 83 | Reward : 13
Episode : 85 | Step : 84 | Reward : 16
Episode : 85 | Step : 85 | Reward : 13
Episode : 85 | Step : 86 | Reward : 11
Episode : 85 | Step : 87 | Reward : 10
Episode : 85 | Step : 88 | Reward : 11
Episode : 85 | Step : 89 | Reward : 17
Episode : 85 | Step : 90 | Reward : 15
Episode : 85 | Step : 91 | Reward : 17
Episode : 85 | Step : 92 | Reward : 17
Episode : 85 | Step : 93 | Reward : 15
Episode : 85 | Step : 94 | Reward : 16
Episode : 85 | Step : 95 | Reward : 19
Episode : 85 | Step : 96 | Reward : 14
Episode : 85 | Step : 97 | Reward : 14
Episode : 85 | Step : 98 

Episode : 88 | Step : 42 | Reward : 11
Episode : 88 | Step : 43 | Reward : 11
Episode : 88 | Step : 44 | Reward : 13
Episode : 88 | Step : 45 | Reward : 12
Episode : 88 | Step : 46 | Reward : 14
Episode : 88 | Step : 47 | Reward : 14
Episode : 88 | Step : 48 | Reward : 19
Episode : 88 | Step : 49 | Reward : 18
Episode : 88 | Step : 50 | Reward : 16
Episode : 88 | Step : 51 | Reward : 13
Episode : 88 | Step : 52 | Reward : 12
Episode : 88 | Step : 53 | Reward : 15
Episode : 88 | Step : 54 | Reward : 16
Episode : 88 | Step : 55 | Reward : 17
Episode : 88 | Step : 56 | Reward : 16
Episode : 88 | Step : 57 | Reward : 16
Episode : 88 | Step : 58 | Reward : 13
Episode : 88 | Step : 59 | Reward : 14
Episode : 88 | Step : 60 | Reward : 13
Episode : 88 | Step : 61 | Reward : 18
Episode : 88 | Step : 62 | Reward : 17
Episode : 88 | Step : 63 | Reward : 16
Episode : 88 | Step : 64 | Reward : 16
Episode : 88 | Step : 65 | Reward : 17
Episode : 88 | Step : 66 | Reward : 16
Episode : 88 | Step : 67 

Episode : 90 | Step : 85 | Reward : 14
Episode : 90 | Step : 86 | Reward : 15
Episode : 90 | Step : 87 | Reward : 15
Episode : 90 | Step : 88 | Reward : 14
Episode : 90 | Step : 89 | Reward : 12
Episode : 90 | Step : 90 | Reward : 13
Episode : 90 | Step : 91 | Reward : 12
Episode : 90 | Step : 92 | Reward : 12
Episode : 90 | Step : 93 | Reward : 15
Episode : 90 | Step : 94 | Reward : 14
Episode : 90 | Step : 95 | Reward : 13
Episode : 90 | Step : 96 | Reward : 11
Episode : 90 | Step : 97 | Reward : 14
Episode : 90 | Step : 98 | Reward : 14
Episode : 90 | Step : 99 | Reward : 15
Cube initialized!
Episode : 91 | Step : 1 | Reward : 13
Episode : 91 | Step : 2 | Reward : 18
Episode : 91 | Step : 3 | Reward : 14
Episode : 91 | Step : 4 | Reward : 12
Episode : 91 | Step : 5 | Reward : 10
Episode : 91 | Step : 6 | Reward : 12
Episode : 91 | Step : 7 | Reward : 11
Episode : 91 | Step : 8 | Reward : 11
Episode : 91 | Step : 9 | Reward : 13
Episode : 91 | Step : 10 | Reward : 13
Episode : 91 | S

Episode : 93 | Step : 53 | Reward : 12
Episode : 93 | Step : 54 | Reward : 11
Episode : 93 | Step : 55 | Reward : 12
Episode : 93 | Step : 56 | Reward : 15
Episode : 93 | Step : 57 | Reward : 13
Episode : 93 | Step : 58 | Reward : 12
Episode : 93 | Step : 59 | Reward : 11
Episode : 93 | Step : 60 | Reward : 12
Episode : 93 | Step : 61 | Reward : 11
Episode : 93 | Step : 62 | Reward : 12
Episode : 93 | Step : 63 | Reward : 9
Episode : 93 | Step : 64 | Reward : 8
Episode : 93 | Step : 65 | Reward : 10
Episode : 93 | Step : 66 | Reward : 12
Episode : 93 | Step : 67 | Reward : 10
Episode : 93 | Step : 68 | Reward : 8
Episode : 93 | Step : 69 | Reward : 9
Episode : 93 | Step : 70 | Reward : 11
Episode : 93 | Step : 71 | Reward : 11
Episode : 93 | Step : 72 | Reward : 13
Episode : 93 | Step : 73 | Reward : 16
Episode : 93 | Step : 74 | Reward : 17
Episode : 93 | Step : 75 | Reward : 16
Episode : 93 | Step : 76 | Reward : 14
Episode : 93 | Step : 77 | Reward : 10
Episode : 93 | Step : 78 | Re

Episode : 95 | Step : 96 | Reward : 14
Episode : 95 | Step : 97 | Reward : 13
Episode : 95 | Step : 98 | Reward : 17
Episode : 95 | Step : 99 | Reward : 15
Cube initialized!
Episode : 96 | Step : 1 | Reward : 13
Episode : 96 | Step : 2 | Reward : 15
Episode : 96 | Step : 3 | Reward : 17
Episode : 96 | Step : 4 | Reward : 12
Episode : 96 | Step : 5 | Reward : 15
Episode : 96 | Step : 6 | Reward : 11
Episode : 96 | Step : 7 | Reward : 16
Episode : 96 | Step : 8 | Reward : 15
Episode : 96 | Step : 9 | Reward : 15
Episode : 96 | Step : 10 | Reward : 15
Episode : 96 | Step : 11 | Reward : 12
Episode : 96 | Step : 12 | Reward : 13
Episode : 96 | Step : 13 | Reward : 12
Episode : 96 | Step : 14 | Reward : 13
Episode : 96 | Step : 15 | Reward : 10
Episode : 96 | Step : 16 | Reward : 10
Episode : 96 | Step : 17 | Reward : 12
Episode : 96 | Step : 18 | Reward : 11
Episode : 96 | Step : 19 | Reward : 12
Episode : 96 | Step : 20 | Reward : 13
Episode : 96 | Step : 21 | Reward : 11
Episode : 96 | S

Episode : 98 | Step : 43 | Reward : 11
Episode : 98 | Step : 44 | Reward : 10
Episode : 98 | Step : 45 | Reward : 13
Episode : 98 | Step : 46 | Reward : 14
Episode : 98 | Step : 47 | Reward : 15
Episode : 98 | Step : 48 | Reward : 16
Episode : 98 | Step : 49 | Reward : 14
Episode : 98 | Step : 50 | Reward : 13
Episode : 98 | Step : 51 | Reward : 12
Episode : 98 | Step : 52 | Reward : 11
Episode : 98 | Step : 53 | Reward : 12
Episode : 98 | Step : 54 | Reward : 11
Episode : 98 | Step : 55 | Reward : 10
Episode : 98 | Step : 56 | Reward : 13
Episode : 98 | Step : 57 | Reward : 13
Episode : 98 | Step : 58 | Reward : 14
Episode : 98 | Step : 59 | Reward : 13
Episode : 98 | Step : 60 | Reward : 14
Episode : 98 | Step : 61 | Reward : 13
Episode : 98 | Step : 62 | Reward : 13
Episode : 98 | Step : 63 | Reward : 12
Episode : 98 | Step : 64 | Reward : 13
Episode : 98 | Step : 65 | Reward : 12
Episode : 98 | Step : 66 | Reward : 13
Episode : 98 | Step : 67 | Reward : 16
Episode : 98 | Step : 68 