In [1]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
import time
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import os
import gym_Aircraft
from torch import Tensor
from typing import Type, Any, Callable, Union, List, Optional
# Order CUDA devices by PCI bus ID and expose only device "2" to this process.
# NOTE(review): these variables are set after `import torch`; presumably that is
# fine as long as CUDA has not been initialised yet — confirm, or move them
# above the imports.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [2]:
# Create the "acav-v0" environment (presumably registered by the
# `gym_Aircraft` import — confirm).
env = gym.make("acav-v0")
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [3]:
# One environment step as stored in the replay buffer.
Transition = namedtuple('Transition', ['state', 'action', 'next_state', 'reward'])


class ReplayMemory(object):
    """Fixed-capacity ring buffer of ``Transition`` records.

    Attributes:
        capacity: maximum number of transitions kept.
        memory: list holding the stored transitions.
        position: index that the next ``push`` writes to.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Store a transition built from ``args``.

        While the buffer is below capacity it grows by one slot; once full,
        the slot at ``position`` is overwritten and the write index wraps
        around.
        """
        slot = self.position
        if len(self.memory) < self.capacity:
            # Grow the list first so that ``slot`` is a valid index.
            self.memory.append(None)
        self.memory[slot] = Transition(*args)
        self.position = (slot + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [4]:
class FClayer(nn.Module):
    """Fully connected layer followed by a LeakyReLU with negative slope 0.2.

    Args:
        innodes: number of input features of the linear layer.
        nodes: number of output features of the linear layer.
    """

    def __init__(self, innodes: int, nodes: int):
        super().__init__()
        self.fc = nn.Linear(innodes, nodes)
        # In-place activation: it overwrites the linear layer's output tensor.
        self.act = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the linear layer, then the activation."""
        return self.act(self.fc(x))
    
class WaveNET(nn.Module):
    """Three stacked fully connected stages followed by a linear output head.

    Args:
        block: module class used for every hidden layer; it is instantiated as
            ``block(in_features, out_features)``.
        planes: number of blocks in each of the three stages (only the first
            three entries are used; a stage always contains at least one block).
        nodes: output width of each of the three stages (only the first three
            entries are used).
        num_classes: number of outputs of the final linear layer.
        innodes: number of input features. Defaults to 5, the value that was
            previously hard-coded.

    Raises:
        IndexError: if ``planes`` or ``nodes`` has fewer than three entries.
    """

    def __init__(self, block: Type[nn.Module], planes: List[int], nodes: List[int],
                 num_classes: int = 3, innodes: int = 5) -> None:
        super(WaveNET, self).__init__()
        # Running input width for the next layer; _make_layer advances it, so
        # after construction it holds the width fed to the output head.
        self.innodes = innodes

        self.layer1 = self._make_layer(block, planes[0], nodes[0])
        self.layer2 = self._make_layer(block, planes[1], nodes[1])
        self.layer3 = self._make_layer(block, planes[2], nodes[2])

        self.fin_fc = nn.Linear(self.innodes, num_classes)

        # Kaiming-normal initialisation of every linear weight, including the
        # output head; biases keep the nn.Linear default initialisation.
        # NOTE(review): no negative slope `a` is passed here, while FClayer
        # uses LeakyReLU(0.2) — confirm whether the gain mismatch is intended.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')

    def _make_layer(self, block: Type[nn.Module], planes: int, nodes: int) -> nn.Sequential:
        """Build one stage of blocks that are each ``nodes`` wide.

        The first block maps the current ``self.innodes`` to ``nodes``; the
        remaining ``planes - 1`` blocks map ``nodes`` to ``nodes``. Updates
        ``self.innodes`` to ``nodes`` as a side effect.
        """
        layers = [block(self.innodes, nodes)]
        self.innodes = nodes
        for _ in range(1, planes):
            layers.append(block(self.innodes, nodes))

        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run the three stages in order, then the output head."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.fin_fc(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        """Forward pass; delegates to ``_forward_impl``."""
        return self._forward_impl(x)

In [5]:
# Number of transitions sampled from replay memory per optimisation step;
# optimisation is skipped until the memory holds at least this many.
BATCH_SIZE = 128
# Discount factor applied to the target network's next-state value.
GAMMA = 0.999
# Epsilon-greedy schedule: the exploration threshold decays exponentially from
# EPS_START towards EPS_END, with EPS_DECAY as the time constant measured in
# select_action calls.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
# The target network is synchronised with the policy network every
# TARGET_UPDATE episodes.
TARGET_UPDATE = 10

In [6]:
# Both networks start from the same saved model file.
# `map_location=device` lets a checkpoint saved on another device (e.g. a GPU
# that is not present here) be loaded onto the device selected for this run.
# NOTE: torch.load unpickles the file, which can execute arbitrary code — only
# load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases presumably default to
# `weights_only=True`, which would reject a fully pickled module like this —
# confirm against the installed version.
MODEL_PATH = "./Custom_model_fin"
policy_net = torch.load(MODEL_PATH, map_location=device).to(device)
target_net = torch.load(MODEL_PATH, map_location=device).to(device)
# The target network is only used for inference; as the last expression of the
# cell this also displays the model structure.
target_net.eval()



WaveNET(
  (layer1): Sequential(
    (0): FClayer(
      (fc): Linear(in_features=5, out_features=40, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
    (1): FClayer(
      (fc): Linear(in_features=40, out_features=40, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
  )
  (layer2): Sequential(
    (0): FClayer(
      (fc): Linear(in_features=40, out_features=20, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
    (1): FClayer(
      (fc): Linear(in_features=20, out_features=20, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
  )
  (layer3): Sequential(
    (0): FClayer(
      (fc): Linear(in_features=20, out_features=60, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
    (1): FClayer(
      (fc): Linear(in_features=60, out_features=60, bias=True)
      (act): LeakyReLU(negative_slope=0.2, inplace=True)
    )
  )
  (fin_fc): Linear(in_features=60, out_featu

In [7]:
# Number of discrete actions in the environment; select_action draws random
# exploration actions from range(n_actions).
n_actions = env.action_space.n

In [8]:
# Adam over the policy network's parameters, with Adam's default settings.
optimizer = optim.Adam(policy_net.parameters())
# Replay buffer holding at most 50000 transitions (oldest are overwritten).
memory = ReplayMemory(50000)

In [9]:
# Global count of select_action calls; it drives the epsilon decay schedule and
# is never reset between episodes.
steps_done = 0

In [10]:
def select_action(state):
    """Choose an action with an epsilon-greedy policy.

    The exploration threshold decays exponentially from EPS_START towards
    EPS_END as the global ``steps_done`` counter grows; the counter is
    incremented on every call.

    Args:
        state: input tensor for ``policy_net``
            (assumes shape (1, n_features) — TODO confirm).

    Returns:
        A 1x1 tensor holding the chosen action index.
    """
    global steps_done
    draw = random.random()
    decay = math.exp(-1. * steps_done / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * decay
    steps_done += 1

    if draw <= eps_threshold:
        # Explore: uniformly random action.
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)

    # Exploit: max(1) returns (values, indices) per row; the index of the
    # largest Q-value is the greedy action.
    with torch.no_grad():
        return policy_net(state).max(1).indices.view(1, 1)

In [11]:
def optimize_model():
    """Run one DQN optimisation step on a batch sampled from replay memory.

    Does nothing until the memory holds at least BATCH_SIZE transitions.
    Otherwise it samples a batch, computes Q(s, a) with ``policy_net``, builds
    the target ``reward + GAMMA * max_a' Q_target(s', a')`` (the next-state
    value is 0 for terminal transitions, stored with ``next_state=None``),
    minimises the Huber loss between the two, and applies one optimizer step
    with every gradient element clamped to [-1, 1].

    Returns:
        None.
    """
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)

    # Transpose: list of Transitions -> one Transition of per-field tuples.
    batch = Transition(*zip(*transitions))

    # True where the transition has a successor state (i.e. is not terminal).
    non_final_mask = torch.tensor([s is not None for s in batch.next_state],
                                  device=device, dtype=torch.bool)
    non_final_next_states = [s for s in batch.next_state if s is not None]

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Q(s_t, a_t): pick, for each row, the Q-value of the action actually taken.
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # max_a' Q_target(s_{t+1}, a'); stays 0 for terminal transitions.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    if non_final_next_states:
        # Guard: torch.cat raises on an empty list, which would happen if every
        # sampled transition were terminal.
        with torch.no_grad():
            next_state_values[non_final_mask] = \
                target_net(torch.cat(non_final_next_states)).max(1)[0]

    # Expected Q-values (Bellman target).
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Huber loss.
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimise the model.
    optimizer.zero_grad()
    loss.backward()
    # Element-wise clamp of all gradients to [-1, 1]; skips parameters that
    # have no gradient.
    nn.utils.clip_grad_value_(policy_net.parameters(), 1)
    optimizer.step()

In [None]:
# Main DQN training loop.
num_episodes = 50000
total_res = []    # one 2-D array (steps x logged values) per finished episode
reward_list = []  # one total-reward tensor per finished episode
for i_episode in range(num_episodes):
    total_reward = 0
    # Per-step log rows for this episode; stacked once when the episode ends
    # instead of re-copying a growing array on every step.
    step_rows = []

    # Reset the environment and build the initial state tensor (batch dim added).
    state = env.reset()
    state = torch.from_numpy(state.astype(np.float32)).unsqueeze(0).to(device)
    for t in count():
        # Select an action and apply it to the environment.
        action = select_action(state)
        next_state, reward, done, res = env.step(action.item())
        reward = torch.tensor([reward], dtype=torch.float32).to(device)

        # Terminal transitions are stored with next_state=None; optimize_model
        # treats their next-state value as 0.
        if done:
            next_state = None
        else:
            next_state = torch.from_numpy(next_state.astype(np.float32)).unsqueeze(0).to(device)

        # Store the transition in replay memory.
        memory.push(state, action, next_state, reward)

        # Move to the next state.
        state = next_state

        # One optimisation step on the policy network.
        optimize_model()

        # Flatten this step's info tuple into a single log row
        # (scalars plus the elements of the Pm and Pt arrays).
        cmd_list, r_list, elev_list, azim_list, Pm_list, Pt_list, h_list = res
        merged_data = itertools.chain([cmd_list], [r_list], [elev_list], [azim_list],
                                      Pm_list.tolist(), Pt_list.tolist(), [h_list])
        step_rows.append(np.array(list(merged_data)))

        total_reward += reward

        if done:
            # float64 matches the dtype of the zero-initialised buffer this
            # log was previously accumulated in.
            res_list = np.vstack(step_rows).astype(np.float64, copy=False)

            total_res.append(res_list)
            reward_list.append(total_reward)

            print(f"episode: {i_episode:4d} | final step: {t:3d} | reward: {total_reward.item():4.1f} "
                  f"| sum(R) last 100: {sum(reward_list[-100:]).cpu().numpy()[0]}")
            break

    # Update the target network: copy all weights and biases from the policy network.
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

print('Complete')
env.close()

episode:    0 | final step:  98 | reward: -1.0 | E[R] last 100: -1.0
episode:    1 | final step:  98 | reward: -1.0 | E[R] last 100: -2.0
episode:    2 | final step: 226 | reward: 49.6 | E[R] last 100: 47.64065933227539
episode:    3 | final step:  98 | reward: -1.0 | E[R] last 100: 46.64065933227539
episode:    4 | final step:  98 | reward: -1.0 | E[R] last 100: 45.64065933227539
episode:    5 | final step:  98 | reward: -1.0 | E[R] last 100: 44.64065933227539
episode:    6 | final step:  98 | reward: -1.0 | E[R] last 100: 43.64065933227539
episode:    7 | final step:  98 | reward: -1.0 | E[R] last 100: 42.64065933227539
episode:    8 | final step:  98 | reward: -1.0 | E[R] last 100: 41.64065933227539
episode:    9 | final step:  98 | reward: -1.0 | E[R] last 100: 40.64065933227539
episode:   10 | final step:  98 | reward: -1.0 | E[R] last 100: 39.64065933227539
episode:   11 | final step:  98 | reward: -1.0 | E[R] last 100: 38.64065933227539
episode:   12 | final step:  98 | reward: 

episode:  100 | final step:  98 | reward: -1.0 | E[R] last 100: 456.2474365234375
episode:  101 | final step:  98 | reward: -1.0 | E[R] last 100: 456.2474365234375
episode:  102 | final step:  98 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  103 | final step:  98 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  104 | final step:  99 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  105 | final step:  98 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  106 | final step:  98 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  107 | final step:  99 | reward: -1.0 | E[R] last 100: 405.6067810058594
episode:  108 | final step: 233 | reward: 49.7 | E[R] last 100: 456.2856750488281
episode:  109 | final step:  98 | reward: -1.0 | E[R] last 100: 456.2856750488281
episode:  110 | final step:  98 | reward: -1.0 | E[R] last 100: 456.2856750488281
episode:  111 | final step:  98 | reward: -1.0 | E[R] last 100: 456.2856750488281
episode:  112 | 

episode:  200 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  201 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  202 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  203 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  204 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  205 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  206 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  207 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5552978515625
episode:  208 | final step:  98 | reward: -1.0 | E[R] last 100: 51.876380920410156
episode:  209 | final step:  98 | reward: -1.0 | E[R] last 100: 51.876380920410156
episode:  210 | final step:  98 | reward: -1.0 | E[R] last 100: 51.876380920410156
episode:  211 | final step:  98 | reward: -1.0 | E[R] last 100: 51.876380920410156
episode:  21

episode:  304 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  305 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  306 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  307 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  308 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  309 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  310 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  311 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  312 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  313 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  314 | final step:  98 | reward: -1.0 | E[R] last 100: -49.420841217041016
episode:  315 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42084121

episode:  407 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  408 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  409 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  410 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  411 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode:  412 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode:  413 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  414 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  415 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  416 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  417 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  418 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  419 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode:  420 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode:  520 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  521 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  522 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  523 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  524 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  525 | final step:  99 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  526 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  527 | final step:  99 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  528 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  529 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  530 | final step:  98 | reward: -1.0 | E[R] last 100: -49.340919494628906
episode:  531 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34091949

episode:  620 | final step:  98 | reward: -1.0 | E[R] last 100: 1.280517578125
episode:  621 | final step:  98 | reward: -1.0 | E[R] last 100: 1.280517578125
episode:  622 | final step:  98 | reward: -1.0 | E[R] last 100: 1.280517578125
episode:  623 | final step:  98 | reward: -1.0 | E[R] last 100: 1.280517578125
episode:  624 | final step:  98 | reward: -1.0 | E[R] last 100: 1.280517578125
episode:  625 | final step: 226 | reward: 49.7 | E[R] last 100: 51.966217041015625
episode:  626 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  627 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  628 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  629 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  630 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  631 | final step:  98 | reward: -1.0 | E[R] last 100: 51.966217041015625
episode:  632 | final st

episode:  719 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  720 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  721 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  722 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  723 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  724 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3789596557617188
episode:  725 | final step:  99 | reward: -1.0 | E[R] last 100: -49.306739807128906
episode:  726 | final step:  98 | reward: -1.0 | E[R] last 100: -49.306739807128906
episode:  727 | final step:  98 | reward: -1.0 | E[R] last 100: -49.306739807128906
episode:  728 | final step:  98 | reward: -1.0 | E[R] last 100: -49.306739807128906
episode:  729 | final step:  98 | reward: -1.0 | E[R] last 100: -49.306739807128906
episode:  730 | final step:  99 | reward: -1.0 | E[R] last 100: -49.30673980712890

episode:  820 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  821 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  822 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  823 | final step:  99 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  824 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  825 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  826 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  827 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  828 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  829 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  830 | final step:  98 | reward: -1.0 | E[R] last 100: -49.313621520996094
episode:  831 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31362152

episode:  918 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  919 | final step:  99 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  920 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  921 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  922 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  923 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  924 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  925 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  926 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  927 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  928 | final step:  98 | reward: -1.0 | E[R] last 100: -49.349178314208984
episode:  929 | final step:  99 | reward: -1.0 | E[R] last 100: -49.34917831

episode: 1026 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1027 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1028 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1029 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1030 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1031 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1032 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1033 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1034 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1035 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1036 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1037 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1038 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1039 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1142 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1143 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1144 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1145 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1146 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1147 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1148 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1149 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1150 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1151 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1152 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1153 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 1154 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1155 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1258 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1259 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1260 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1261 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1262 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1263 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1264 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1265 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1266 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1267 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1268 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1269 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1270 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1271 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1374 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1375 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1376 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1377 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1378 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1379 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1380 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1381 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1382 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1383 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1384 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1385 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1386 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1387 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1489 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1490 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1491 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1492 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1493 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1494 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1495 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1496 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1497 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1498 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1499 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
episode: 1500 | final step:  98 | reward: -1.0 | E[R] last 100: -49.47496032714844
epis

episode: 1588 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1589 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1590 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1591 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1592 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1593 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1594 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1595 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1596 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1597 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1598 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
episode: 1599 | final step:  98 | reward: -1.0 | E[R] last 100: -49.24306106567383
epis

episode: 1691 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 1692 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1693 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1694 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1695 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1696 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1697 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1698 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1699 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1700 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 1701 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1702 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1703 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1704 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1807 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1808 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1809 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1810 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1811 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1812 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1813 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1814 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 1815 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1816 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1817 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1818 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1819 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1820 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 1923 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1924 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1925 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1926 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1927 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1928 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1929 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1930 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1931 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1932 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1933 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1934 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 1935 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 1936 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2034 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2035 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2036 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2037 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2038 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2039 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2040 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2041 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2042 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2043 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2044 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
episode: 2045 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37723922729492
epis

episode: 2138 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2139 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2140 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2141 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2142 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2143 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2144 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2145 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2146 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2147 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2148 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2149 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2150 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2151 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2254 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2255 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2256 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2257 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2258 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2259 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2260 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2261 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2262 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2263 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2264 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2265 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2266 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2267 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2362 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2363 | final step:  99 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2364 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2365 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2366 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2367 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2368 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2369 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2370 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2371 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2372 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2373 | final step:  98 | reward: -1.0 | E[R] last 100: 1.29229736328125
episode: 2374 | final step: 

episode: 2466 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2467 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2468 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2469 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2470 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2471 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2472 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2473 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2474 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2475 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2476 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2477 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2478 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2479 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2582 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2583 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2584 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2585 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2586 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2587 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2588 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2589 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 2590 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2591 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2592 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2593 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2594 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2595 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2698 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2699 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2700 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 2701 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2702 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2703 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2704 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2705 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2706 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2707 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2708 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2709 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2710 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2711 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2814 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2815 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2816 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2817 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2818 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2819 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2820 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2821 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2822 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2823 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2824 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2825 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 2826 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2827 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 2930 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2931 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2932 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2933 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2934 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2935 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2936 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 2937 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2938 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 2939 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2940 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2941 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2942 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 2943 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 3046 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3047 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3048 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3049 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3050 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3051 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3052 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3053 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3054 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 3055 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3056 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3057 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 3058 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3059 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 3162 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3163 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3164 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3165 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3166 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3167 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3168 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3169 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3170 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3171 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3172 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3173 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3174 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3175 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 3278 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3279 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3280 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3281 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3282 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3283 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3284 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3285 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3286 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3287 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3288 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3289 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3290 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 3291 | final step: 231 | reward: 49.5 | E[R] last 100: -49.515178680

episode: 3379 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3380 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3381 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3382 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3383 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3384 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3385 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3386 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3387 | final step:  99 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3388 | final step:  99 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3389 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
episode: 3390 | final step:  98 | reward: -1.0 | E[R] last 100: 152.61203002929688
epis

episode: 3480 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3481 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3482 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3483 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3484 | final step:  99 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3485 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3486 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3487 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3488 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3489 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3490 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
episode: 3491 | final step:  98 | reward: -1.0 | E[R] last 100: 52.048500061035156
epis

episode: 3580 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3581 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3582 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3583 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3584 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3585 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2274589538574219
episode: 3586 | final step: 225 | reward: 49.7 | E[R] last 100: 51.947837829589844
episode: 3587 | final step:  98 | reward: -1.0 | E[R] last 100: 51.947837829589844
episode: 3588 | final step:  98 | reward: -1.0 | E[R] last 100: 51.947837829589844
episode: 3589 | final step:  98 | reward: -1.0 | E[R] last 100: 51.947837829589844
episode: 3590 | final step:  99 | reward: -1.0 | E[R] last 100: 51.947837829589844
episode: 3591 | final step:  98 | reward: -1.0 | E[R] last 100: 51.947837829589844
epis

episode: 3679 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3680 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3681 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3682 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3683 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3684 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3685 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2947196960449219
episode: 3686 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4256591796875
episode: 3687 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4256591796875
episode: 3688 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4256591796875
episode: 3689 | final step:  99 | reward: -1.0 | E[R] last 100: -49.4256591796875
episode: 3690 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4256591796875
episode: 

episode: 3779 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3780 | final step:  99 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3781 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3782 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3783 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3784 | final step:  99 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3785 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3786 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3787 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872039794922
episode: 3788 | final step:  99 | reward: -1.0 | E[R] last 100: 102.50872802734375
episode: 3789 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872802734375
episode: 3790 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50872802734375
epis

episode: 3878 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3879 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3880 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3881 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3882 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3883 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3884 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3885 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3886 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3887 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3888 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
episode: 3889 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2560997009277344
epis

episode: 3977 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3978 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3979 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3980 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3981 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3982 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3983 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3984 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3985 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3986 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3987 | final step:  98 | reward: -1.0 | E[R] last 100: -49.397361755371094
episode: 3988 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39736175

episode: 4077 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4078 | final step:  99 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4079 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4080 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4081 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4082 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4083 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4084 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4085 | final step:  98 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4086 | final step:  99 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4087 | final step:  99 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4088 | final step:  99 | reward: -1.0 | E[R] last 100: 1.223419189453125
episode: 4089 | 

episode: 4176 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4177 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4178 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4179 | final step:  99 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4180 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4181 | final step: 100 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4182 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4183 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4184 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4185 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4186 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4187 | final step:  99 | reward: -1.0 | E[R] last 100: -49.3760986328125
episode: 4188 | 

episode: 4276 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4277 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4278 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4279 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4280 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4281 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4282 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4283 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4284 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4285 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93063735961914
episode: 4286 | final step:  98 | reward: -1.0 | E[R] last 100: 51.930633544921875
episode: 4287 | final step:  98 | reward: -1.0 | E[R] last 100: 51.930633544921875
episode: 4288 

episode: 4376 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4377 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4378 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4379 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4380 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4381 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4382 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4383 | final step:  99 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4384 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4385 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4386 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
episode: 4387 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39535903930664
epis

episode: 4477 | final step:  98 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4478 | final step:  98 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4479 | final step:  99 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4480 | final step:  98 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4481 | final step:  98 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4482 | final step:  98 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4483 | final step:  99 | reward: -1.0 | E[R] last 100: -49.404541015625
episode: 4484 | final step: 229 | reward: 49.6 | E[R] last 100: 1.1861801147460938
episode: 4485 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1861801147460938
episode: 4486 | final step: 230 | reward: 49.7 | E[R] last 100: 51.85414123535156
episode: 4487 | final step:  98 | reward: -1.0 | E[R] last 100: 51.85414123535156
episode: 4488 | final step:  98 | reward: -1.0 | E[R] last 100: 51.85414123535156
episode: 4489 | final

episode: 4577 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4578 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4579 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4580 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4581 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4582 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4583 | final step:  98 | reward: -1.0 | E[R] last 100: 51.912017822265625
episode: 4584 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3213043212890625
episode: 4585 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3213043212890625
episode: 4586 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34666061401367
episode: 4587 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34666061401367
episode: 4588 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34666061401367
epis

episode: 4684 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4685 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4686 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4687 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4688 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4689 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4690 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4691 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4692 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4693 | final step:  99 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4694 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
episode: 4695 | final step:  98 | reward: -1.0 | E[R] last 100: -49.30665969848633
epis

episode: 4783 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4784 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4785 | final step: 100 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4786 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4787 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4788 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4789 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4790 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4791 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4792 | final step:  99 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4793 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
episode: 4794 | final step:  98 | reward: -1.0 | E[R] last 100: 102.65336608886719
epis

episode: 4882 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4883 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4884 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4885 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4886 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4887 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4888 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4889 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4890 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4891 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4892 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
episode: 4893 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887382507324219
epis

episode: 4982 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4983 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4984 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4985 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4986 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4987 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4988 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4989 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4990 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4991 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4992 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
episode: 4993 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3705787658691406
epis

episode: 5090 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5091 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5092 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5093 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5094 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5095 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5096 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5097 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5098 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5099 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5100 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5101 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5102 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5103 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 5193 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5194 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5195 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5196 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5197 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5198 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5199 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5200 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5201 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5202 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5203 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5204 | final step:  98 | reward: -1.0 | E[R] last 100: 51.93568420410156
episode: 5205 | 

episode: 5292 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5293 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5294 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5295 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5296 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5297 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5298 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5299 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5300 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5301 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5302 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
episode: 5303 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3384819030761719
epis

episode: 5391 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5392 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5393 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5394 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5395 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5396 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5397 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5398 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5399 | final step: 100 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5400 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5401 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4434013366699219
episode: 5402 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4434013366699219
epis

episode: 5492 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5493 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5494 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5495 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5496 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5497 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5498 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5499 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5500 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5501 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5502 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5503 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5504 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5505 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 5599 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5600 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5601 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5602 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5603 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5604 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5605 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5606 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5607 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5608 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5609 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
episode: 5610 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4596977233886719
epis

episode: 5706 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5707 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5708 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5709 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5710 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5711 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5712 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5713 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5714 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5715 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5716 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5717 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5718 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5719 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 5817 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5818 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5819 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5820 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5821 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5822 | final step:  99 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5823 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5824 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5825 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5826 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5827 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
episode: 5828 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46546173095703
epis

episode: 5921 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5922 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5923 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5924 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5925 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5926 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5927 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5928 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5929 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5930 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5931 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5932 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 5933 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 5934 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 6037 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6038 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6039 | final step: 226 | reward: 49.6 | E[R] last 100: -49.38959884643555
episode: 6040 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6041 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6042 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6043 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6044 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6045 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6046 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6047 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6048 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38959884643555
episode: 6049 | final step: 

episode: 6137 | final step: 237 | reward: 49.7 | E[R] last 100: 102.4378662109375
episode: 6138 | final step:  99 | reward: -1.0 | E[R] last 100: 102.4378662109375
episode: 6139 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6140 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6141 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6142 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6143 | final step:  99 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6144 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6145 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6146 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6147 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6148 | final step:  98 | reward: -1.0 | E[R] last 100: 51.82746124267578
episode: 6149 | 

episode: 6237 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6238 | final step: 100 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6239 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6240 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6241 | final step:  99 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6242 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6243 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6244 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6245 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6246 | final step:  99 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6247 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
episode: 6248 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0749778747558594
epis

episode: 6337 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6338 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6339 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6340 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6341 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6342 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6343 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6344 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 6345 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 6346 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6347 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6348 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6349 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6350 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 6448 | final step:  99 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6449 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6450 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6451 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6452 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6453 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6454 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6455 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6456 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6457 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6458 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6459 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3831787109375
episode: 6460 | 

episode: 6548 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6549 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6550 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6551 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6552 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6553 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6554 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6555 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6556 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6557 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6558 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
episode: 6559 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42036056518555
epis

episode: 6659 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6660 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6661 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6662 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6663 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6664 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6665 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6666 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6667 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6668 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6669 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6670 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6671 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6672 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 6775 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 6776 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6777 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6778 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6779 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6780 | final step: 226 | reward: 49.8 | E[R] last 100: -49.21841812133789
episode: 6781 | final step:  98 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6782 | final step:  98 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6783 | final step:  98 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6784 | final step:  98 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6785 | final step:  99 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6786 | final step:  99 | reward: -1.0 | E[R] last 100: -49.21841812133789
episode: 6787 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 6875 | final step:  98 | reward: -1.0 | E[R] last 100: 102.84776306152344
episode: 6876 | final step:  98 | reward: -1.0 | E[R] last 100: 102.84776306152344
episode: 6877 | final step: 100 | reward: -1.0 | E[R] last 100: 102.84776306152344
episode: 6878 | final step:  98 | reward: -1.0 | E[R] last 100: 102.84776306152344
episode: 6879 | final step:  98 | reward: -1.0 | E[R] last 100: 102.84776306152344
episode: 6880 | final step:  98 | reward: -1.0 | E[R] last 100: 52.06617736816406
episode: 6881 | final step:  98 | reward: -1.0 | E[R] last 100: 52.06617736816406
episode: 6882 | final step:  98 | reward: -1.0 | E[R] last 100: 52.06617736816406
episode: 6883 | final step:  98 | reward: -1.0 | E[R] last 100: 52.06617736816406
episode: 6884 | final step:  98 | reward: -1.0 | E[R] last 100: 52.06617736816406
episode: 6885 | final step: 228 | reward: 49.6 | E[R] last 100: 102.69961547851562
episode: 6886 | final step:  98 | reward: -1.0 | E[R] last 100: 102.69961547851562
episode: 

episode: 6974 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6975 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6976 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6977 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6978 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6979 | final step:  99 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6980 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6981 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6982 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6983 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6984 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36656188964844
episode: 6985 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 6986 | 

episode: 7080 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7081 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7082 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7083 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7084 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7085 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7086 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7087 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7088 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7089 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7090 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
episode: 7091 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2887992858886719
epis

episode: 7181 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7182 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7183 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 7184 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7185 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7186 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7187 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7188 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7189 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7190 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7191 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7192 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7193 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7194 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 7297 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7298 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7299 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7300 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7301 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7302 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7303 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7304 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7305 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7306 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7307 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7308 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7309 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7310 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 7410 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7411 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7412 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7413 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7414 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7415 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7416 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7417 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7418 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7419 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7420 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
episode: 7421 | final step:  98 | reward: -1.0 | E[R] last 100: -49.10015869140625
epis

episode: 7511 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7512 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7513 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7514 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7515 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7516 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7517 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7518 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7519 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7520 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7521 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
episode: 7522 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40718078613281
epis

episode: 7610 | final step: 100 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7611 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7612 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7613 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7614 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7615 | final step:  99 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7616 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7617 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7618 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7619 | final step:  99 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7620 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7621 | final step:  99 | reward: -1.0 | E[R] last 100: -49.2221794128418
episode: 7622 | 

episode: 7713 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7714 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7715 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7716 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7717 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7718 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7719 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7720 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7721 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7722 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7723 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7724 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7725 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7726 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 7829 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7830 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7831 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7832 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7833 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7834 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7835 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7836 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7837 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7838 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7839 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7840 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7841 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7842 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 7945 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7946 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7947 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7948 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7949 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7950 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7951 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7952 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7953 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7954 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7955 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7956 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7957 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 7958 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8061 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8062 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8063 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8064 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8065 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8066 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8067 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8068 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8069 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8070 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8071 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8072 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8073 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8074 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8177 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8178 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8179 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8180 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8181 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8182 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8183 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8184 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8185 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8186 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8187 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8188 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8189 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8190 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8282 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0364799499511719
episode: 8283 | final step: 226 | reward: 49.5 | E[R] last 100: 51.550540924072266
episode: 8284 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8285 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8286 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8287 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8288 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8289 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8290 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8291 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8292 | final step:  98 | reward: -1.0 | E[R] last 100: 51.550540924072266
episode: 8293 | final step:  99 | reward: -1.0 | E[R] last 100: 51.550540924072266
epis

episode: 8381 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1959609985351562
episode: 8382 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1959609985351562
episode: 8383 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8384 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8385 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8386 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8387 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8388 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8389 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8390 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8391 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
episode: 8392 | final step:  98 | reward: -1.0 | E[R] last 100: -49.31809997558594
epis

episode: 8494 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8495 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8496 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8497 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8498 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8499 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8500 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8501 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8502 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8503 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8504 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8505 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8506 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8507 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8610 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8611 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8612 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8613 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8614 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8615 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8616 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8617 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8618 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8619 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8620 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8621 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8622 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8623 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8726 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8727 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8728 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8729 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8730 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8731 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8732 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8733 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8734 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8735 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8736 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8737 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8738 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8739 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 8840 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8841 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8842 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8843 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8844 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8845 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8846 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8847 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8848 | final step:  99 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8849 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8850 | final step:  99 | reward: -1.0 | E[R] last 100: 1.0184402465820312
episode: 8851 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0184402465820312
epis

episode: 8940 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8941 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 8942 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8943 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8944 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8945 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8946 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8947 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8948 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8949 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8950 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8951 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8952 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 8953 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 9050 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9051 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9052 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9053 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9054 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9055 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9056 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9057 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9058 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9059 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9060 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
episode: 9061 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3182792663574219
epis

episode: 9151 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9152 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 9153 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 9154 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9155 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9156 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9157 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9158 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9159 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9160 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9161 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9162 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9163 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9164 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 9257 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9258 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9259 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9260 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9261 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9262 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9263 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9264 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9265 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9266 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9267 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
episode: 9268 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2998199462890625
epis

episode: 9362 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9363 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9364 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9365 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9366 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9367 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9368 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9369 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9370 | final step: 231 | reward: 49.7 | E[R] last 100: -49.29819869995117
episode: 9371 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29819869995117
episode: 9372 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29819869995117
episode: 9373 | final step:  99 | reward: -1.0 | E[R] last 100: -49.29819869995117
episode: 9374 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29819869995117
episode: 9375 | f

episode: 9463 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9464 | final step:  99 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9465 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9466 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9467 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9468 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9469 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97838592529297
episode: 9470 | final step:  99 | reward: -1.0 | E[R] last 100: 1.276580810546875
episode: 9471 | final step:  99 | reward: -1.0 | E[R] last 100: 1.276580810546875
episode: 9472 | final step:  98 | reward: -1.0 | E[R] last 100: 1.276580810546875
episode: 9473 | final step:  98 | reward: -1.0 | E[R] last 100: 1.276580810546875
episode: 9474 | final step:  98 | reward: -1.0 | E[R] last 100: 1.276580810546875
episode: 9475 | 

episode: 9562 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9563 | final step:  99 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9564 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9565 | final step:  99 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9566 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9567 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976531982421875
episode: 9568 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
episode: 9569 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
episode: 9570 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
episode: 9571 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
episode: 9572 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
episode: 9573 | final step:  98 | reward: -1.0 | E[R] last 100: 51.976539611816406
epis

episode: 9661 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9662 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9663 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9664 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9665 | final step:  99 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9666 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9667 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9668 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9669 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9670 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9671 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
episode: 9672 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34347915649414
epis

episode: 9770 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9771 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9772 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9773 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9774 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 9775 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9776 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9777 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9778 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9779 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9780 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9781 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9782 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 9783 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episod

episode: 9880 | final step:  99 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9881 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9882 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9883 | final step:  99 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9884 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9885 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9886 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9887 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9888 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29365921020508
episode: 9889 | final step: 226 | reward: 49.6 | E[R] last 100: 1.326080322265625
episode: 9890 | final step:  98 | reward: -1.0 | E[R] last 100: 1.326080322265625
episode: 9891 | final step:  98 | reward: -1.0 | E[R] last 100: 1.326080322265625
episode

episode: 9980 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9981 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9982 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9983 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9984 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9985 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9986 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9987 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9988 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3152999877929688
episode: 9989 | final step:  98 | reward: -1.0 | E[R] last 100: -49.304439544677734
episode: 9990 | final step:  98 | reward: -1.0 | E[R] last 100: -49.304439544677734
episode: 9991 | final step:  98 | reward: -1.0 | E[R] last 100: -49.304439544677734
e

episode: 10083 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 10084 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 10085 | final step: 100 | reward: -1.0 | E[R] last 100: -100.0
episode: 10086 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 10087 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 10088 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 10089 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 10090 | final step: 231 | reward: 49.6 | E[R] last 100: -49.42127990722656
episode: 10091 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42127990722656
episode: 10092 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42127990722656
episode: 10093 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42127990722656
episode: 10094 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42127990722656
episode: 10095 | final step:  99 | reward: -1.0 | E[R] last 100: -49.4212799

episode: 10182 | final step:  99 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10183 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10184 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10185 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10186 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10187 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10188 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10189 | final step:  98 | reward: -1.0 | E[R] last 100: 52.11985778808594
episode: 10190 | final step:  98 | reward: -1.0 | E[R] last 100: 1.5411415100097656
episode: 10191 | final step:  98 | reward: -1.0 | E[R] last 100: 1.5411415100097656
episode: 10192 | final step:  98 | reward: -1.0 | E[R] last 100: 1.5411415100097656
episode: 10193 | final step:  98 | reward: -1.0 | E[R] last 100: 1.5411415100097656


episode: 10281 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10282 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10283 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10284 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10285 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10286 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10287 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10288 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10289 | final step:  99 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10290 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10291 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
episode: 10292 | final step:  98 | reward: -1.0 | E[R] last 100: 152.9337615966797
epis

episode: 10379 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10380 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10381 | final step:  99 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10382 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10383 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10384 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10385 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10386 | final step:  99 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10387 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10388 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10389 | final step:  98 | reward: -1.0 | E[R] last 100: 52.072601318359375
episode: 10390 | final step:  98 | reward: -1.0 | E[R] last 100: 52.07260131

episode: 10478 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10479 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10480 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10481 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10482 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10483 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10484 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10485 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10486 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10487 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10488 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3203010559082031
episode: 10489 | final step:  98 | reward: -1.0 | E[R] last 100: 1.320301055

episode: 10576 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10577 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10578 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10579 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10580 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10581 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10582 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10583 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10584 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10585 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10586 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3317031860351562
episode: 10587 | final step:  98 | reward: -1.0 | E[R] last 100: 1.331703186

episode: 10675 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10676 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10677 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10678 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10679 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10680 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10681 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10682 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10683 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10684 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10685 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
episode: 10686 | final step:  98 | reward: -1.0 | E[R] last 100: 51.92295837402344
epis

episode: 10773 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10774 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10775 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10776 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10777 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10778 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10779 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10780 | final step:  99 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10781 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10782 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10783 | final step:  98 | reward: -1.0 | E[R] last 100: -49.379459381103516
episode: 10784 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 10876 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10877 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10878 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10879 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10880 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10881 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10882 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10883 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10884 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10885 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10886 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1649208068847656
episode: 10887 | final step:  98 | reward: -1.0 | E[R] last 100: 1.164920806

episode: 10975 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10976 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10977 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10978 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10979 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10980 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10981 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10982 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10983 | final step:  99 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10984 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10985 | final step:  98 | reward: -1.0 | E[R] last 100: -49.37498092651367
episode: 10986 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3749809

episode: 11075 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11076 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11077 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11078 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11079 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11080 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11081 | final step:  99 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11082 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11083 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11084 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11085 | final step:  98 | reward: -1.0 | E[R] last 100: -49.33808135986328
episode: 11086 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3380813

episode: 11177 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11178 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11179 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11180 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11181 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11182 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11183 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 11184 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11185 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11186 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11187 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11188 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11189 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 11190 | final step:  99 | reward: -1.0 | E[R] last 100:

episode: 11283 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11284 | final step:  99 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11285 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11286 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11287 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11288 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11289 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11290 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11291 | final step:  99 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11292 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11293 | final step:  98 | reward: -1.0 | E[R] last 100: -49.29418182373047
episode: 11294 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2941818

episode: 11387 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39789962768555
episode: 11388 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39789962768555
episode: 11389 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39789962768555
episode: 11390 | final step: 228 | reward: 49.6 | E[R] last 100: 1.2025604248046875
episode: 11391 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11392 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11393 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11394 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11395 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11396 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11397 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2025604248046875
episode: 11398 | final step:  98 | reward: -1.0 | E[R] last 100: 1.202560424

episode: 11485 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399539947509766
episode: 11486 | final step: 226 | reward: 49.8 | E[R] last 100: 1.3518409729003906
episode: 11487 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3518409729003906
episode: 11488 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3518409729003906
episode: 11489 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3518409729003906
episode: 11490 | final step:  98 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11491 | final step:  98 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11492 | final step:  98 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11493 | final step:  98 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11494 | final step:  98 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11495 | final step:  99 | reward: -1.0 | E[R] last 100: -49.248619079589844
episode: 11496 | final step:  98 | reward: -1.0 | E[R] last 100: -49.

episode: 11583 | final step:  98 | reward: -1.0 | E[R] last 100: 102.51203918457031
episode: 11584 | final step:  98 | reward: -1.0 | E[R] last 100: 102.51203918457031
episode: 11585 | final step:  98 | reward: -1.0 | E[R] last 100: 102.51203918457031
episode: 11586 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11587 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11588 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11589 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11590 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11591 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11592 | final step:  98 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11593 | final step:  99 | reward: -1.0 | E[R] last 100: 51.760658264160156
episode: 11594 | final step:  98 | reward: -1.0 | E[R] last 100: 51.76065826

episode: 11681 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11682 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11683 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11684 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11685 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11686 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11687 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11688 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11689 | final step:  99 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11690 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11691 | final step:  99 | reward: -1.0 | E[R] last 100: -49.44063949584961
episode: 11692 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4406394

episode: 11779 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11780 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11781 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11782 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11783 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11784 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11785 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11786 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11787 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11788 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11789 | final step:  98 | reward: -1.0 | E[R] last 100: -49.369998931884766
episode: 11790 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 11877 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11878 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11879 | final step:  99 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11880 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11881 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11882 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11883 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11884 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11885 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11886 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11887 | final step:  98 | reward: -1.0 | E[R] last 100: -49.38414001464844
episode: 11888 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3841400

episode: 11981 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11982 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11983 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11984 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11985 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11986 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11987 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11988 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11989 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11990 | final step:  99 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11991 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3540191650390625
episode: 11992 | final step:  99 | reward: -1.0 | E[R] last 100: 1.354019165

episode: 12079 | final step:  98 | reward: -1.0 | E[R] last 100: 204.01792907714844
episode: 12080 | final step:  98 | reward: -1.0 | E[R] last 100: 204.01792907714844
episode: 12081 | final step: 230 | reward: 49.7 | E[R] last 100: 254.75033569335938
episode: 12082 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12083 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12084 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12085 | final step:  99 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12086 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12087 | final step:  99 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12088 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12089 | final step:  98 | reward: -1.0 | E[R] last 100: 254.75033569335938
episode: 12090 | final step:  99 | reward: -1.0 | E[R] last 100: 254.7503356

episode: 12177 | final step:  98 | reward: -1.0 | E[R] last 100: 153.5113525390625
episode: 12178 | final step:  98 | reward: -1.0 | E[R] last 100: 153.5113525390625
episode: 12179 | final step:  98 | reward: -1.0 | E[R] last 100: 153.5113525390625
episode: 12180 | final step:  98 | reward: -1.0 | E[R] last 100: 153.5113525390625
episode: 12181 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12182 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12183 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12184 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12185 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12186 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12187 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164062
episode: 12188 | final step:  98 | reward: -1.0 | E[R] last 100: 102.77896118164

episode: 12276 | final step:  98 | reward: -1.0 | E[R] last 100: 1.349700927734375
episode: 12277 | final step: 226 | reward: 49.7 | E[R] last 100: 52.01219940185547
episode: 12278 | final step:  99 | reward: -1.0 | E[R] last 100: 52.01219940185547
episode: 12279 | final step:  98 | reward: -1.0 | E[R] last 100: 52.01219940185547
episode: 12280 | final step:  98 | reward: -1.0 | E[R] last 100: 52.01219940185547
episode: 12281 | final step:  98 | reward: -1.0 | E[R] last 100: 52.01219940185547
episode: 12282 | final step: 231 | reward: 49.6 | E[R] last 100: 102.6417007446289
episode: 12283 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6417007446289
episode: 12284 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6417007446289
episode: 12285 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6417007446289
episode: 12286 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6417007446289
episode: 12287 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6417007446289
epis

episode: 12374 | final step:  98 | reward: -1.0 | E[R] last 100: 203.98919677734375
episode: 12375 | final step:  98 | reward: -1.0 | E[R] last 100: 203.98919677734375
episode: 12376 | final step:  98 | reward: -1.0 | E[R] last 100: 203.98919677734375
episode: 12377 | final step:  98 | reward: -1.0 | E[R] last 100: 153.32669067382812
episode: 12378 | final step:  98 | reward: -1.0 | E[R] last 100: 153.32669067382812
episode: 12379 | final step:  98 | reward: -1.0 | E[R] last 100: 153.32669067382812
episode: 12380 | final step:  98 | reward: -1.0 | E[R] last 100: 153.32669067382812
episode: 12381 | final step:  98 | reward: -1.0 | E[R] last 100: 153.32669067382812
episode: 12382 | final step:  98 | reward: -1.0 | E[R] last 100: 102.69718933105469
episode: 12383 | final step:  98 | reward: -1.0 | E[R] last 100: 102.69718933105469
episode: 12384 | final step:  98 | reward: -1.0 | E[R] last 100: 102.69718933105469
episode: 12385 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6971893

episode: 12479 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12480 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12481 | final step:  99 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12482 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12483 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12484 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12485 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12486 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12487 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12488 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12489 | final step:  98 | reward: -1.0 | E[R] last 100: -49.43090057373047
episode: 12490 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4309005

episode: 12577 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12578 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12579 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12580 | final step:  99 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12581 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12582 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12583 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12584 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12585 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12586 | final step:  99 | reward: -1.0 | E[R] last 100: -49.42726135253906
episode: 12587 | final step: 232 | reward: 49.7 | E[R] last 100: 1.2544975280761719
episode: 12588 | final step:  98 | reward: -1.0 | E[R] last 100: 1.254497528

episode: 12677 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12678 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12679 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12680 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12681 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12682 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12683 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12684 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12685 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12686 | final step:  98 | reward: -1.0 | E[R] last 100: 1.305419921875
episode: 12687 | final step:  98 | reward: -1.0 | E[R] last 100: -49.376338958740234
episode: 12688 | final step:  98 | reward: -1.0 | E[R] last 100: -49.376338958740234
episode: 12689 | final step:  

episode: 12775 | final step:  99 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12776 | final step:  99 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12777 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12778 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12779 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12780 | final step:  99 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12781 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12782 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12783 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12784 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12785 | final step:  99 | reward: -1.0 | E[R] last 100: -49.346858978271484
episode: 12786 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 12874 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12875 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12876 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12877 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12878 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12879 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12880 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12881 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12882 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12883 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12884 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
episode: 12885 | final step:  98 | reward: -1.0 | E[R] last 100: 51.96293640136719
epis

episode: 12973 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12974 | final step:  99 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12975 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12976 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12977 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12978 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12979 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12980 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12981 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12982 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12983 | final step:  98 | reward: -1.0 | E[R] last 100: -49.44982147216797
episode: 12984 | final step:  99 | reward: -1.0 | E[R] last 100: -49.4498214

episode: 13080 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13081 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13082 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13083 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13084 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13085 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13086 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13087 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13088 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 13089 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13090 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13091 | final step: 227 | reward: 49.5 | E[R] last 100: -49.482479095458984
episode: 13092 | final step:  98 | reward: -1.0 | E[R] last 100: -49.482479095458984
episode: 13093 | final step:  98 | rew

episode: 13180 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13181 | final step:  99 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13182 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13183 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13184 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13185 | final step:  99 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13186 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13187 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13188 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13189 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13190 | final step:  98 | reward: -1.0 | E[R] last 100: 153.03585815429688
episode: 13191 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5183410

episode: 13278 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13279 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13280 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13281 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13282 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13283 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13284 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13285 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13286 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13287 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13288 | final step:  98 | reward: -1.0 | E[R] last 100: 51.794219970703125
episode: 13289 | final step:  98 | reward: -1.0 | E[R] last 100: 51.79421997

episode: 13377 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13378 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13379 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13380 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13381 | final step:  99 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13382 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13383 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13384 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13385 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13386 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13387 | final step:  98 | reward: -1.0 | E[R] last 100: -49.42605972290039
episode: 13388 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4260597

episode: 13475 | final step: 229 | reward: 49.8 | E[R] last 100: 102.64359283447266
episode: 13476 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13477 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13478 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13479 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13480 | final step:  99 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13481 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13482 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13483 | final step:  99 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13484 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13485 | final step:  98 | reward: -1.0 | E[R] last 100: 102.64359283447266
episode: 13486 | final step:  98 | reward: -1.0 | E[R] last 100: 102.6435928

episode: 13574 | final step:  98 | reward: -1.0 | E[R] last 100: 51.90991973876953
episode: 13575 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13576 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13577 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13578 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13579 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13580 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13581 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13582 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13583 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13584 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849609375
episode: 13585 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1576995849

episode: 13680 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13681 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13682 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13683 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13684 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 13685 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13686 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13687 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13688 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13689 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13690 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 13691 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 13692 | final step: 100 | reward: -1.0 | E[R] last 100: -100.0
episode: 13693 | final step:  98 | reward: -1.0 | E[R] last 100:

episode: 13782 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13783 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13784 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13785 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13786 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13787 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13788 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13789 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13790 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13791 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13792 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1879386901855469
episode: 13793 | final step:  99 | reward: -1.0 | E[R] last 100: 1.187938690

episode: 13880 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13881 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13882 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13883 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13884 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13885 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13886 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13887 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13888 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13889 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13890 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4075393676757812
episode: 13891 | final step:  98 | reward: -1.0 | E[R] last 100: 1.407539367

episode: 13978 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13979 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13980 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13981 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13982 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13983 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13984 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13985 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13986 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13987 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13988 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2214393615722656
episode: 13989 | final step:  98 | reward: -1.0 | E[R] last 100: 1.221439361

episode: 14076 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14077 | final step:  99 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14078 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14079 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14080 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14081 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14082 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14083 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14084 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14085 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14086 | final step:  98 | reward: -1.0 | E[R] last 100: 102.50581359863281
episode: 14087 | final step: 226 | reward: 49.7 | E[R] last 100: 153.1645965

episode: 14175 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14176 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14177 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14178 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14179 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14180 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14181 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14182 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14183 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14184 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14185 | final step:  98 | reward: -1.0 | E[R] last 100: 1.3356590270996094
episode: 14186 | final step:  99 | reward: -1.0 | E[R] last 100: 1.335659027

episode: 14279 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14280 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14281 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14282 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14283 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14284 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14285 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14286 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2409820556640625
episode: 14287 | final step: 234 | reward: 49.7 | E[R] last 100: 51.89548110961914
episode: 14288 | final step:  98 | reward: -1.0 | E[R] last 100: 51.89548110961914
episode: 14289 | final step:  98 | reward: -1.0 | E[R] last 100: 51.89548110961914
episode: 14290 | final step:  98 | reward: -1.0 | E[R] last 100: 51.89548110961

episode: 14377 | final step:  99 | reward: -1.0 | E[R] last 100: 1.2808990478515625
episode: 14378 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2808990478515625
episode: 14379 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2808990478515625
episode: 14380 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2808990478515625
episode: 14381 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2808990478515625
episode: 14382 | final step: 231 | reward: 49.6 | E[R] last 100: 51.87955856323242
episode: 14383 | final step:  98 | reward: -1.0 | E[R] last 100: 51.87955856323242
episode: 14384 | final step:  99 | reward: -1.0 | E[R] last 100: 51.87955856323242
episode: 14385 | final step:  98 | reward: -1.0 | E[R] last 100: 51.87955856323242
episode: 14386 | final step:  98 | reward: -1.0 | E[R] last 100: 51.87955856323242
episode: 14387 | final step:  98 | reward: -1.0 | E[R] last 100: 1.2250595092773438
episode: 14388 | final step:  98 | reward: -1.0 | E[R] last 100: 1.22505950927734

episode: 14477 | final step:  98 | reward: -1.0 | E[R] last 100: 51.951202392578125
episode: 14478 | final step:  98 | reward: -1.0 | E[R] last 100: 51.951202392578125
episode: 14479 | final step:  98 | reward: -1.0 | E[R] last 100: 51.951202392578125
episode: 14480 | final step:  98 | reward: -1.0 | E[R] last 100: 51.951202392578125
episode: 14481 | final step: 236 | reward: 49.6 | E[R] last 100: 102.57056427001953
episode: 14482 | final step: 100 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14483 | final step:  98 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14484 | final step:  98 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14485 | final step:  98 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14486 | final step:  98 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14487 | final step:  98 | reward: -1.0 | E[R] last 100: 51.971900939941406
episode: 14488 | final step:  98 | reward: -1.0 | E[R] last 100: 51.97190093

episode: 14576 | final step:  98 | reward: -1.0 | E[R] last 100: -49.380638122558594
episode: 14577 | final step:  99 | reward: -1.0 | E[R] last 100: -49.380638122558594
episode: 14578 | final step:  98 | reward: -1.0 | E[R] last 100: -49.380638122558594
episode: 14579 | final step:  98 | reward: -1.0 | E[R] last 100: -49.380638122558594
episode: 14580 | final step:  98 | reward: -1.0 | E[R] last 100: -49.380638122558594
episode: 14581 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14582 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14583 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14584 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 14585 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14586 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14587 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14588 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0

episode: 14689 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14690 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14691 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14692 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14693 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14694 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14695 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14696 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14697 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14698 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14699 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14700 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14701 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 14702 | final step:  99 | reward: -1.0 | E[R] last 100:

episode: 14792 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14793 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14794 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14795 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14796 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14797 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14798 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14799 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14800 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14801 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14802 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4429168701171875
episode: 14803 | final step:  98 | reward: -1.0 | E[R] last 100: 1.442916870

episode: 14890 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41117858886719
episode: 14891 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41117858886719
episode: 14892 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41117858886719
episode: 14893 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41117858886719
episode: 14894 | final step: 228 | reward: 49.6 | E[R] last 100: 1.1928024291992188
episode: 14895 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14896 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14897 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14898 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14899 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14900 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1928024291992188
episode: 14901 | final step:  98 | reward: -1.0 | E[R] last 100: 1.192802429

episode: 14988 | final step:  98 | reward: -1.0 | E[R] last 100: 51.612918853759766
episode: 14989 | final step:  98 | reward: -1.0 | E[R] last 100: 51.612918853759766
episode: 14990 | final step:  98 | reward: -1.0 | E[R] last 100: 51.612918853759766
episode: 14991 | final step:  98 | reward: -1.0 | E[R] last 100: 51.6129150390625
episode: 14992 | final step:  99 | reward: -1.0 | E[R] last 100: 51.6129150390625
episode: 14993 | final step:  98 | reward: -1.0 | E[R] last 100: 51.6129150390625
episode: 14994 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0089378356933594
episode: 14995 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0089378356933594
episode: 14996 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0089378356933594
episode: 14997 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0089378356933594
episode: 14998 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0089378356933594
episode: 14999 | final step:  98 | reward: -1.0 | E[R] last 100: 1.008937835693359

episode: 15086 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15087 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15088 | final step: 100 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15089 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15090 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15091 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15092 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15093 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15094 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15095 | final step:  99 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15096 | final step:  98 | reward: -1.0 | E[R] last 100: -49.46781921386719
episode: 15097 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4678192

episode: 15184 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15185 | final step:  99 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15186 | final step:  99 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15187 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15188 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15189 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15190 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15191 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15192 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15193 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15194 | final step:  98 | reward: -1.0 | E[R] last 100: -49.36405944824219
episode: 15195 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3640594

episode: 15282 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15283 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15284 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15285 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15286 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15287 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15288 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15289 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15290 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15291 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15292 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45042037963867
episode: 15293 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4504203

episode: 15385 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15386 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15387 | final step:  99 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15388 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15389 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15390 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15391 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15392 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15393 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15394 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15395 | final step:  98 | reward: -1.0 | E[R] last 100: -49.383880615234375
episode: 15396 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 15483 | final step:  99 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15484 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15485 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15486 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15487 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15488 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15489 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15490 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765235900878906
episode: 15491 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765228271484375
episode: 15492 | final step:  99 | reward: -1.0 | E[R] last 100: 51.765228271484375
episode: 15493 | final step:  98 | reward: -1.0 | E[R] last 100: 51.765228271484375
episode: 15494 | final step:  98 | reward: -1.0 | E[R] last 100: 51.76522827

episode: 15581 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15582 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15583 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15584 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15585 | final step:  99 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15586 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15587 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15588 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15589 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15590 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15591 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1561012268066406
episode: 15592 | final step:  98 | reward: -1.0 | E[R] last 100: 1.156101226

episode: 15680 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15681 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15682 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15683 | final step: 100 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15684 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15685 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15686 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15687 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15688 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15689 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15690 | final step:  99 | reward: -1.0 | E[R] last 100: -49.4240608215332
episode: 15691 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4240608215332
epis

episode: 15786 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15787 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15788 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15789 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15790 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15791 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15792 | final step:  99 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15793 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15794 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15795 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15796 | final step:  98 | reward: -1.0 | E[R] last 100: -49.492000579833984
episode: 15797 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 15883 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15884 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 15885 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15886 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 15887 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15888 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15889 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15890 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15891 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 15892 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15893 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15894 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15895 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 15896 | final step:  98 | reward: -1.0 | E[R] last 100:

episode: 15984 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15985 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15986 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15987 | final step:  99 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15988 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15989 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15990 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15991 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15992 | final step:  99 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15993 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15994 | final step:  98 | reward: -1.0 | E[R] last 100: -49.40890121459961
episode: 15995 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4089012

episode: 16082 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16083 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16084 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16085 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16086 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16087 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16088 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16089 | final step:  99 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16090 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16091 | final step:  99 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16092 | final step:  98 | reward: -1.0 | E[R] last 100: -49.39154052734375
episode: 16093 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3915405

episode: 16183 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16184 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16185 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16186 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16187 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16188 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52627563476562
episode: 16189 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52629089355469
episode: 16190 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52629089355469
episode: 16191 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52629089355469
episode: 16192 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52629089355469
episode: 16193 | final step:  98 | reward: -1.0 | E[R] last 100: 102.52629089355469
episode: 16194 | final step:  98 | reward: -1.0 | E[R] last 100: 102.5262908

episode: 16281 | final step:  99 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16282 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16283 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16284 | final step:  99 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16285 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16286 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16287 | final step:  99 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16288 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16289 | final step:  98 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16290 | final step: 100 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16291 | final step:  99 | reward: -1.0 | E[R] last 100: 51.917503356933594
episode: 16292 | final step:  99 | reward: -1.0 | E[R] last 100: 51.91750335

episode: 16385 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16386 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16387 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16388 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16389 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16390 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16391 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16392 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16393 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16394 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16395 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16396 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16397 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 16398 | final step:  98 | reward: -1.0 | E[R] last 100:

episode: 16491 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16492 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16493 | final step:  99 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16494 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16495 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16496 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16497 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16498 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16499 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16500 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16501 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0542831420898438
episode: 16502 | final step:  98 | reward: -1.0 | E[R] last 100: 1.054283142

episode: 16591 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16592 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16593 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16594 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16595 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16596 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16597 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16598 | final step:  99 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16599 | final step:  98 | reward: -1.0 | E[R] last 100: -49.399940490722656
episode: 16600 | final step: 228 | reward: 49.6 | E[R] last 100: 1.1519584655761719
episode: 16601 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1519584655761719
episode: 16602 | final step:  98 | reward: -1.0 | E[R] last 100: 1.

episode: 16689 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16690 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16691 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16692 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16693 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16694 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16695 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16696 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16697 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16698 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16699 | final step:  98 | reward: -1.0 | E[R] last 100: 1.0993003845214844
episode: 16700 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4525985

episode: 16792 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16793 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16794 | final step:  99 | reward: -1.0 | E[R] last 100: -100.0
episode: 16795 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16796 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16797 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16798 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16799 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16800 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16801 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16802 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16803 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16804 | final step:  98 | reward: -1.0 | E[R] last 100: -100.0
episode: 16805 | final step:  98 | reward: -1.0 | E[R] last 100:

episode: 16899 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16900 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16901 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16902 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16903 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16904 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16905 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16906 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16907 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16908 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16909 | final step:  98 | reward: -1.0 | E[R] last 100: -49.34236145019531
episode: 16910 | final step:  98 | reward: -1.0 | E[R] last 100: -49.3423614

episode: 17002 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17003 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17004 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17005 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17006 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17007 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17008 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17009 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17010 | final step:  99 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17011 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17012 | final step:  98 | reward: -1.0 | E[R] last 100: -49.441959381103516
episode: 17013 | final step:  98 | reward: -1.0 | E[R] last 100: 

episode: 17100 | final step:  98 | reward: -1.0 | E[R] last 100: 1.1845779418945312
episode: 17101 | final step: 236 | reward: 49.7 | E[R] last 100: 51.84451675415039
episode: 17102 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17103 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17104 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17105 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17106 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17107 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17108 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17109 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17110 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
episode: 17111 | final step:  98 | reward: -1.0 | E[R] last 100: 51.84451675415039
epi

episode: 17198 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4297599792480469
episode: 17199 | final step:  99 | reward: -1.0 | E[R] last 100: 1.4297599792480469
episode: 17200 | final step:  98 | reward: -1.0 | E[R] last 100: 1.4297599792480469
episode: 17201 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17202 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17203 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17204 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17205 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17206 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17207 | final step:  99 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17208 | final step:  98 | reward: -1.0 | E[R] last 100: -49.23017883300781
episode: 17209 | final step:  98 | reward: -1.0 | E[R] last 100: -49.2301788

episode: 17296 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17297 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17298 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17299 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17300 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17301 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17302 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17303 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17304 | final step:  99 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17305 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17306 | final step:  98 | reward: -1.0 | E[R] last 100: -49.45003890991211
episode: 17307 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4500389

episode: 17394 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17395 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17396 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17397 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17398 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17399 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17400 | final step:  99 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17401 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17402 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17403 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17404 | final step:  98 | reward: -1.0 | E[R] last 100: -49.41315841674805
episode: 17405 | final step:  98 | reward: -1.0 | E[R] last 100: -49.4131584

episode: 17491 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17492 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17493 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17494 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17495 | final step:  99 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17496 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17497 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625
episode: 17498 | final step:  98 | reward: -1.0 | E[R] last 100: -49.346099853515625


In [None]:
# Learning curve: one point per entry of reward_list (built elsewhere in the
# notebook), drawn against its index.
fig, ax = plt.subplots(figsize=(15,10))
ax.set_xlabel("Episode")
ax.set_ylabel("Total rewards")
ax.plot(reward_list)

In [None]:
# State-history plots for a single recorded result.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Result selected for plotting; plt_res is reused by the trajectory cell below.
# NOTE(review): 9999 is hard-coded and presumably an episode index into
# total_res -- confirm that this is the episode you want to inspect.
plt_res = total_res[9999]

# One entry per stacked panel:
#   (column of plt_res, scale factor or None, legend label, y-axis label)
# The angle columns are multiplied by Rad2Deg so they are displayed in degrees.
panels = [
    (0, None, r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    # The labels for h and r used to be r'$\{h}$' / r'$\{r}$', which is
    # malformed mathtext (unbalanced brace) and would fail as soon as a legend
    # was drawn for those panels.
    (10, None, r'$h$', r'$h$ (m)'),
    (1, None, r'$r$', r'$r$ (m)'),
    (2, Rad2Deg, 'elevation', 'elevation (deg)'),
    (3, Rad2Deg, 'azimuth', 'azimuth (deg)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(15, 9), dpi=100)
for ax, (col, scale, label, ylabel) in zip(axes, panels):
    series = plt_res[:, col] if scale is None else plt_res[:, col] * scale
    ax.plot(series, label=label)
    ax.set_ylabel(ylabel)
    ax.grid()
    # Legend per panel: a single trailing plt.legend() only affects the last axes.
    ax.legend()

# All panels are plotted against the row index of plt_res.
axes[-1].set_xlabel('sample index')
plt.show()

In [None]:
# trajectory plots
#
# Draws the 'player' and 'target' paths stored in plt_res (set in the previous
# cell) as a 3-D view, a top view (East/North) and a side view (North/Up).
# Columns 4, 5, 6 are used for the player and 7, 8, 9 for the target; the third
# column of each triple is negated before being plotted as 'Up'.

# Kept for its import side effect: older Matplotlib versions only register the
# '3d' projection once this module has been imported.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# --- 3-D view ---------------------------------------------------------------
# The axes are created with add_subplot(projection='3d'); current Matplotlib
# no longer accepts keyword arguments to plt.gca().
fig3d = plt.figure(figsize=(12,9), dpi=100)
ax3d = fig3d.add_subplot(111, projection='3d')
ax3d.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax3d.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax3d.set_xlabel('East')
ax3d.set_ylabel('North')
ax3d.set_zlabel('Up')
ax3d.set_xlim(-2000,2000)
ax3d.set_ylim(0,4000)
ax3d.legend()
plt.show()

# --- Top view: East vs. North -------------------------------------------------
fig_top, ax_top = plt.subplots(figsize=(12,9), dpi=100)
ax_top.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
ax_top.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
ax_top.set_xlabel('East')
ax_top.set_ylabel('North')
ax_top.grid()
ax_top.legend()
ax_top.axis('equal')  # same scale on both axes so the path is not distorted
plt.show()

# --- Side view: North vs. Up --------------------------------------------------
fig_side, ax_side = plt.subplots(figsize=(12,9), dpi=100)
ax_side.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax_side.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax_side.set_xlabel('North')
ax_side.set_ylabel('Up')
ax_side.grid()
ax_side.legend()
ax_side.axis('equal')
plt.show()