# Grid2Op with Deep SARSA

## Import the necessary software libraries:

In [1]:
import grid2op
import random
import copy
import gym
import numpy as np
import torch
import torch.nn.functional as F

import matplotlib.pyplot as plt

from typing import Callable
from torch import nn as nn
from torch.optim import AdamW
from tqdm import tqdm

from grid2op.gym_compat import GymEnv, BoxGymObsSpace, DiscreteActSpace
from gym import Env
from gym.utils.env_checker import check_env
try:
    from lightsim2grid import LightSimBackend
    bk_cls = LightSimBackend
except ImportError as exc:
    print(f"Error: {exc} when importing faster LightSimBackend")
    from grid2op.Backend import PandaPowerBackend
    bk_cls = PandaPowerBackend
    
from utils import plot_cost_to_go, plot_max_q, test_agent, plot_stats, seed_everything    
%matplotlib notebook

## Create and prepare the environment

### Create the environment & Convert into Gym Compatible 

In [3]:
env_name = "rte_case14_redisp"
#env = grid2op.make(env_name, test=True, backend=bk_cls()
env = grid2op.make(env_name)
print(type(env))
gym_env = GymEnv(env)


gym_env.observation_space = BoxGymObsSpace(env.observation_space)
gym_env.action_space = DiscreteActSpace(env.action_space)

#print(f"Is gym_env and open AI gym environment: {isinstance(gym_env, gym.Env)}")

<class 'abc.Environment_rte_case14_redisp'>




In [4]:
state_dims = gym_env.observation_space.shape[0]
num_actions = gym_env.action_space.n

In [5]:
num_actions

463

### Prepare the environment to work with PyTorch

In [6]:
class PreprocessEnv(gym.Wrapper):
    
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)
    
    def reset(self):
        obs = self.env.reset()
        print(f'type={ type(obs) }')
        return torch.from_numpy(obs).unsqueeze(dim=0).float()
    
    def step(self, action):
        action = action.item()
        next_state, reward, done, info = self.env.step(action)
        next_state = torch.from_numpy(next_state).unsqueeze(dim=0).float()
        reward = torch.tensor(reward).view(1, -1).float()
        done = torch.tensor(done).view(1, -1)
        return next_state, reward, done, info

In [7]:
env = PreprocessEnv(gym_env)

In [8]:
state = env.reset()
action = torch.tensor(0)
next_state, reward, done, _ = env.step(action)

type=<class 'numpy.ndarray'>


### Create the Q-Network: $\hat q(s,a| \theta)$

In [9]:
q_network = nn.Sequential(
    nn.Linear(state_dims, 550),
    nn.ReLU(),
    nn.Linear(550, 500),
    nn.ReLU(),
    nn.Linear(500, num_actions))

In [10]:
q_network

Sequential(
  (0): Linear(in_features=453, out_features=550, bias=True)
  (1): ReLU()
  (2): Linear(in_features=550, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=463, bias=True)
)

### Create the target Q-Network: $\hat q(s, a|\theta_{targ})$

In [11]:
target_q_network = copy.deepcopy(q_network).eval()

### Create the $\epsilon$-greedy policy: $\pi(s)$

In [12]:
def policy(state, epsilon=0.05):
    if torch.rand(1) < epsilon:
        return torch.randint(num_actions, (1, 1))
    else:
        av = q_network(state).detach()
        return torch.argmax(av, dim=-1, keepdim=True)

### Plot the cost to go: $ - \max_a \hat q(s,a|\theta)$

In [13]:
#plot_cost_to_go(gym_env, q_network, xlabel='Car Position', ylabel='Velocity')

In [14]:
class ReplayMemory:

    def __init__(self, capacity=1000000):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def insert(self, transition):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        assert self.can_sample(batch_size)

        batch = random.sample(self.memory, batch_size)
        batch = zip(*batch)
        return [torch.cat(items) for items in batch]

    def can_sample(self, batch_size):
        print(f'can_sample_self.memory = {len(self.memory)}')
        return len(self.memory) >= batch_size * 10

    def __len__(self):
        return len(self.memory)

# Deep SARSA Algorithm

![Screenshot%202021-03-11%20at%2019.03.36.png](attachment:Screenshot%202021-03-11%20at%2019.03.36.png)

In [17]:
def deep_sarsa(q_network, policy, episodes, alpha=0.001, batch_size=32, gamma=0.99, epsilon=0.):
    print(f'<<< deep_sarsa >>>')
    optim = AdamW(q_network.parameters(), lr=alpha) #optimiser to optimise weight calculation of neural networks
    memory = ReplayMemory() #Initialising memory to store State, Action, Reward, and Next State
    stats = {'MSE Loss': [], 'Returns': []} #Dict to store statistics
    
    for episode in tqdm(range(1, episodes + 1)):
        print(f'<<< episode >>>')
        state = env.reset() #getting initial state
        done = False
        ep_return = 0
        while not done:
            print(f'<<< while >>>')
            action = policy(state, epsilon) #Getting first action greedily with randomisation factor Epsilon
            next_state, reward, done, _ = env.step(action) #taking selected action on environment
            memory.insert([state, action, reward, done, next_state]) #Storing the results to memory
            print(f'batch_size = {memory.can_sample(batch_size)}') #printing the size of the memory
            if memory.can_sample(batch_size): #samples will be created only if memory pool is 10 times of batch size
                print(f'<<< if memory >>>')
                state_b, action_b, reward_b, done_b, next_state_b = memory.sample(batch_size) #creating batches to train neural network
                qsa_b = q_network(state_b).gather(1, action_b) #providing the state to neural network and comparing the 
                                                            #actions with actions stored in memory and gather the experiences
                next_action_b = policy(next_state_b) #using greedy epsilon policy to greedily get next actions
                next_qsa_b = target_q_network(next_state_b).gather(1, next_action_b) #provide next state and next action to a target neural network
                                                                                    #and gather its experiences
                target_b = reward_b + ~done_b * gamma * next_qsa_b #discount the experiences of target network
                loss = F.mse_loss(qsa_b, target_b) #find a Mean square error loss
                print(f'qsa_b = {qsa_b}, target_b = {target_b}')
                q_network.zero_grad() #reset the gradients of the network
                loss.backward() #calculate gradients using backward propogation
                optim.step() # Iterate over all parameters (tensors) that are supposed 
                            # to be updated and use internally stored grad to update their values
                
                loss.item() # get the loss
                print(loss.item())
                stats['MSE Loss'].append(loss.item())
            
            state = next_state
            ep_return += reward.item()
        
        stats['Returns'].append(ep_return)
        
        if episode % 10 == 0:
            target_q_network.load_state_dict(q_network.state_dict()) #After every 10 episodes load state of original network to
                                                                    # target network
        
    return stats

In [18]:
print(q_network)

Sequential(
  (0): Linear(in_features=453, out_features=550, bias=True)
  (1): ReLU()
  (2): Linear(in_features=550, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=463, bias=True)
)


In [19]:
stats = deep_sarsa(q_network, policy, 200, alpha=0.001, epsilon=0.05)

  0%|                                                                                          | 0/200 [00:00<?, ?it/s]

<<< deep_sarsa >>>
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1
can_sample_self.memory = 2
can_sample_self.memory = 3
can_sample_self.memory = 4
can_sample_self.memory = 5
can_sample_self.memory = 6
can_sample_self.memory = 7
can_sample_self.memory = 8
can_sample_self.memory = 9
can_sample_self.memory = 10
can_sample_self.memory = 11
can_sample_self.memory = 12
can_sample_self.memory = 13
can_sample_self.memory = 14
can_sample_self.memory = 15
can_sample_self.memory = 16
can_sample_self.memory = 17
can_sample_self.memory = 18
can_sample_self.memory = 19
can_sample_self.memory = 20
can_sample_self.memory = 21
can_sample_self.memory = 22
can_sample_self.memory = 23
can_sample_self.memory = 24
can_sample_self.memory = 25
can_sample_self.memory = 26
can_sample_self.memory = 27
can_sample_self.memory = 28
can_sample_self.memory = 29
can_sample_self.memory = 30
can_sample_self.memory = 31
can_sample_self.memory = 32
can_sample_self.memory = 33
can_sample_self.memor

  0%|▍                                                                                 | 1/200 [00:01<04:03,  1.23s/it]

can_sample_self.memory = 36
can_sample_self.memory = 37
can_sample_self.memory = 38
can_sample_self.memory = 39
can_sample_self.memory = 40
can_sample_self.memory = 41
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 42
can_sample_self.memory = 43
can_sample_self.memory = 44
can_sample_self.memory = 45
can_sample_self.memory = 46
can_sample_self.memory = 47
can_sample_self.memory = 48
can_sample_self.memory = 49
can_sample_self.memory = 50
can_sample_self.memory = 51
can_sample_self.memory = 52
can_sample_self.memory = 53
can_sample_self.memory = 54
can_sample_self.memory = 55
can_sample_self.memory = 56
can_sample_self.memory = 57
can_sample_self.memory = 58
can_sample_self.memory = 59
can_sample_self.memory = 60
can_sample_self.memory = 61
can_sample_self.memory = 62
can_sample_self.memory = 63
can_sample_self.memory = 64
can_sample_self.memory = 65
can_sample_self.memory = 66
can_sample_self.memory = 67
can_sample_self.memory = 68
can_sample_self.memory = 69
can

  1%|▊                                                                                 | 2/200 [00:03<06:00,  1.82s/it]

can_sample_self.memory = 120
can_sample_self.memory = 121
can_sample_self.memory = 122
can_sample_self.memory = 123
can_sample_self.memory = 124
can_sample_self.memory = 125
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 126
can_sample_self.memory = 127
can_sample_self.memory = 128
can_sample_self.memory = 129
can_sample_self.memory = 130
can_sample_self.memory = 131
can_sample_self.memory = 132
can_sample_self.memory = 133
can_sample_self.memory = 134
can_sample_self.memory = 135
can_sample_self.memory = 136
can_sample_self.memory = 137
can_sample_self.memory = 138
can_sample_self.memory = 139
can_sample_self.memory = 140
can_sample_self.memory = 141
can_sample_self.memory = 142
can_sample_self.memory = 143
can_sample_self.memory = 144
can_sample_self.memory = 145
can_sample_self.memory = 146
can_sample_self.memory = 147
can_sample_self.memory = 148
can_sample_self.memory = 149
can_sample_self.memory = 150
can_sample_self.memory = 151


  2%|█▏                                                                                | 3/200 [00:04<04:43,  1.44s/it]

can_sample_self.memory = 152
can_sample_self.memory = 153
can_sample_self.memory = 154
can_sample_self.memory = 155
can_sample_self.memory = 156
can_sample_self.memory = 157
can_sample_self.memory = 158
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 159
can_sample_self.memory = 160
can_sample_self.memory = 161
can_sample_self.memory = 162
can_sample_self.memory = 163
can_sample_self.memory = 164
can_sample_self.memory = 165
can_sample_self.memory = 166
can_sample_self.memory = 167
can_sample_self.memory = 168
can_sample_self.memory = 169
can_sample_self.memory = 170
can_sample_self.memory = 171
can_sample_self.memory = 172
can_sample_self.memory = 173
can_sample_self.memory = 174
can_sample_self.memory = 175


  2%|█▋                                                                                | 4/200 [00:05<03:38,  1.12s/it]

can_sample_self.memory = 176
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 177
can_sample_self.memory = 178
can_sample_self.memory = 179
can_sample_self.memory = 180
can_sample_self.memory = 181
can_sample_self.memory = 182
can_sample_self.memory = 183
can_sample_self.memory = 184
can_sample_self.memory = 185
can_sample_self.memory = 186


  2%|██                                                                                | 5/200 [00:05<02:49,  1.15it/s]

can_sample_self.memory = 187
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 188
can_sample_self.memory = 189
can_sample_self.memory = 190
can_sample_self.memory = 191
can_sample_self.memory = 192
can_sample_self.memory = 193
can_sample_self.memory = 194
can_sample_self.memory = 195
can_sample_self.memory = 196
can_sample_self.memory = 197
can_sample_self.memory = 198
can_sample_self.memory = 199
can_sample_self.memory = 200
can_sample_self.memory = 201
can_sample_self.memory = 202
can_sample_self.memory = 203
can_sample_self.memory = 204
can_sample_self.memory = 205
can_sample_self.memory = 206


  3%|██▍                                                                               | 6/200 [00:06<02:45,  1.17it/s]

can_sample_self.memory = 207
can_sample_self.memory = 208
can_sample_self.memory = 209
can_sample_self.memory = 210
can_sample_self.memory = 211
can_sample_self.memory = 212
can_sample_self.memory = 213
can_sample_self.memory = 214
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 215
can_sample_self.memory = 216
can_sample_self.memory = 217
can_sample_self.memory = 218
can_sample_self.memory = 219
can_sample_self.memory = 220
can_sample_self.memory = 221
can_sample_self.memory = 222
can_sample_self.memory = 223
can_sample_self.memory = 224
can_sample_self.memory = 225
can_sample_self.memory = 226
can_sample_self.memory = 227
can_sample_self.memory = 228
can_sample_self.memory = 229
can_sample_self.memory = 230
can_sample_self.memory = 231
can_sample_self.memory = 232
can_sample_self.memory = 233
can_sample_self.memory = 234
can_sample_self.memory = 235
can_sample_self.memory = 236
can_sample_self.memory = 237
can_sample_self.memory = 238
can_sample_self.memory = 23

  4%|██▊                                                                               | 7/200 [00:08<04:33,  1.42s/it]

can_sample_self.memory = 313
can_sample_self.memory = 314
can_sample_self.memory = 315
<<< episode >>>
type=<class 'numpy.ndarray'>


  4%|███▎                                                                              | 8/200 [00:09<03:27,  1.08s/it]

can_sample_self.memory = 316
can_sample_self.memory = 317
can_sample_self.memory = 318
can_sample_self.memory = 319
can_sample_self.memory = 320
can_sample_self.memory = 320
2375844.75
can_sample_self.memory = 321
can_sample_self.memory = 321
5088436.5
can_sample_self.memory = 322
can_sample_self.memory = 322
148934096.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 323
can_sample_self.memory = 323
6128829.0
can_sample_self.memory = 324
can_sample_self.memory = 324
66793812.0
can_sample_self.memory = 325
can_sample_self.memory = 325
86714056.0
can_sample_self.memory = 326
can_sample_self.memory = 326
34752756.0
can_sample_self.memory = 327
can_sample_self.memory = 327
122371416.0
can_sample_self.memory = 328
can_sample_self.memory = 328


  4%|███▋                                                                              | 9/200 [00:09<02:44,  1.16it/s]

17602574.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 329
can_sample_self.memory = 329


  5%|████                                                                             | 10/200 [00:09<02:05,  1.52it/s]

45964020.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 330
can_sample_self.memory = 330


  6%|████▍                                                                            | 11/200 [00:10<01:37,  1.93it/s]

40935384.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 331
can_sample_self.memory = 331
30054684.0
can_sample_self.memory = 332
can_sample_self.memory = 332
16265722.0
can_sample_self.memory = 333
can_sample_self.memory = 333
4496816.5
can_sample_self.memory = 334
can_sample_self.memory = 334
698920.75
can_sample_self.memory = 335
can_sample_self.memory = 335


  6%|████▊                                                                            | 12/200 [00:10<01:29,  2.09it/s]

5575220.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 336
can_sample_self.memory = 336
43543744.0
can_sample_self.memory = 337
can_sample_self.memory = 337
36385980.0
can_sample_self.memory = 338
can_sample_self.memory = 338
14978548.0
can_sample_self.memory = 339
can_sample_self.memory = 339
3181392.25
can_sample_self.memory = 340
can_sample_self.memory = 340
7803361.5
can_sample_self.memory = 341
can_sample_self.memory = 341
11161882.0
can_sample_self.memory = 342
can_sample_self.memory = 342
11837208.0
can_sample_self.memory = 343
can_sample_self.memory = 343
10103793.0
can_sample_self.memory = 344
can_sample_self.memory = 344
7649597.5
can_sample_self.memory = 345
can_sample_self.memory = 345
3984485.0
can_sample_self.memory = 346
can_sample_self.memory = 346
2891495.75
can_sample_self.memory = 347
can_sample_self.memory = 347
3287455.75
can_sample_self.memory = 348
can_sample_self.memory = 348
4108750.5
can_sample_self.memory = 349
can_sample_self.memory 

  6%|█████▎                                                                           | 13/200 [00:12<02:40,  1.16it/s]

3672106.25
can_sample_self.memory = 378
can_sample_self.memory = 378
672283.375
can_sample_self.memory = 379
can_sample_self.memory = 379
1696216.0
can_sample_self.memory = 380
can_sample_self.memory = 380
3149018.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 381
can_sample_self.memory = 381
2996269.25
can_sample_self.memory = 382
can_sample_self.memory = 382
2642430.5
can_sample_self.memory = 383
can_sample_self.memory = 383
2002696.625
can_sample_self.memory = 384
can_sample_self.memory = 384
1489426.75
can_sample_self.memory = 385
can_sample_self.memory = 385
1692573.375
can_sample_self.memory = 386
can_sample_self.memory = 386


  7%|█████▋                                                                           | 14/200 [00:12<02:19,  1.33it/s]

1415853.0
can_sample_self.memory = 387
can_sample_self.memory = 387
17030168.0
can_sample_self.memory = 388
can_sample_self.memory = 388
16680293.0
can_sample_self.memory = 389
can_sample_self.memory = 389
14665839.0
<<< episode >>>


  8%|██████                                                                           | 15/200 [00:12<01:47,  1.71it/s]

type=<class 'numpy.ndarray'>
can_sample_self.memory = 390
can_sample_self.memory = 390
9290231.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 391
can_sample_self.memory = 391
7906792.0
can_sample_self.memory = 392
can_sample_self.memory = 392
8511752.0
can_sample_self.memory = 393
can_sample_self.memory = 393
9984770.0
can_sample_self.memory = 394
can_sample_self.memory = 394
9117038.0
can_sample_self.memory = 395
can_sample_self.memory = 395
8242476.5
can_sample_self.memory = 396
can_sample_self.memory = 396
1481299.125
can_sample_self.memory = 397
can_sample_self.memory = 397
950045.625
can_sample_self.memory = 398
can_sample_self.memory = 398
608150.6875
can_sample_self.memory = 399
can_sample_self.memory = 399
1207343.375
can_sample_self.memory = 400
can_sample_self.memory = 400
1096141.125
can_sample_self.memory = 401
can_sample_self.memory = 401
1478952.875
can_sample_self.memory = 402
can_sample_self.memory = 402
1262674.125
can_sample_self.memory = 403


  8%|██████▍                                                                          | 16/200 [00:15<03:14,  1.06s/it]

473510.625
can_sample_self.memory = 443
can_sample_self.memory = 443
500708.90625
can_sample_self.memory = 444
can_sample_self.memory = 444
309938.125
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 445
can_sample_self.memory = 445
246064.828125
can_sample_self.memory = 446
can_sample_self.memory = 446
243152.578125
can_sample_self.memory = 447
can_sample_self.memory = 447
168651.0
can_sample_self.memory = 448
can_sample_self.memory = 448
201275.640625
can_sample_self.memory = 449
can_sample_self.memory = 449
6063694.5
can_sample_self.memory = 450
can_sample_self.memory = 450
153179.609375
can_sample_self.memory = 451
can_sample_self.memory = 451
414177.96875
can_sample_self.memory = 452
can_sample_self.memory = 452
222285.75
can_sample_self.memory = 453
can_sample_self.memory = 453
197771.390625
can_sample_self.memory = 454
can_sample_self.memory = 454
405284.875
can_sample_self.memory = 455
can_sample_self.memory = 455
6920341.5
can_sample_self.memory = 456
can_

  8%|██████▉                                                                          | 17/200 [00:15<03:04,  1.01s/it]

2443632.75
can_sample_self.memory = 462
can_sample_self.memory = 462
2309814.5
can_sample_self.memory = 463
can_sample_self.memory = 463
1887583.75
can_sample_self.memory = 464
can_sample_self.memory = 464
1677413.875
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 465
can_sample_self.memory = 465
1530407.5
can_sample_self.memory = 466
can_sample_self.memory = 466
2151472.25
can_sample_self.memory = 467
can_sample_self.memory = 467
1060381.75
can_sample_self.memory = 468
can_sample_self.memory = 468
692530.8125
can_sample_self.memory = 469
can_sample_self.memory = 469
521325.65625
can_sample_self.memory = 470
can_sample_self.memory = 470
1179946.875
can_sample_self.memory = 471
can_sample_self.memory = 471
508391.0625
can_sample_self.memory = 472
can_sample_self.memory = 472
977666.75
can_sample_self.memory = 473
can_sample_self.memory = 473
10012518.0
can_sample_self.memory = 474
can_sample_self.memory = 474
527368.75
can_sample_self.memory = 475
can_sample_self.

  9%|███████▎                                                                         | 18/200 [00:16<02:49,  1.07it/s]

can_sample_self.memory = 476
can_sample_self.memory = 476
1151889.0
can_sample_self.memory = 477
can_sample_self.memory = 477
1987512.125
can_sample_self.memory = 478
can_sample_self.memory = 478
13125493.0
can_sample_self.memory = 479
can_sample_self.memory = 479
12633084.0
can_sample_self.memory = 480
can_sample_self.memory = 480
11453396.0
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 481
can_sample_self.memory = 481
8819443.0
can_sample_self.memory = 482
can_sample_self.memory = 482
9020220.0
can_sample_self.memory = 483
can_sample_self.memory = 483
7604081.0
can_sample_self.memory = 484
can_sample_self.memory = 484
5866318.0
can_sample_self.memory = 485
can_sample_self.memory = 485
2318208.0
can_sample_self.memory = 486
can_sample_self.memory = 486
4894665.0
can_sample_self.memory = 487
can_sample_self.memory = 487
1304441.125
can_sample_self.memory = 488
can_sample_self.memory = 488
3483202.5
can_sample_self.memory = 489
can_sample_self.memory = 489
419924

 10%|███████▋                                                                         | 19/200 [00:17<02:57,  1.02it/s]

1278077.0
can_sample_self.memory = 504
can_sample_self.memory = 504
4337212.0
can_sample_self.memory = 505
can_sample_self.memory = 505
6230499.5
can_sample_self.memory = 506
can_sample_self.memory = 506
680851.75
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 507
can_sample_self.memory = 507
631272.9375
can_sample_self.memory = 508
can_sample_self.memory = 508
572406.1875
can_sample_self.memory = 509
can_sample_self.memory = 509
555600.0
can_sample_self.memory = 510
can_sample_self.memory = 510
43532.2265625
can_sample_self.memory = 511
can_sample_self.memory = 511
309974.125
can_sample_self.memory = 512
can_sample_self.memory = 512
99003.328125
can_sample_self.memory = 513
can_sample_self.memory = 513
333341.09375
can_sample_self.memory = 514
can_sample_self.memory = 514
158054.828125
can_sample_self.memory = 515
can_sample_self.memory = 515
277210.9375
can_sample_self.memory = 516
can_sample_self.memory = 516
123358.2109375
can_sample_self.memory = 517
can_sam

 10%|████████                                                                         | 20/200 [00:18<03:09,  1.05s/it]

1449582.75
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 536
can_sample_self.memory = 536
948357.4375
can_sample_self.memory = 537
can_sample_self.memory = 537
505434.8125
can_sample_self.memory = 538
can_sample_self.memory = 538
519916.40625
can_sample_self.memory = 539
can_sample_self.memory = 539
112253.34375
can_sample_self.memory = 540
can_sample_self.memory = 540
275099.59375
can_sample_self.memory = 541
can_sample_self.memory = 541
152840.59375
can_sample_self.memory = 542
can_sample_self.memory = 542
264579.1875
can_sample_self.memory = 543
can_sample_self.memory = 543
203376.15625
can_sample_self.memory = 544
can_sample_self.memory = 544
770138.375
can_sample_self.memory = 545
can_sample_self.memory = 545
310961.3125
can_sample_self.memory = 546
can_sample_self.memory = 546
488152.34375
can_sample_self.memory = 547
can_sample_self.memory = 547
518785.34375
can_sample_self.memory = 548
can_sample_self.memory = 548
18288.572265625
can_sample_self.memory =

 10%|████████▌                                                                        | 21/200 [00:20<03:15,  1.09s/it]

can_sample_self.memory = 559
can_sample_self.memory = 559
314837.0625
can_sample_self.memory = 560
can_sample_self.memory = 560
291766.46875
can_sample_self.memory = 561
can_sample_self.memory = 561
167080.59375
can_sample_self.memory = 562
can_sample_self.memory = 562
1612900.25
<<< episode >>>


 11%|████████▉                                                                        | 22/200 [00:20<02:31,  1.18it/s]

type=<class 'numpy.ndarray'>
can_sample_self.memory = 563
can_sample_self.memory = 563
1433642.5
can_sample_self.memory = 564
can_sample_self.memory = 564
1011233.875
can_sample_self.memory = 565
can_sample_self.memory = 565
331627.75
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 566
can_sample_self.memory = 566
1126271.5
can_sample_self.memory = 567
can_sample_self.memory = 567
924737.1875
can_sample_self.memory = 568
can_sample_self.memory = 568
1458115.375
can_sample_self.memory = 569
can_sample_self.memory = 569
1250145.25
can_sample_self.memory = 570
can_sample_self.memory = 570
647482.8125
can_sample_self.memory = 571
can_sample_self.memory = 571
354149.15625
can_sample_self.memory = 572
can_sample_self.memory = 572
4273553.0
can_sample_self.memory = 573
can_sample_self.memory = 573
4431808.5
can_sample_self.memory = 574
can_sample_self.memory = 574
4098698.5
can_sample_self.memory = 575
can_sample_self.memory = 575
2790360.5
can_sample_self.memory = 576
c

 12%|█████████▎                                                                       | 23/200 [00:21<02:23,  1.23it/s]

can_sample_self.memory = 577
can_sample_self.memory = 577
1817647.625
can_sample_self.memory = 578
can_sample_self.memory = 578
1603105.25
can_sample_self.memory = 579
can_sample_self.memory = 579
2205655.25
can_sample_self.memory = 580
can_sample_self.memory = 580
12442600.0
can_sample_self.memory = 581
can_sample_self.memory = 581
1036433.125
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 582
can_sample_self.memory = 582
884409.375
can_sample_self.memory = 583
can_sample_self.memory = 583
1201549.875
can_sample_self.memory = 584
can_sample_self.memory = 584
1931578.125
can_sample_self.memory = 585
can_sample_self.memory = 585
2690033.25
can_sample_self.memory = 586
can_sample_self.memory = 586
nan
can_sample_self.memory = 587
can_sample_self.memory = 587
nan
can_sample_self.memory = 588
can_sample_self.memory = 588
nan
can_sample_self.memory = 589
can_sample_self.memory = 589
nan
can_sample_self.memory = 590
can_sample_self.memory = 590
nan
can_sample_self.memo

 12%|█████████▋                                                                       | 24/200 [00:22<02:51,  1.03it/s]

can_sample_self.memory = 611
can_sample_self.memory = 611
nan
can_sample_self.memory = 612
can_sample_self.memory = 612
nan
can_sample_self.memory = 613
can_sample_self.memory = 613
nan
can_sample_self.memory = 614
can_sample_self.memory = 614
nan
can_sample_self.memory = 615
can_sample_self.memory = 615
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 616
can_sample_self.memory = 616
nan
can_sample_self.memory = 617
can_sample_self.memory = 617
nan
can_sample_self.memory = 618
can_sample_self.memory = 618
nan
can_sample_self.memory = 619
can_sample_self.memory = 619
nan
can_sample_self.memory = 620
can_sample_self.memory = 620
nan
can_sample_self.memory = 621
can_sample_self.memory = 621
nan
can_sample_self.memory = 622
can_sample_self.memory = 622
nan
can_sample_self.memory = 623
can_sample_self.memory = 623
nan
can_sample_self.memory = 624
can_sample_self.memory = 624
nan
can_sample_self.memory = 625
can_sample_self.memory = 625
nan
can_sample_self.memory = 

 12%|██████████▏                                                                      | 25/200 [00:24<03:59,  1.37s/it]

can_sample_self.memory = 673
nan
can_sample_self.memory = 674
can_sample_self.memory = 674
nan
can_sample_self.memory = 675
can_sample_self.memory = 675
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 676
can_sample_self.memory = 676
nan
can_sample_self.memory = 677
can_sample_self.memory = 677
nan
can_sample_self.memory = 678
can_sample_self.memory = 678
nan
can_sample_self.memory = 679
can_sample_self.memory = 679
nan
can_sample_self.memory = 680
can_sample_self.memory = 680
nan
can_sample_self.memory = 681
can_sample_self.memory = 681
nan
can_sample_self.memory = 682
can_sample_self.memory = 682
nan
can_sample_self.memory = 683
can_sample_self.memory = 683
nan
can_sample_self.memory = 684
can_sample_self.memory = 684
nan
can_sample_self.memory = 685
can_sample_self.memory = 685
nan
can_sample_self.memory = 686
can_sample_self.memory = 686
nan
can_sample_self.memory = 687
can_sample_self.memory = 687
nan
can_sample_self.memory = 688
can_sample_self.memory = 

 13%|██████████▌                                                                      | 26/200 [00:27<04:45,  1.64s/it]

nan
can_sample_self.memory = 733
can_sample_self.memory = 733
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 734
can_sample_self.memory = 734
nan
can_sample_self.memory = 735
can_sample_self.memory = 735
nan
can_sample_self.memory = 736
can_sample_self.memory = 736
nan
can_sample_self.memory = 737
can_sample_self.memory = 737
nan
can_sample_self.memory = 738
can_sample_self.memory = 738
nan
can_sample_self.memory = 739
can_sample_self.memory = 739
nan
can_sample_self.memory = 740
can_sample_self.memory = 740
nan
can_sample_self.memory = 741
can_sample_self.memory = 741
nan
can_sample_self.memory = 742
can_sample_self.memory = 742
nan
can_sample_self.memory = 743
can_sample_self.memory = 743
nan
can_sample_self.memory = 744
can_sample_self.memory = 744
nan
can_sample_self.memory = 745
can_sample_self.memory = 745
nan
can_sample_self.memory = 746
can_sample_self.memory = 746
nan
can_sample_self.memory = 747
can_sample_self.memory = 747
nan
can_sample_self.memor

 14%|██████████▉                                                                      | 27/200 [00:30<06:37,  2.30s/it]

nan
can_sample_self.memory = 830
can_sample_self.memory = 830
nan
can_sample_self.memory = 831
can_sample_self.memory = 831
nan
can_sample_self.memory = 832
can_sample_self.memory = 832
nan
can_sample_self.memory = 833
can_sample_self.memory = 833
nan
can_sample_self.memory = 834
can_sample_self.memory = 834
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 835
can_sample_self.memory = 835
nan
can_sample_self.memory = 836
can_sample_self.memory = 836
nan
can_sample_self.memory = 837
can_sample_self.memory = 837
nan
can_sample_self.memory = 838
can_sample_self.memory = 838
nan
can_sample_self.memory = 839
can_sample_self.memory = 839
nan
can_sample_self.memory = 840
can_sample_self.memory = 840
nan
can_sample_self.memory = 841
can_sample_self.memory = 841
nan
can_sample_self.memory = 842
can_sample_self.memory = 842
nan
can_sample_self.memory = 843
can_sample_self.memory = 843
nan
can_sample_self.memory = 844
can_sample_self.memory = 844
nan
can_sample_self.memor

 14%|███████████▎                                                                     | 28/200 [00:31<05:09,  1.80s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 846
can_sample_self.memory = 846
nan
can_sample_self.memory = 847
can_sample_self.memory = 847
nan
can_sample_self.memory = 848
can_sample_self.memory = 848
nan
can_sample_self.memory = 849
can_sample_self.memory = 849
nan
can_sample_self.memory = 850
can_sample_self.memory = 850
nan
can_sample_self.memory = 851
can_sample_self.memory = 851
nan
can_sample_self.memory = 852
can_sample_self.memory = 852
nan
can_sample_self.memory = 853
can_sample_self.memory = 853
nan
can_sample_self.memory = 854
can_sample_self.memory = 854
nan
can_sample_self.memory = 855
can_sample_self.memory = 855
nan
can_sample_self.memory = 856
can_sample_self.memory = 856
nan
can_sample_self.memory = 857
can_sample_self.memory = 857
nan
can_sample_self.memory = 858
can_sample_self.memory = 858
nan
can_sample_self.memory = 859
can_sample_self.memory = 859
nan
can_sample_self.memory = 860
can_sample_self.memory = 860
nan
can_sample_self.memor

 14%|███████████▋                                                                     | 29/200 [00:33<05:01,  1.77s/it]

can_sample_self.memory = 887
can_sample_self.memory = 887
nan
can_sample_self.memory = 888
can_sample_self.memory = 888
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 889
can_sample_self.memory = 889
nan
can_sample_self.memory = 890
can_sample_self.memory = 890
nan
can_sample_self.memory = 891
can_sample_self.memory = 891
nan
can_sample_self.memory = 892
can_sample_self.memory = 892
nan
can_sample_self.memory = 893
can_sample_self.memory = 893
nan
can_sample_self.memory = 894
can_sample_self.memory = 894
nan
can_sample_self.memory = 895
can_sample_self.memory = 895
nan
can_sample_self.memory = 896
can_sample_self.memory = 896
nan
can_sample_self.memory = 897
can_sample_self.memory = 897
nan
can_sample_self.memory = 898
can_sample_self.memory = 898
nan
can_sample_self.memory = 899
can_sample_self.memory = 899
nan
can_sample_self.memory = 900
can_sample_self.memory = 900
nan
can_sample_self.memory = 901
can_sample_self.memory = 901
nan
can_sample_self.memory = 

 15%|████████████▏                                                                    | 30/200 [00:35<05:35,  1.97s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 954
can_sample_self.memory = 954
nan
can_sample_self.memory = 955
can_sample_self.memory = 955
nan
can_sample_self.memory = 956
can_sample_self.memory = 956
nan
can_sample_self.memory = 957
can_sample_self.memory = 957
nan
can_sample_self.memory = 958
can_sample_self.memory = 958
nan
can_sample_self.memory = 959
can_sample_self.memory = 959
nan
can_sample_self.memory = 960
can_sample_self.memory = 960
nan
can_sample_self.memory = 961
can_sample_self.memory = 961
nan
can_sample_self.memory = 962
can_sample_self.memory = 962
nan
can_sample_self.memory = 963
can_sample_self.memory = 963
nan
can_sample_self.memory = 964
can_sample_self.memory = 964
nan
can_sample_self.memory = 965
can_sample_self.memory = 965
nan
can_sample_self.memory = 966
can_sample_self.memory = 966
nan
can_sample_self.memory = 967
can_sample_self.memory = 967
nan
can_sample_self.memory = 968
can_sample_self.memory = 968
nan
can_sample_self.memor

 16%|████████████▌                                                                    | 31/200 [00:36<04:42,  1.67s/it]

nan
can_sample_self.memory = 973
can_sample_self.memory = 973
nan
can_sample_self.memory = 974
can_sample_self.memory = 974
nan
can_sample_self.memory = 975
can_sample_self.memory = 975
nan
can_sample_self.memory = 976
can_sample_self.memory = 976
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 977
can_sample_self.memory = 977
nan
can_sample_self.memory = 978
can_sample_self.memory = 978
nan
can_sample_self.memory = 979
can_sample_self.memory = 979
nan
can_sample_self.memory = 980
can_sample_self.memory = 980
nan
can_sample_self.memory = 981
can_sample_self.memory = 981
nan
can_sample_self.memory = 982
can_sample_self.memory = 982
nan
can_sample_self.memory = 983
can_sample_self.memory = 983
nan
can_sample_self.memory = 984
can_sample_self.memory = 984
nan
can_sample_self.memory = 985
can_sample_self.memory = 985
nan
can_sample_self.memory = 986
can_sample_self.memory = 986
nan
can_sample_self.memory = 987
can_sample_self.memory = 987
nan
can_sample_self.memor

 16%|████████████▉                                                                    | 32/200 [00:38<04:27,  1.59s/it]

<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1012
can_sample_self.memory = 1012
nan
can_sample_self.memory = 1013
can_sample_self.memory = 1013
nan
can_sample_self.memory = 1014
can_sample_self.memory = 1014
nan
can_sample_self.memory = 1015
can_sample_self.memory = 1015
nan
can_sample_self.memory = 1016
can_sample_self.memory = 1016
nan
can_sample_self.memory = 1017
can_sample_self.memory = 1017
nan
can_sample_self.memory = 1018
can_sample_self.memory = 1018
nan
can_sample_self.memory = 1019
can_sample_self.memory = 1019
nan
can_sample_self.memory = 1020
can_sample_self.memory = 1020
nan
can_sample_self.memory = 1021
can_sample_self.memory = 1021
nan
can_sample_self.memory = 1022
can_sample_self.memory = 1022
nan
can_sample_self.memory = 1023
can_sample_self.memory = 1023
nan
can_sample_self.memory = 1024
can_sample_self.memory = 1024
nan
can_sample_self.memory = 1025
can_sample_self.memory = 1025
nan
can_sample_self.memory = 1026
can_sample_self.memory = 1026

 16%|█████████████▎                                                                   | 33/200 [00:41<05:58,  2.15s/it]

nan
can_sample_self.memory = 1101
can_sample_self.memory = 1101
nan
can_sample_self.memory = 1102
can_sample_self.memory = 1102
nan
can_sample_self.memory = 1103
can_sample_self.memory = 1103
nan
can_sample_self.memory = 1104
can_sample_self.memory = 1104
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1105
can_sample_self.memory = 1105
nan
can_sample_self.memory = 1106
can_sample_self.memory = 1106
nan
can_sample_self.memory = 1107
can_sample_self.memory = 1107
nan
can_sample_self.memory = 1108
can_sample_self.memory = 1108
nan
can_sample_self.memory = 1109
can_sample_self.memory = 1109
nan
can_sample_self.memory = 1110
can_sample_self.memory = 1110
nan
can_sample_self.memory = 1111
can_sample_self.memory = 1111
nan
can_sample_self.memory = 1112
can_sample_self.memory = 1112
nan
can_sample_self.memory = 1113
can_sample_self.memory = 1113
nan
can_sample_self.memory = 1114
can_sample_self.memory = 1114
nan
can_sample_self.memory = 1115
can_sample_self.memory = 

 17%|█████████████▊                                                                   | 34/200 [00:42<05:00,  1.81s/it]

nan
can_sample_self.memory = 1129
can_sample_self.memory = 1129
nan
<<< episode >>>
type=<class 'numpy.ndarray'>


 18%|██████████████▏                                                                  | 35/200 [00:42<03:43,  1.36s/it]

can_sample_self.memory = 1130
can_sample_self.memory = 1130
nan
can_sample_self.memory = 1131
can_sample_self.memory = 1131
nan
can_sample_self.memory = 1132
can_sample_self.memory = 1132
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1133
can_sample_self.memory = 1133
nan
can_sample_self.memory = 1134
can_sample_self.memory = 1134
nan
can_sample_self.memory = 1135
can_sample_self.memory = 1135
nan
can_sample_self.memory = 1136
can_sample_self.memory = 1136
nan
can_sample_self.memory = 1137
can_sample_self.memory = 1137
nan
can_sample_self.memory = 1138
can_sample_self.memory = 1138
nan
can_sample_self.memory = 1139
can_sample_self.memory = 1139
nan
can_sample_self.memory = 1140
can_sample_self.memory = 1140
nan
can_sample_self.memory = 1141
can_sample_self.memory = 1141
nan
can_sample_self.memory = 1142
can_sample_self.memory = 1142
nan
can_sample_self.memory = 1143
can_sample_self.memory = 1143
nan
can_sample_self.memory = 1144
can_sample_self.memory = 1144

 18%|██████████████▌                                                                  | 36/200 [00:46<05:58,  2.19s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1246
can_sample_self.memory = 1246
nan
can_sample_self.memory = 1247
can_sample_self.memory = 1247
nan
can_sample_self.memory = 1248
can_sample_self.memory = 1248
nan
can_sample_self.memory = 1249
can_sample_self.memory = 1249
nan
can_sample_self.memory = 1250
can_sample_self.memory = 1250
nan
can_sample_self.memory = 1251
can_sample_self.memory = 1251
nan
can_sample_self.memory = 1252
can_sample_self.memory = 1252
nan
can_sample_self.memory = 1253
can_sample_self.memory = 1253
nan
can_sample_self.memory = 1254
can_sample_self.memory = 1254
nan
can_sample_self.memory = 1255
can_sample_self.memory = 1255
nan
can_sample_self.memory = 1256
can_sample_self.memory = 1256
nan
can_sample_self.memory = 1257
can_sample_self.memory = 1257
nan
can_sample_self.memory = 1258
can_sample_self.memory = 1258
nan
can_sample_self.memory = 1259
can_sample_self.memory = 1259
nan
can_sample_self.memory = 1260
can_sample_self.memory = 

 18%|██████████████▉                                                                  | 37/200 [00:50<06:38,  2.44s/it]

nan
can_sample_self.memory = 1322
can_sample_self.memory = 1322
nan
can_sample_self.memory = 1323
can_sample_self.memory = 1323
nan
can_sample_self.memory = 1324
can_sample_self.memory = 1324
nan
can_sample_self.memory = 1325
can_sample_self.memory = 1325
nan
can_sample_self.memory = 1326
can_sample_self.memory = 1326
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1327
can_sample_self.memory = 1327
nan
can_sample_self.memory = 1328
can_sample_self.memory = 1328
nan
can_sample_self.memory = 1329
can_sample_self.memory = 1329
nan
can_sample_self.memory = 1330
can_sample_self.memory = 1330
nan
can_sample_self.memory = 1331
can_sample_self.memory = 1331
nan
can_sample_self.memory = 1332
can_sample_self.memory = 1332
nan
can_sample_self.memory = 1333
can_sample_self.memory = 1333
nan
can_sample_self.memory = 1334
can_sample_self.memory = 1334
nan
can_sample_self.memory = 1335
can_sample_self.memory = 1335
nan
can_sample_self.memory = 1336
can_sample_self.memory = 

 19%|███████████████▍                                                                 | 38/200 [00:51<05:44,  2.13s/it]

nan
can_sample_self.memory = 1357
can_sample_self.memory = 1357
nan
can_sample_self.memory = 1358
can_sample_self.memory = 1358
nan
can_sample_self.memory = 1359
can_sample_self.memory = 1359
nan
can_sample_self.memory = 1360
can_sample_self.memory = 1360
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1361
can_sample_self.memory = 1361
nan
can_sample_self.memory = 1362
can_sample_self.memory = 1362
nan
can_sample_self.memory = 1363
can_sample_self.memory = 1363
nan
can_sample_self.memory = 1364
can_sample_self.memory = 1364
nan
can_sample_self.memory = 1365
can_sample_self.memory = 1365
nan
can_sample_self.memory = 1366


 20%|███████████████▊                                                                 | 39/200 [00:51<04:21,  1.62s/it]

can_sample_self.memory = 1366
nan
can_sample_self.memory = 1367
can_sample_self.memory = 1367
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1368
can_sample_self.memory = 1368
nan
can_sample_self.memory = 1369
can_sample_self.memory = 1369
nan
can_sample_self.memory = 1370
can_sample_self.memory = 1370
nan
can_sample_self.memory = 1371
can_sample_self.memory = 1371
nan
can_sample_self.memory = 1372
can_sample_self.memory = 1372
nan
can_sample_self.memory = 1373
can_sample_self.memory = 1373
nan
can_sample_self.memory = 1374
can_sample_self.memory = 1374
nan
can_sample_self.memory = 1375
can_sample_self.memory = 1375
nan
can_sample_self.memory = 1376
can_sample_self.memory = 1376
nan
can_sample_self.memory = 1377
can_sample_self.memory = 1377
nan
can_sample_self.memory = 1378
can_sample_self.memory = 1378
nan
can_sample_self.memory = 1379
can_sample_self.memory = 1379
nan
can_sample_self.memory = 1380
can_sample_self.memory = 1380
nan
can_sample_self.memory = 

 20%|████████████████▏                                                                | 40/200 [00:53<04:16,  1.60s/it]

can_sample_self.memory = 1402
can_sample_self.memory = 1402
nan
can_sample_self.memory = 1403
can_sample_self.memory = 1403
nan
can_sample_self.memory = 1404
can_sample_self.memory = 1404
nan
can_sample_self.memory = 1405
can_sample_self.memory = 1405
nan
can_sample_self.memory = 1406
can_sample_self.memory = 1406
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1407
can_sample_self.memory = 1407
nan
can_sample_self.memory = 1408
can_sample_self.memory = 1408
nan
can_sample_self.memory = 1409
can_sample_self.memory = 1409
nan
can_sample_self.memory = 1410
can_sample_self.memory = 1410
nan
can_sample_self.memory = 1411
can_sample_self.memory = 1411
nan
can_sample_self.memory = 1412
can_sample_self.memory = 1412
nan
can_sample_self.memory = 1413
can_sample_self.memory = 1413
nan
can_sample_self.memory = 1414
can_sample_self.memory = 1414
nan
can_sample_self.memory = 1415
can_sample_self.memory = 1415
nan
can_sample_self.memory = 1416
can_sample_self.memory = 1416

 20%|████████████████▌                                                                | 41/200 [00:57<06:21,  2.40s/it]

can_sample_self.memory = 1520
can_sample_self.memory = 1520
nan
can_sample_self.memory = 1521
can_sample_self.memory = 1521
nan
can_sample_self.memory = 1522
can_sample_self.memory = 1522
nan
can_sample_self.memory = 1523
can_sample_self.memory = 1523
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1524
can_sample_self.memory = 1524
nan
can_sample_self.memory = 1525
can_sample_self.memory = 1525
nan
can_sample_self.memory = 1526
can_sample_self.memory = 1526
nan
can_sample_self.memory = 1527
can_sample_self.memory = 1527
nan
can_sample_self.memory = 1528
can_sample_self.memory = 1528
nan
can_sample_self.memory = 1529
can_sample_self.memory = 1529
nan
can_sample_self.memory = 1530
can_sample_self.memory = 1530
nan
can_sample_self.memory = 1531
can_sample_self.memory = 1531
nan
can_sample_self.memory = 1532
can_sample_self.memory = 1532
nan
can_sample_self.memory = 1533
can_sample_self.memory = 1533
nan
can_sample_self.memory = 1534
can_sample_self.memory = 1534

 21%|█████████████████                                                                | 42/200 [01:00<06:36,  2.51s/it]

nan
can_sample_self.memory = 1594
can_sample_self.memory = 1594
nan
can_sample_self.memory = 1595
can_sample_self.memory = 1595
nan
can_sample_self.memory = 1596
can_sample_self.memory = 1596
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1597
can_sample_self.memory = 1597
nan
can_sample_self.memory = 1598
can_sample_self.memory = 1598
nan
can_sample_self.memory = 1599
can_sample_self.memory = 1599
nan
can_sample_self.memory = 1600
can_sample_self.memory = 1600
nan
can_sample_self.memory = 1601
can_sample_self.memory = 1601
nan
can_sample_self.memory = 1602
can_sample_self.memory = 1602
nan
can_sample_self.memory = 1603
can_sample_self.memory = 1603
nan
can_sample_self.memory = 1604
can_sample_self.memory = 1604
nan
can_sample_self.memory = 1605
can_sample_self.memory = 1605
nan
can_sample_self.memory = 1606
can_sample_self.memory = 1606
nan
can_sample_self.memory = 1607
can_sample_self.memory = 1607
nan
can_sample_self.memory = 1608
can_sample_self.memory = 

 22%|█████████████████▍                                                               | 43/200 [01:02<05:53,  2.25s/it]

can_sample_self.memory = 1638
can_sample_self.memory = 1638
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1639
can_sample_self.memory = 1639
nan
can_sample_self.memory = 1640
can_sample_self.memory = 1640
nan
can_sample_self.memory = 1641
can_sample_self.memory = 1641
nan
can_sample_self.memory = 1642
can_sample_self.memory = 1642
nan
can_sample_self.memory = 1643
can_sample_self.memory = 1643
nan
can_sample_self.memory = 1644
can_sample_self.memory = 1644
nan
can_sample_self.memory = 1645
can_sample_self.memory = 1645
nan
can_sample_self.memory = 1646
can_sample_self.memory = 1646
nan
can_sample_self.memory = 1647
can_sample_self.memory = 1647
nan
can_sample_self.memory = 1648
can_sample_self.memory = 1648
nan
can_sample_self.memory = 1649
can_sample_self.memory = 1649
nan
can_sample_self.memory = 1650
can_sample_self.memory = 1650
nan
can_sample_self.memory = 1651
can_sample_self.memory = 1651
nan
can_sample_self.memory = 1652
can_sample_self.memory = 1652

nan
can_sample_self.memory = 1766
can_sample_self.memory = 1766
nan
can_sample_self.memory = 1767
can_sample_self.memory = 1767
nan
can_sample_self.memory = 1768
can_sample_self.memory = 1768
nan
can_sample_self.memory = 1769
can_sample_self.memory = 1769
nan
can_sample_self.memory = 1770
can_sample_self.memory = 1770
nan
can_sample_self.memory = 1771
can_sample_self.memory = 1771
nan
can_sample_self.memory = 1772
can_sample_self.memory = 1772
nan
can_sample_self.memory = 1773
can_sample_self.memory = 1773
nan
can_sample_self.memory = 1774
can_sample_self.memory = 1774
nan
can_sample_self.memory = 1775
can_sample_self.memory = 1775
nan
can_sample_self.memory = 1776
can_sample_self.memory = 1776


 22%|█████████████████▊                                                               | 44/200 [01:07<08:07,  3.12s/it]

nan
can_sample_self.memory = 1777
can_sample_self.memory = 1777
nan
can_sample_self.memory = 1778
can_sample_self.memory = 1778
nan
can_sample_self.memory = 1779
can_sample_self.memory = 1779
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1780
can_sample_self.memory = 1780
nan
can_sample_self.memory = 1781
can_sample_self.memory = 1781
nan
can_sample_self.memory = 1782
can_sample_self.memory = 1782
nan
can_sample_self.memory = 1783
can_sample_self.memory = 1783
nan
can_sample_self.memory = 1784
can_sample_self.memory = 1784
nan
can_sample_self.memory = 1785
can_sample_self.memory = 1785


 22%|██████████████████▏                                                              | 45/200 [01:07<05:58,  2.31s/it]

nan
can_sample_self.memory = 1786
can_sample_self.memory = 1786
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1787
can_sample_self.memory = 1787
nan
can_sample_self.memory = 1788
can_sample_self.memory = 1788
nan
can_sample_self.memory = 1789
can_sample_self.memory = 1789
nan
can_sample_self.memory = 1790
can_sample_self.memory = 1790
nan
can_sample_self.memory = 1791
can_sample_self.memory = 1791
nan


 23%|██████████████████▋                                                              | 46/200 [01:08<04:33,  1.78s/it]

can_sample_self.memory = 1792
can_sample_self.memory = 1792
nan
can_sample_self.memory = 1793
can_sample_self.memory = 1793
nan
can_sample_self.memory = 1794
can_sample_self.memory = 1794
nan
can_sample_self.memory = 1795
can_sample_self.memory = 1795
nan
can_sample_self.memory = 1796
can_sample_self.memory = 1796
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1797
can_sample_self.memory = 1797
nan
can_sample_self.memory = 1798
can_sample_self.memory = 1798
nan
can_sample_self.memory = 1799
can_sample_self.memory = 1799
nan
can_sample_self.memory = 1800
can_sample_self.memory = 1800
nan
can_sample_self.memory = 1801
can_sample_self.memory = 1801
nan
can_sample_self.memory = 1802
can_sample_self.memory = 1802
nan
can_sample_self.memory = 1803
can_sample_self.memory = 1803
nan
can_sample_self.memory = 1804
can_sample_self.memory = 1804
nan
can_sample_self.memory = 1805
can_sample_self.memory = 1805
nan
can_sample_self.memory = 1806
can_sample_self.memory = 1806

 24%|███████████████████                                                              | 47/200 [01:09<04:27,  1.75s/it]

nan
can_sample_self.memory = 1839
can_sample_self.memory = 1839
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1840
can_sample_self.memory = 1840
nan
can_sample_self.memory = 1841
can_sample_self.memory = 1841
nan
can_sample_self.memory = 1842
can_sample_self.memory = 1842
nan
can_sample_self.memory = 1843
can_sample_self.memory = 1843
nan
can_sample_self.memory = 1844
can_sample_self.memory = 1844
nan
can_sample_self.memory = 1845
can_sample_self.memory = 1845
nan
can_sample_self.memory = 1846
can_sample_self.memory = 1846
nan
can_sample_self.memory = 1847
can_sample_self.memory = 1847
nan
can_sample_self.memory = 1848
can_sample_self.memory = 1848
nan
can_sample_self.memory = 1849
can_sample_self.memory = 1849
nan
can_sample_self.memory = 1850
can_sample_self.memory = 1850
nan
can_sample_self.memory = 1851
can_sample_self.memory = 1851
nan
can_sample_self.memory = 1852
can_sample_self.memory = 1852
nan
can_sample_self.memory = 1853
can_sample_self.memory = 

 24%|███████████████████▍                                                             | 48/200 [01:13<05:36,  2.22s/it]

nan
can_sample_self.memory = 1926
can_sample_self.memory = 1926
nan
can_sample_self.memory = 1927
can_sample_self.memory = 1927
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 1928
can_sample_self.memory = 1928
nan
can_sample_self.memory = 1929
can_sample_self.memory = 1929
nan
can_sample_self.memory = 1930
can_sample_self.memory = 1930
nan
can_sample_self.memory = 1931
can_sample_self.memory = 1931
nan
can_sample_self.memory = 1932
can_sample_self.memory = 1932
nan
can_sample_self.memory = 1933
can_sample_self.memory = 1933
nan
can_sample_self.memory = 1934
can_sample_self.memory = 1934
nan
can_sample_self.memory = 1935
can_sample_self.memory = 1935
nan
can_sample_self.memory = 1936
can_sample_self.memory = 1936
nan
can_sample_self.memory = 1937
can_sample_self.memory = 1937
nan
can_sample_self.memory = 1938
can_sample_self.memory = 1938
nan
can_sample_self.memory = 1939
can_sample_self.memory = 1939
nan
can_sample_self.memory = 1940
can_sample_self.memory = 

 24%|███████████████████▊                                                             | 49/200 [01:17<06:51,  2.72s/it]

can_sample_self.memory = 2028
nan
can_sample_self.memory = 2029
can_sample_self.memory = 2029
nan
can_sample_self.memory = 2030
can_sample_self.memory = 2030
nan
can_sample_self.memory = 2031
can_sample_self.memory = 2031
nan
can_sample_self.memory = 2032
can_sample_self.memory = 2032
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2033
can_sample_self.memory = 2033
nan
can_sample_self.memory = 2034
can_sample_self.memory = 2034
nan
can_sample_self.memory = 2035
can_sample_self.memory = 2035
nan
can_sample_self.memory = 2036
can_sample_self.memory = 2036
nan
can_sample_self.memory = 2037
can_sample_self.memory = 2037
nan
can_sample_self.memory = 2038
can_sample_self.memory = 2038
nan
can_sample_self.memory = 2039
can_sample_self.memory = 2039
nan
can_sample_self.memory = 2040
can_sample_self.memory = 2040
nan
can_sample_self.memory = 2041
can_sample_self.memory = 2041
nan
can_sample_self.memory = 2042
can_sample_self.memory = 2042
nan
can_sample_self.memory = 

nan
can_sample_self.memory = 2157
can_sample_self.memory = 2157
nan
can_sample_self.memory = 2158
can_sample_self.memory = 2158
nan
can_sample_self.memory = 2159
can_sample_self.memory = 2159
nan
can_sample_self.memory = 2160
can_sample_self.memory = 2160
nan
can_sample_self.memory = 2161
can_sample_self.memory = 2161
nan
can_sample_self.memory = 2162
can_sample_self.memory = 2162
nan
can_sample_self.memory = 2163
can_sample_self.memory = 2163
nan
can_sample_self.memory = 2164
can_sample_self.memory = 2164
nan
can_sample_self.memory = 2165
can_sample_self.memory = 2165
nan
can_sample_self.memory = 2166
can_sample_self.memory = 2166
nan
can_sample_self.memory = 2167
can_sample_self.memory = 2167
nan
can_sample_self.memory = 2168
can_sample_self.memory = 2168
nan
can_sample_self.memory = 2169
can_sample_self.memory = 2169
nan
can_sample_self.memory = 2170
can_sample_self.memory = 2170
nan
can_sample_self.memory = 2171
can_sample_self.memory = 2171
nan
can_sample_self.memory = 2172
can_sa

 25%|████████████████████▎                                                            | 50/200 [01:22<08:43,  3.49s/it]

can_sample_self.memory = 2174
can_sample_self.memory = 2174
nan
can_sample_self.memory = 2175
can_sample_self.memory = 2175
nan
can_sample_self.memory = 2176
can_sample_self.memory = 2176
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2177
can_sample_self.memory = 2177
nan
can_sample_self.memory = 2178
can_sample_self.memory = 2178
nan
can_sample_self.memory = 2179
can_sample_self.memory = 2179
nan
can_sample_self.memory = 2180
can_sample_self.memory = 2180
nan
can_sample_self.memory = 2181
can_sample_self.memory = 2181
nan
can_sample_self.memory = 2182
can_sample_self.memory = 2182
nan
can_sample_self.memory = 2183
can_sample_self.memory = 2183
nan
can_sample_self.memory = 2184
can_sample_self.memory = 2184
nan
can_sample_self.memory = 2185
can_sample_self.memory = 2185
nan
can_sample_self.memory = 2186
can_sample_self.memory = 2186
nan
can_sample_self.memory = 2187
can_sample_self.memory = 2187
nan
can_sample_self.memory = 2188
can_sample_self.memory = 2188

 26%|████████████████████▋                                                            | 51/200 [01:25<08:34,  3.45s/it]

nan
can_sample_self.memory = 2265
can_sample_self.memory = 2265
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2266
can_sample_self.memory = 2266
nan
can_sample_self.memory = 2267
can_sample_self.memory = 2267
nan
can_sample_self.memory = 2268
can_sample_self.memory = 2268
nan
can_sample_self.memory = 2269
can_sample_self.memory = 2269
nan
can_sample_self.memory = 2270
can_sample_self.memory = 2270
nan
can_sample_self.memory = 2271
can_sample_self.memory = 2271
nan
can_sample_self.memory = 2272
can_sample_self.memory = 2272
nan
can_sample_self.memory = 2273
can_sample_self.memory = 2273
nan
can_sample_self.memory = 2274
can_sample_self.memory = 2274
nan
can_sample_self.memory = 2275
can_sample_self.memory = 2275
nan
can_sample_self.memory = 2276
can_sample_self.memory = 2276
nan
can_sample_self.memory = 2277
can_sample_self.memory = 2277
nan
can_sample_self.memory = 2278
can_sample_self.memory = 2278
nan
can_sample_self.memory = 2279
can_sample_self.memory = 

 26%|█████████████████████                                                            | 52/200 [01:28<08:09,  3.31s/it]

can_sample_self.memory = 2342
can_sample_self.memory = 2342
nan
can_sample_self.memory = 2343
can_sample_self.memory = 2343
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2344
can_sample_self.memory = 2344
nan
can_sample_self.memory = 2345
can_sample_self.memory = 2345
nan
can_sample_self.memory = 2346
can_sample_self.memory = 2346
nan
can_sample_self.memory = 2347
can_sample_self.memory = 2347
nan
can_sample_self.memory = 2348
can_sample_self.memory = 2348
nan
can_sample_self.memory = 2349
can_sample_self.memory = 2349


 26%|█████████████████████▍                                                           | 53/200 [01:29<06:04,  2.48s/it]

nan
can_sample_self.memory = 2350
can_sample_self.memory = 2350
nan
can_sample_self.memory = 2351
can_sample_self.memory = 2351
nan
can_sample_self.memory = 2352
can_sample_self.memory = 2352
nan
can_sample_self.memory = 2353
can_sample_self.memory = 2353
nan
can_sample_self.memory = 2354
can_sample_self.memory = 2354
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2355
can_sample_self.memory = 2355
nan
can_sample_self.memory = 2356
can_sample_self.memory = 2356
nan
can_sample_self.memory = 2357
can_sample_self.memory = 2357
nan
can_sample_self.memory = 2358
can_sample_self.memory = 2358
nan
can_sample_self.memory = 2359
can_sample_self.memory = 2359
nan
can_sample_self.memory = 2360
can_sample_self.memory = 2360
nan
can_sample_self.memory = 2361
can_sample_self.memory = 2361
nan
can_sample_self.memory = 2362
can_sample_self.memory = 2362
nan
can_sample_self.memory = 2363
can_sample_self.memory = 2363
nan
can_sample_self.memory = 2364
can_sample_self.memory = 

 27%|█████████████████████▊                                                           | 54/200 [01:31<05:37,  2.31s/it]

can_sample_self.memory = 2400
can_sample_self.memory = 2400
nan
can_sample_self.memory = 2401
can_sample_self.memory = 2401
nan
can_sample_self.memory = 2402
can_sample_self.memory = 2402
nan
can_sample_self.memory = 2403
can_sample_self.memory = 2403
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2404
can_sample_self.memory = 2404
nan
can_sample_self.memory = 2405
can_sample_self.memory = 2405
nan
can_sample_self.memory = 2406
can_sample_self.memory = 2406
nan
can_sample_self.memory = 2407
can_sample_self.memory = 2407
nan
can_sample_self.memory = 2408
can_sample_self.memory = 2408
nan
can_sample_self.memory = 2409
can_sample_self.memory = 2409
nan
can_sample_self.memory = 2410
can_sample_self.memory = 2410
nan
can_sample_self.memory = 2411
can_sample_self.memory = 2411
nan
can_sample_self.memory = 2412
can_sample_self.memory = 2412
nan
can_sample_self.memory = 2413
can_sample_self.memory = 2413
nan
can_sample_self.memory = 2414
can_sample_self.memory = 2414

 28%|██████████████████████▎                                                          | 55/200 [01:32<05:08,  2.13s/it]

nan
can_sample_self.memory = 2444
can_sample_self.memory = 2444
nan
can_sample_self.memory = 2445
can_sample_self.memory = 2445
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2446
can_sample_self.memory = 2446
nan
can_sample_self.memory = 2447
can_sample_self.memory = 2447
nan
can_sample_self.memory = 2448
can_sample_self.memory = 2448
nan
can_sample_self.memory = 2449
can_sample_self.memory = 2449
nan
can_sample_self.memory = 2450
can_sample_self.memory = 2450
nan
can_sample_self.memory = 2451
can_sample_self.memory = 2451
nan
can_sample_self.memory = 2452
can_sample_self.memory = 2452
nan
can_sample_self.memory = 2453
can_sample_self.memory = 2453
nan
can_sample_self.memory = 2454
can_sample_self.memory = 2454
nan
can_sample_self.memory = 2455
can_sample_self.memory = 2455
nan
can_sample_self.memory = 2456
can_sample_self.memory = 2456
nan
can_sample_self.memory = 2457
can_sample_self.memory = 2457
nan
can_sample_self.memory = 2458
can_sample_self.memory = 

can_sample_self.memory = 2574
can_sample_self.memory = 2574
nan
can_sample_self.memory = 2575
can_sample_self.memory = 2575
nan
can_sample_self.memory = 2576
can_sample_self.memory = 2576
nan
can_sample_self.memory = 2577
can_sample_self.memory = 2577
nan
can_sample_self.memory = 2578
can_sample_self.memory = 2578
nan
can_sample_self.memory = 2579
can_sample_self.memory = 2579
nan
can_sample_self.memory = 2580
can_sample_self.memory = 2580
nan
can_sample_self.memory = 2581
can_sample_self.memory = 2581
nan
can_sample_self.memory = 2582
can_sample_self.memory = 2582
nan
can_sample_self.memory = 2583
can_sample_self.memory = 2583
nan
can_sample_self.memory = 2584
can_sample_self.memory = 2584
nan
can_sample_self.memory = 2585
can_sample_self.memory = 2585
nan
can_sample_self.memory = 2586
can_sample_self.memory = 2586
nan
can_sample_self.memory = 2587
can_sample_self.memory = 2587
nan
can_sample_self.memory = 2588
can_sample_self.memory = 2588
nan
can_sample_self.memory = 2589
can_sample

can_sample_self.memory = 2707
can_sample_self.memory = 2707
nan
can_sample_self.memory = 2708
can_sample_self.memory = 2708
nan
can_sample_self.memory = 2709
can_sample_self.memory = 2709
nan
can_sample_self.memory = 2710
can_sample_self.memory = 2710
nan
can_sample_self.memory = 2711
can_sample_self.memory = 2711
nan
can_sample_self.memory = 2712
can_sample_self.memory = 2712
nan
can_sample_self.memory = 2713
can_sample_self.memory = 2713
nan
can_sample_self.memory = 2714
can_sample_self.memory = 2714
nan
can_sample_self.memory = 2715
can_sample_self.memory = 2715
nan
can_sample_self.memory = 2716
can_sample_self.memory = 2716
nan
can_sample_self.memory = 2717
can_sample_self.memory = 2717
nan
can_sample_self.memory = 2718
can_sample_self.memory = 2718
nan
can_sample_self.memory = 2719
can_sample_self.memory = 2719
nan
can_sample_self.memory = 2720
can_sample_self.memory = 2720
nan
can_sample_self.memory = 2721
can_sample_self.memory = 2721
nan
can_sample_self.memory = 2722
can_sample

 28%|██████████████████████▋                                                          | 56/200 [01:43<11:14,  4.68s/it]

nan
can_sample_self.memory = 2733
can_sample_self.memory = 2733
nan
can_sample_self.memory = 2734
can_sample_self.memory = 2734
nan
can_sample_self.memory = 2735
can_sample_self.memory = 2735
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2736
can_sample_self.memory = 2736
nan
can_sample_self.memory = 2737
can_sample_self.memory = 2737
nan
can_sample_self.memory = 2738
can_sample_self.memory = 2738
nan
can_sample_self.memory = 2739
can_sample_self.memory = 2739
nan
can_sample_self.memory = 2740
can_sample_self.memory = 2740
nan
can_sample_self.memory = 2741
can_sample_self.memory = 2741


 28%|███████████████████████                                                          | 57/200 [01:43<08:07,  3.41s/it]

nan
can_sample_self.memory = 2742
can_sample_self.memory = 2742
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2743
can_sample_self.memory = 2743
nan
can_sample_self.memory = 2744
can_sample_self.memory = 2744
nan
can_sample_self.memory = 2745
can_sample_self.memory = 2745
nan
can_sample_self.memory = 2746
can_sample_self.memory = 2746
nan
can_sample_self.memory = 2747
can_sample_self.memory = 2747
nan
can_sample_self.memory = 2748
can_sample_self.memory = 2748
nan
can_sample_self.memory = 2749
can_sample_self.memory = 2749
nan
can_sample_self.memory = 2750
can_sample_self.memory = 2750
nan
can_sample_self.memory = 2751
can_sample_self.memory = 2751
nan
can_sample_self.memory = 2752
can_sample_self.memory = 2752
nan
can_sample_self.memory = 2753
can_sample_self.memory = 2753
nan


 29%|███████████████████████▍                                                         | 58/200 [01:44<06:04,  2.57s/it]

can_sample_self.memory = 2754
can_sample_self.memory = 2754
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2755
can_sample_self.memory = 2755
nan
can_sample_self.memory = 2756
can_sample_self.memory = 2756
nan
can_sample_self.memory = 2757
can_sample_self.memory = 2757
nan
can_sample_self.memory = 2758
can_sample_self.memory = 2758
nan
can_sample_self.memory = 2759
can_sample_self.memory = 2759
nan
can_sample_self.memory = 2760
can_sample_self.memory = 2760
nan
can_sample_self.memory = 2761
can_sample_self.memory = 2761
nan
can_sample_self.memory = 2762
can_sample_self.memory = 2762
nan
can_sample_self.memory = 2763
can_sample_self.memory = 2763
nan
can_sample_self.memory = 2764
can_sample_self.memory = 2764
nan
can_sample_self.memory = 2765
can_sample_self.memory = 2765
nan
can_sample_self.memory = 2766
can_sample_self.memory = 2766
nan
can_sample_self.memory = 2767
can_sample_self.memory = 2767
nan
can_sample_self.memory = 2768
can_sample_self.memory = 2768

nan
can_sample_self.memory = 2882
can_sample_self.memory = 2882
nan
can_sample_self.memory = 2883
can_sample_self.memory = 2883
nan
can_sample_self.memory = 2884
can_sample_self.memory = 2884
nan
can_sample_self.memory = 2885
can_sample_self.memory = 2885
nan
can_sample_self.memory = 2886
can_sample_self.memory = 2886
nan
can_sample_self.memory = 2887
can_sample_self.memory = 2887
nan
can_sample_self.memory = 2888
can_sample_self.memory = 2888
nan
can_sample_self.memory = 2889
can_sample_self.memory = 2889
nan
can_sample_self.memory = 2890
can_sample_self.memory = 2890
nan
can_sample_self.memory = 2891
can_sample_self.memory = 2891
nan
can_sample_self.memory = 2892
can_sample_self.memory = 2892
nan
can_sample_self.memory = 2893
can_sample_self.memory = 2893
nan
can_sample_self.memory = 2894
can_sample_self.memory = 2894
nan
can_sample_self.memory = 2895
can_sample_self.memory = 2895
nan
can_sample_self.memory = 2896
can_sample_self.memory = 2896
nan
can_sample_self.memory = 2897
can_sa

 30%|███████████████████████▉                                                         | 59/200 [01:49<08:01,  3.41s/it]

nan
can_sample_self.memory = 2900
can_sample_self.memory = 2900
nan
can_sample_self.memory = 2901
can_sample_self.memory = 2901
nan
can_sample_self.memory = 2902
can_sample_self.memory = 2902
nan
can_sample_self.memory = 2903
can_sample_self.memory = 2903
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2904
can_sample_self.memory = 2904
nan
can_sample_self.memory = 2905
can_sample_self.memory = 2905
nan
can_sample_self.memory = 2906
can_sample_self.memory = 2906
nan
can_sample_self.memory = 2907
can_sample_self.memory = 2907
nan
can_sample_self.memory = 2908
can_sample_self.memory = 2908
nan
can_sample_self.memory = 2909
can_sample_self.memory = 2909
nan
can_sample_self.memory = 2910
can_sample_self.memory = 2910
nan
can_sample_self.memory = 2911
can_sample_self.memory = 2911
nan
can_sample_self.memory = 2912
can_sample_self.memory = 2912
nan
can_sample_self.memory = 2913
can_sample_self.memory = 2913
nan
can_sample_self.memory = 2914
can_sample_self.memory = 

 30%|████████████████████████▎                                                        | 60/200 [01:50<06:17,  2.70s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 2927
can_sample_self.memory = 2927
nan
can_sample_self.memory = 2928
can_sample_self.memory = 2928
nan
can_sample_self.memory = 2929
can_sample_self.memory = 2929
nan
can_sample_self.memory = 2930
can_sample_self.memory = 2930
nan
can_sample_self.memory = 2931
can_sample_self.memory = 2931
nan
can_sample_self.memory = 2932
can_sample_self.memory = 2932
nan
can_sample_self.memory = 2933
can_sample_self.memory = 2933
nan
can_sample_self.memory = 2934
can_sample_self.memory = 2934
nan
can_sample_self.memory = 2935
can_sample_self.memory = 2935
nan
can_sample_self.memory = 2936
can_sample_self.memory = 2936
nan
can_sample_self.memory = 2937
can_sample_self.memory = 2937
nan
can_sample_self.memory = 2938
can_sample_self.memory = 2938
nan
can_sample_self.memory = 2939
can_sample_self.memory = 2939
nan
can_sample_self.memory = 2940
can_sample_self.memory = 2940
nan
can_sample_self.memory = 2941
can_sample_self.memory = 

 30%|████████████████████████▋                                                        | 61/200 [01:54<07:09,  3.09s/it]

nan
can_sample_self.memory = 3031
can_sample_self.memory = 3031
nan
can_sample_self.memory = 3032
can_sample_self.memory = 3032
nan
can_sample_self.memory = 3033
can_sample_self.memory = 3033
nan
can_sample_self.memory = 3034
can_sample_self.memory = 3034
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3035
can_sample_self.memory = 3035
nan
can_sample_self.memory = 3036
can_sample_self.memory = 3036
nan
can_sample_self.memory = 3037
can_sample_self.memory = 3037
nan
can_sample_self.memory = 3038
can_sample_self.memory = 3038
nan
can_sample_self.memory = 3039
can_sample_self.memory = 3039
nan
can_sample_self.memory = 3040
can_sample_self.memory = 3040
nan
can_sample_self.memory = 3041
can_sample_self.memory = 3041
nan
can_sample_self.memory = 3042
can_sample_self.memory = 3042
nan
can_sample_self.memory = 3043
can_sample_self.memory = 3043
nan
can_sample_self.memory = 3044
can_sample_self.memory = 3044
nan
can_sample_self.memory = 3045
can_sample_self.memory = 

nan
can_sample_self.memory = 3159
can_sample_self.memory = 3159
nan
can_sample_self.memory = 3160
can_sample_self.memory = 3160
nan
can_sample_self.memory = 3161
can_sample_self.memory = 3161
nan
can_sample_self.memory = 3162
can_sample_self.memory = 3162
nan
can_sample_self.memory = 3163
can_sample_self.memory = 3163
nan
can_sample_self.memory = 3164
can_sample_self.memory = 3164
nan
can_sample_self.memory = 3165
can_sample_self.memory = 3165
nan
can_sample_self.memory = 3166
can_sample_self.memory = 3166
nan
can_sample_self.memory = 3167
can_sample_self.memory = 3167
nan
can_sample_self.memory = 3168
can_sample_self.memory = 3168
nan
can_sample_self.memory = 3169
can_sample_self.memory = 3169
nan
can_sample_self.memory = 3170
can_sample_self.memory = 3170
nan
can_sample_self.memory = 3171
can_sample_self.memory = 3171
nan
can_sample_self.memory = 3172
can_sample_self.memory = 3172
nan
can_sample_self.memory = 3173
can_sample_self.memory = 3173
nan
can_sample_self.memory = 3174
can_sa

 31%|█████████████████████████                                                        | 62/200 [02:00<08:44,  3.80s/it]

nan
can_sample_self.memory = 3178
can_sample_self.memory = 3178
nan
can_sample_self.memory = 3179
can_sample_self.memory = 3179
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3180
can_sample_self.memory = 3180
nan
can_sample_self.memory = 3181
can_sample_self.memory = 3181
nan
can_sample_self.memory = 3182
can_sample_self.memory = 3182
nan
can_sample_self.memory = 3183
can_sample_self.memory = 3183
nan
can_sample_self.memory = 3184
can_sample_self.memory = 3184
nan
can_sample_self.memory = 3185
can_sample_self.memory = 3185
nan
can_sample_self.memory = 3186
can_sample_self.memory = 3186
nan
can_sample_self.memory = 3187
can_sample_self.memory = 3187
nan
can_sample_self.memory = 3188
can_sample_self.memory = 3188
nan
can_sample_self.memory = 3189
can_sample_self.memory = 3189
nan
can_sample_self.memory = 3190
can_sample_self.memory = 3190
nan
can_sample_self.memory = 3191
can_sample_self.memory = 3191
nan
can_sample_self.memory = 3192
can_sample_self.memory = 

 32%|█████████████████████████▌                                                       | 63/200 [02:04<08:59,  3.94s/it]

nan
can_sample_self.memory = 3291
can_sample_self.memory = 3291
nan
can_sample_self.memory = 3292
can_sample_self.memory = 3292
nan
can_sample_self.memory = 3293
can_sample_self.memory = 3293
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3294
can_sample_self.memory = 3294
nan
can_sample_self.memory = 3295
can_sample_self.memory = 3295
nan
can_sample_self.memory = 3296
can_sample_self.memory = 3296
nan
can_sample_self.memory = 3297
can_sample_self.memory = 3297
nan
can_sample_self.memory = 3298
can_sample_self.memory = 3298
nan
can_sample_self.memory = 3299
can_sample_self.memory = 3299
nan
can_sample_self.memory = 3300
can_sample_self.memory = 3300
nan
can_sample_self.memory = 3301
can_sample_self.memory = 3301
nan
can_sample_self.memory = 3302
can_sample_self.memory = 3302
nan
can_sample_self.memory = 3303
can_sample_self.memory = 3303
nan
can_sample_self.memory = 3304
can_sample_self.memory = 3304
nan
can_sample_self.memory = 3305
can_sample_self.memory = 

 32%|█████████████████████████▉                                                       | 64/200 [02:07<07:51,  3.47s/it]

nan
can_sample_self.memory = 3353
can_sample_self.memory = 3353
nan
can_sample_self.memory = 3354
can_sample_self.memory = 3354
nan
can_sample_self.memory = 3355
can_sample_self.memory = 3355
nan
can_sample_self.memory = 3356
can_sample_self.memory = 3356
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3357
can_sample_self.memory = 3357
nan
can_sample_self.memory = 3358
can_sample_self.memory = 3358
nan
can_sample_self.memory = 3359
can_sample_self.memory = 3359
nan
can_sample_self.memory = 3360
can_sample_self.memory = 3360
nan
can_sample_self.memory = 3361
can_sample_self.memory = 3361
nan
can_sample_self.memory = 3362
can_sample_self.memory = 3362
nan
can_sample_self.memory = 3363
can_sample_self.memory = 3363
nan
can_sample_self.memory = 3364
can_sample_self.memory = 3364
nan
can_sample_self.memory = 3365
can_sample_self.memory = 3365
nan
can_sample_self.memory = 3366
can_sample_self.memory = 3366
nan
can_sample_self.memory = 3367
can_sample_self.memory = 

 32%|██████████████████████████▎                                                      | 65/200 [02:08<06:45,  3.00s/it]

nan
can_sample_self.memory = 3403
can_sample_self.memory = 3403
nan
can_sample_self.memory = 3404
can_sample_self.memory = 3404
nan
can_sample_self.memory = 3405
can_sample_self.memory = 3405
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3406
can_sample_self.memory = 3406
nan
can_sample_self.memory = 3407
can_sample_self.memory = 3407
nan
can_sample_self.memory = 3408
can_sample_self.memory = 3408
nan
can_sample_self.memory = 3409
can_sample_self.memory = 3409
nan
can_sample_self.memory = 3410
can_sample_self.memory = 3410
nan
can_sample_self.memory = 3411
can_sample_self.memory = 3411
nan
can_sample_self.memory = 3412
can_sample_self.memory = 3412
nan
can_sample_self.memory = 3413
can_sample_self.memory = 3413
nan
can_sample_self.memory = 3414
can_sample_self.memory = 3414
nan
can_sample_self.memory = 3415
can_sample_self.memory = 3415
nan
can_sample_self.memory = 3416
can_sample_self.memory = 3416
nan
can_sample_self.memory = 3417
can_sample_self.memory = 

nan
can_sample_self.memory = 3531
can_sample_self.memory = 3531
nan
can_sample_self.memory = 3532
can_sample_self.memory = 3532
nan
can_sample_self.memory = 3533
can_sample_self.memory = 3533
nan
can_sample_self.memory = 3534
can_sample_self.memory = 3534
nan
can_sample_self.memory = 3535
can_sample_self.memory = 3535
nan
can_sample_self.memory = 3536
can_sample_self.memory = 3536
nan
can_sample_self.memory = 3537
can_sample_self.memory = 3537
nan
can_sample_self.memory = 3538
can_sample_self.memory = 3538
nan
can_sample_self.memory = 3539
can_sample_self.memory = 3539
nan
can_sample_self.memory = 3540
can_sample_self.memory = 3540
nan
can_sample_self.memory = 3541
can_sample_self.memory = 3541
nan
can_sample_self.memory = 3542
can_sample_self.memory = 3542
nan
can_sample_self.memory = 3543
can_sample_self.memory = 3543
nan
can_sample_self.memory = 3544
can_sample_self.memory = 3544
nan
can_sample_self.memory = 3545
can_sample_self.memory = 3545
nan
can_sample_self.memory = 3546
can_sa

 33%|██████████████████████████▋                                                      | 66/200 [02:15<09:09,  4.10s/it]

nan
can_sample_self.memory = 3583
can_sample_self.memory = 3583
nan
can_sample_self.memory = 3584
can_sample_self.memory = 3584
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3585
can_sample_self.memory = 3585
nan
can_sample_self.memory = 3586
can_sample_self.memory = 3586
nan
can_sample_self.memory = 3587
can_sample_self.memory = 3587
nan
can_sample_self.memory = 3588
can_sample_self.memory = 3588
nan
can_sample_self.memory = 3589
can_sample_self.memory = 3589
nan
can_sample_self.memory = 3590
can_sample_self.memory = 3590
nan
can_sample_self.memory = 3591
can_sample_self.memory = 3591
nan
can_sample_self.memory = 3592
can_sample_self.memory = 3592
nan
can_sample_self.memory = 3593
can_sample_self.memory = 3593
nan
can_sample_self.memory = 3594
can_sample_self.memory = 3594
nan
can_sample_self.memory = 3595
can_sample_self.memory = 3595
nan
can_sample_self.memory = 3596
can_sample_self.memory = 3596
nan
can_sample_self.memory = 3597
can_sample_self.memory = 

 34%|███████████████████████████▏                                                     | 67/200 [02:18<08:31,  3.85s/it]

nan
can_sample_self.memory = 3667
can_sample_self.memory = 3667
nan
can_sample_self.memory = 3668
can_sample_self.memory = 3668
nan
can_sample_self.memory = 3669
can_sample_self.memory = 3669
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3670
can_sample_self.memory = 3670
nan
can_sample_self.memory = 3671
can_sample_self.memory = 3671
nan
can_sample_self.memory = 3672
can_sample_self.memory = 3672
nan
can_sample_self.memory = 3673
can_sample_self.memory = 3673
nan
can_sample_self.memory = 3674
can_sample_self.memory = 3674
nan
can_sample_self.memory = 3675
can_sample_self.memory = 3675
nan
can_sample_self.memory = 3676
can_sample_self.memory = 3676
nan
can_sample_self.memory = 3677
can_sample_self.memory = 3677
nan
can_sample_self.memory = 3678
can_sample_self.memory = 3678
nan
can_sample_self.memory = 3679
can_sample_self.memory = 3679
nan
can_sample_self.memory = 3680
can_sample_self.memory = 3680
nan
can_sample_self.memory = 3681
can_sample_self.memory = 

 34%|███████████████████████████▌                                                     | 68/200 [02:21<07:44,  3.52s/it]

can_sample_self.memory = 3740
can_sample_self.memory = 3740
nan
can_sample_self.memory = 3741
can_sample_self.memory = 3741
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3742
can_sample_self.memory = 3742
nan
can_sample_self.memory = 3743
can_sample_self.memory = 3743
nan
can_sample_self.memory = 3744
can_sample_self.memory = 3744
nan
can_sample_self.memory = 3745
can_sample_self.memory = 3745
nan
can_sample_self.memory = 3746
can_sample_self.memory = 3746
nan
can_sample_self.memory = 3747
can_sample_self.memory = 3747
nan
can_sample_self.memory = 3748
can_sample_self.memory = 3748
nan
can_sample_self.memory = 3749
can_sample_self.memory = 3749
nan
can_sample_self.memory = 3750
can_sample_self.memory = 3750
nan
can_sample_self.memory = 3751
can_sample_self.memory = 3751
nan
can_sample_self.memory = 3752
can_sample_self.memory = 3752
nan
can_sample_self.memory = 3753
can_sample_self.memory = 3753
nan
can_sample_self.memory = 3754
can_sample_self.memory = 3754

 34%|███████████████████████████▉                                                     | 69/200 [02:22<05:54,  2.71s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3759
can_sample_self.memory = 3759
nan
can_sample_self.memory = 3760
can_sample_self.memory = 3760
nan
can_sample_self.memory = 3761
can_sample_self.memory = 3761
nan
can_sample_self.memory = 3762
can_sample_self.memory = 3762
nan
can_sample_self.memory = 3763
can_sample_self.memory = 3763
nan
can_sample_self.memory = 3764
can_sample_self.memory = 3764
nan
can_sample_self.memory = 3765
can_sample_self.memory = 3765
nan
can_sample_self.memory = 3766
can_sample_self.memory = 3766
nan
can_sample_self.memory = 3767
can_sample_self.memory = 3767
nan
can_sample_self.memory = 3768
can_sample_self.memory = 3768
nan
can_sample_self.memory = 3769
can_sample_self.memory = 3769
nan
can_sample_self.memory = 3770
can_sample_self.memory = 3770
nan
can_sample_self.memory = 3771
can_sample_self.memory = 3771
nan
can_sample_self.memory = 3772
can_sample_self.memory = 3772
nan
can_sample_self.memory = 3773
can_sample_self.memory = 

 35%|████████████████████████████▎                                                    | 70/200 [02:26<06:28,  2.99s/it]

nan
can_sample_self.memory = 3855
can_sample_self.memory = 3855
nan
can_sample_self.memory = 3856
can_sample_self.memory = 3856
nan
can_sample_self.memory = 3857
can_sample_self.memory = 3857
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3858
can_sample_self.memory = 3858
nan
can_sample_self.memory = 3859
can_sample_self.memory = 3859
nan
can_sample_self.memory = 3860
can_sample_self.memory = 3860
nan
can_sample_self.memory = 3861
can_sample_self.memory = 3861
nan
can_sample_self.memory = 3862
can_sample_self.memory = 3862
nan
can_sample_self.memory = 3863
can_sample_self.memory = 3863
nan
can_sample_self.memory = 3864
can_sample_self.memory = 3864
nan
can_sample_self.memory = 3865
can_sample_self.memory = 3865
nan
can_sample_self.memory = 3866
can_sample_self.memory = 3866
nan
can_sample_self.memory = 3867
can_sample_self.memory = 3867
nan
can_sample_self.memory = 3868
can_sample_self.memory = 3868
nan
can_sample_self.memory = 3869
can_sample_self.memory = 

 36%|████████████████████████████▊                                                    | 71/200 [02:30<07:00,  3.26s/it]

nan
can_sample_self.memory = 3959
can_sample_self.memory = 3959
nan
can_sample_self.memory = 3960
can_sample_self.memory = 3960
nan
can_sample_self.memory = 3961
can_sample_self.memory = 3961
nan
can_sample_self.memory = 3962
can_sample_self.memory = 3962
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 3963
can_sample_self.memory = 3963
nan
can_sample_self.memory = 3964
can_sample_self.memory = 3964
nan
can_sample_self.memory = 3965
can_sample_self.memory = 3965
nan
can_sample_self.memory = 3966
can_sample_self.memory = 3966
nan
can_sample_self.memory = 3967
can_sample_self.memory = 3967
nan
can_sample_self.memory = 3968
can_sample_self.memory = 3968
nan
can_sample_self.memory = 3969
can_sample_self.memory = 3969
nan
can_sample_self.memory = 3970
can_sample_self.memory = 3970
nan
can_sample_self.memory = 3971
can_sample_self.memory = 3971
nan
can_sample_self.memory = 3972
can_sample_self.memory = 3972
nan
can_sample_self.memory = 3973
can_sample_self.memory = 

 36%|█████████████████████████████▏                                                   | 72/200 [02:31<05:50,  2.74s/it]

can_sample_self.memory = 3997
can_sample_self.memory = 3997
nan
can_sample_self.memory = 3998
can_sample_self.memory = 3998
nan
can_sample_self.memory = 3999
can_sample_self.memory = 3999
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4000
can_sample_self.memory = 4000
nan
can_sample_self.memory = 4001
can_sample_self.memory = 4001
nan
can_sample_self.memory = 4002
can_sample_self.memory = 4002
nan
can_sample_self.memory = 4003
can_sample_self.memory = 4003
nan
can_sample_self.memory = 4004
can_sample_self.memory = 4004
nan
can_sample_self.memory = 4005
can_sample_self.memory = 4005
nan
can_sample_self.memory = 4006
can_sample_self.memory = 4006
nan
can_sample_self.memory = 4007
can_sample_self.memory = 4007
nan
can_sample_self.memory = 4008
can_sample_self.memory = 4008
nan
can_sample_self.memory = 4009
can_sample_self.memory = 4009
nan
can_sample_self.memory = 4010
can_sample_self.memory = 4010
nan
can_sample_self.memory = 4011
can_sample_self.memory = 4011

 36%|█████████████████████████████▌                                                   | 73/200 [02:33<05:13,  2.47s/it]

nan
can_sample_self.memory = 4044
can_sample_self.memory = 4044
nan
can_sample_self.memory = 4045
can_sample_self.memory = 4045
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4046
can_sample_self.memory = 4046
nan
can_sample_self.memory = 4047
can_sample_self.memory = 4047
nan
can_sample_self.memory = 4048
can_sample_self.memory = 4048
nan
can_sample_self.memory = 4049
can_sample_self.memory = 4049
nan
can_sample_self.memory = 4050
can_sample_self.memory = 4050
nan
can_sample_self.memory = 4051
can_sample_self.memory = 4051
nan
can_sample_self.memory = 4052
can_sample_self.memory = 4052
nan
can_sample_self.memory = 4053
can_sample_self.memory = 4053
nan
can_sample_self.memory = 4054
can_sample_self.memory = 4054
nan
can_sample_self.memory = 4055
can_sample_self.memory = 4055
nan
can_sample_self.memory = 4056
can_sample_self.memory = 4056
nan
can_sample_self.memory = 4057
can_sample_self.memory = 4057
nan
can_sample_self.memory = 4058
can_sample_self.memory = 

 37%|█████████████████████████████▉                                                   | 74/200 [02:36<05:49,  2.77s/it]

can_sample_self.memory = 4135
can_sample_self.memory = 4135
nan
can_sample_self.memory = 4136
can_sample_self.memory = 4136
nan
can_sample_self.memory = 4137
can_sample_self.memory = 4137
nan
can_sample_self.memory = 4138
can_sample_self.memory = 4138
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4139
can_sample_self.memory = 4139
nan
can_sample_self.memory = 4140
can_sample_self.memory = 4140
nan
can_sample_self.memory = 4141
can_sample_self.memory = 4141
nan
can_sample_self.memory = 4142
can_sample_self.memory = 4142
nan
can_sample_self.memory = 4143
can_sample_self.memory = 4143
nan
can_sample_self.memory = 4144
can_sample_self.memory = 4144
nan
can_sample_self.memory = 4145
can_sample_self.memory = 4145
nan
can_sample_self.memory = 4146
can_sample_self.memory = 4146
nan
can_sample_self.memory = 4147
can_sample_self.memory = 4147
nan
can_sample_self.memory = 4148
can_sample_self.memory = 4148
nan
can_sample_self.memory = 4149
can_sample_self.memory = 4149

 38%|██████████████████████████████▍                                                  | 75/200 [02:39<05:26,  2.61s/it]

nan
can_sample_self.memory = 4192
can_sample_self.memory = 4192
nan
can_sample_self.memory = 4193
can_sample_self.memory = 4193
nan
can_sample_self.memory = 4194
can_sample_self.memory = 4194
nan
can_sample_self.memory = 4195
can_sample_self.memory = 4195
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4196
can_sample_self.memory = 4196
nan
can_sample_self.memory = 4197
can_sample_self.memory = 4197
nan
can_sample_self.memory = 4198
can_sample_self.memory = 4198
nan
can_sample_self.memory = 4199
can_sample_self.memory = 4199
nan
can_sample_self.memory = 4200
can_sample_self.memory = 4200
nan
can_sample_self.memory = 4201
can_sample_self.memory = 4201
nan
can_sample_self.memory = 4202
can_sample_self.memory = 4202
nan
can_sample_self.memory = 4203
can_sample_self.memory = 4203
nan
can_sample_self.memory = 4204
can_sample_self.memory = 4204
nan
can_sample_self.memory = 4205
can_sample_self.memory = 4205
nan
can_sample_self.memory = 4206
can_sample_self.memory = 

 38%|██████████████████████████████▊                                                  | 76/200 [02:39<04:09,  2.02s/it]

can_sample_self.memory = 4207
can_sample_self.memory = 4207
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4208
can_sample_self.memory = 4208
nan
can_sample_self.memory = 4209
can_sample_self.memory = 4209
nan
can_sample_self.memory = 4210
can_sample_self.memory = 4210
nan
can_sample_self.memory = 4211
can_sample_self.memory = 4211
nan
can_sample_self.memory = 4212
can_sample_self.memory = 4212
nan
can_sample_self.memory = 4213
can_sample_self.memory = 4213
nan
can_sample_self.memory = 4214
can_sample_self.memory = 4214
nan
can_sample_self.memory = 4215
can_sample_self.memory = 4215
nan
can_sample_self.memory = 4216
can_sample_self.memory = 4216
nan
can_sample_self.memory = 4217
can_sample_self.memory = 4217
nan
can_sample_self.memory = 4218
can_sample_self.memory = 4218
nan
can_sample_self.memory = 4219
can_sample_self.memory = 4219
nan
can_sample_self.memory = 4220
can_sample_self.memory = 4220
nan
can_sample_self.memory = 4221
can_sample_self.memory = 4221

 38%|███████████████████████████████▏                                                 | 77/200 [02:42<04:28,  2.18s/it]

nan
can_sample_self.memory = 4274
can_sample_self.memory = 4274
nan
can_sample_self.memory = 4275
can_sample_self.memory = 4275
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4276
can_sample_self.memory = 4276
nan
can_sample_self.memory = 4277
can_sample_self.memory = 4277
nan
can_sample_self.memory = 4278
can_sample_self.memory = 4278
nan
can_sample_self.memory = 4279
can_sample_self.memory = 4279
nan
can_sample_self.memory = 4280
can_sample_self.memory = 4280
nan
can_sample_self.memory = 4281
can_sample_self.memory = 4281
nan
can_sample_self.memory = 4282
can_sample_self.memory = 4282
nan
can_sample_self.memory = 4283
can_sample_self.memory = 4283
nan
can_sample_self.memory = 4284
can_sample_self.memory = 4284
nan
can_sample_self.memory = 4285
can_sample_self.memory = 4285
nan
can_sample_self.memory = 4286
can_sample_self.memory = 4286
nan
can_sample_self.memory = 4287
can_sample_self.memory = 4287
nan
can_sample_self.memory = 4288
can_sample_self.memory = 

 39%|███████████████████████████████▌                                                 | 78/200 [02:44<04:13,  2.08s/it]

nan
can_sample_self.memory = 4318
can_sample_self.memory = 4318
nan
can_sample_self.memory = 4319
can_sample_self.memory = 4319
nan
can_sample_self.memory = 4320
can_sample_self.memory = 4320
nan
can_sample_self.memory = 4321
can_sample_self.memory = 4321
nan
can_sample_self.memory = 4322
can_sample_self.memory = 4322
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4323
can_sample_self.memory = 4323
nan
can_sample_self.memory = 4324
can_sample_self.memory = 4324
nan
can_sample_self.memory = 4325
can_sample_self.memory = 4325
nan
can_sample_self.memory = 4326
can_sample_self.memory = 4326
nan
can_sample_self.memory = 4327
can_sample_self.memory = 4327
nan
can_sample_self.memory = 4328
can_sample_self.memory = 4328
nan
can_sample_self.memory = 4329
can_sample_self.memory = 4329
nan
can_sample_self.memory = 4330
can_sample_self.memory = 4330
nan
can_sample_self.memory = 4331
can_sample_self.memory = 4331
nan
can_sample_self.memory = 4332
can_sample_self.memory = 

 40%|███████████████████████████████▉                                                 | 79/200 [02:45<03:33,  1.77s/it]


nan
can_sample_self.memory = 4347
can_sample_self.memory = 4347
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4348
can_sample_self.memory = 4348
nan
can_sample_self.memory = 4349
can_sample_self.memory = 4349
nan
can_sample_self.memory = 4350
can_sample_self.memory = 4350
nan
can_sample_self.memory = 4351
can_sample_self.memory = 4351
nan
can_sample_self.memory = 4352
can_sample_self.memory = 4352
nan
can_sample_self.memory = 4353
can_sample_self.memory = 4353
nan
can_sample_self.memory = 4354
can_sample_self.memory = 4354
nan
can_sample_self.memory = 4355
can_sample_self.memory = 4355
nan
can_sample_self.memory = 4356
can_sample_self.memory = 4356
nan
can_sample_self.memory = 4357
can_sample_self.memory = 4357
nan
can_sample_self.memory = 4358
can_sample_self.memory = 4358
nan
can_sample_self.memory = 4359
can_sample_self.memory = 4359
nan
can_sample_self.memory = 4360
can_sample_self.memory = 4360
nan
can_sample_self.memory = 4361
can_sample_self.memory =

 40%|████████████████████████████████▍                                                | 80/200 [02:47<03:37,  1.81s/it]

nan
can_sample_self.memory = 4395
can_sample_self.memory = 4395
nan
can_sample_self.memory = 4396
can_sample_self.memory = 4396
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4397
can_sample_self.memory = 4397
nan
can_sample_self.memory = 4398
can_sample_self.memory = 4398
nan
can_sample_self.memory = 4399
can_sample_self.memory = 4399
nan
can_sample_self.memory = 4400
can_sample_self.memory = 4400
nan
can_sample_self.memory = 4401
can_sample_self.memory = 4401
nan
can_sample_self.memory = 4402
can_sample_self.memory = 4402
nan
can_sample_self.memory = 4403
can_sample_self.memory = 4403
nan
can_sample_self.memory = 4404
can_sample_self.memory = 4404
nan
can_sample_self.memory = 4405
can_sample_self.memory = 4405
nan
can_sample_self.memory = 4406
can_sample_self.memory = 4406
nan
can_sample_self.memory = 4407
can_sample_self.memory = 4407
nan
can_sample_self.memory = 4408
can_sample_self.memory = 4408
nan
can_sample_self.memory = 4409
can_sample_self.memory = 

nan
can_sample_self.memory = 4527
can_sample_self.memory = 4527
nan
can_sample_self.memory = 4528
can_sample_self.memory = 4528
nan
can_sample_self.memory = 4529
can_sample_self.memory = 4529
nan
can_sample_self.memory = 4530
can_sample_self.memory = 4530
nan
can_sample_self.memory = 4531
can_sample_self.memory = 4531
nan
can_sample_self.memory = 4532
can_sample_self.memory = 4532


 40%|████████████████████████████████▊                                                | 81/200 [02:52<05:35,  2.82s/it]

nan
can_sample_self.memory = 4533
can_sample_self.memory = 4533
nan
can_sample_self.memory = 4534
can_sample_self.memory = 4534
nan
can_sample_self.memory = 4535
can_sample_self.memory = 4535
nan
can_sample_self.memory = 4536
can_sample_self.memory = 4536
nan
can_sample_self.memory = 4537
can_sample_self.memory = 4537
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4538
can_sample_self.memory = 4538
nan
can_sample_self.memory = 4539
can_sample_self.memory = 4539
nan
can_sample_self.memory = 4540
can_sample_self.memory = 4540
nan
can_sample_self.memory = 4541
can_sample_self.memory = 4541
nan
can_sample_self.memory = 4542
can_sample_self.memory = 4542
nan
can_sample_self.memory = 4543
can_sample_self.memory = 4543
nan
can_sample_self.memory = 4544
can_sample_self.memory = 4544
nan
can_sample_self.memory = 4545
can_sample_self.memory = 4545
nan
can_sample_self.memory = 4546
can_sample_self.memory = 4546
nan
can_sample_self.memory = 4547
can_sample_self.memory = 

 41%|█████████████████████████████████▏                                               | 82/200 [02:56<06:26,  3.28s/it]

nan
can_sample_self.memory = 4655
can_sample_self.memory = 4655
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4656
can_sample_self.memory = 4656
nan
can_sample_self.memory = 4657
can_sample_self.memory = 4657
nan
can_sample_self.memory = 4658
can_sample_self.memory = 4658
nan
can_sample_self.memory = 4659
can_sample_self.memory = 4659
nan
can_sample_self.memory = 4660
can_sample_self.memory = 4660
nan
can_sample_self.memory = 4661
can_sample_self.memory = 4661
nan
can_sample_self.memory = 4662
can_sample_self.memory = 4662
nan
can_sample_self.memory = 4663
can_sample_self.memory = 4663
nan
can_sample_self.memory = 4664
can_sample_self.memory = 4664
nan
can_sample_self.memory = 4665
can_sample_self.memory = 4665
nan
can_sample_self.memory = 4666
can_sample_self.memory = 4666
nan
can_sample_self.memory = 4667
can_sample_self.memory = 4667
nan
can_sample_self.memory = 4668
can_sample_self.memory = 4668
nan
can_sample_self.memory = 4669
can_sample_self.memory = 

 42%|█████████████████████████████████▌                                               | 83/200 [02:58<05:34,  2.86s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4704
can_sample_self.memory = 4704
nan
can_sample_self.memory = 4705
can_sample_self.memory = 4705
nan
can_sample_self.memory = 4706
can_sample_self.memory = 4706
nan
can_sample_self.memory = 4707
can_sample_self.memory = 4707
nan
can_sample_self.memory = 4708
can_sample_self.memory = 4708
nan
can_sample_self.memory = 4709
can_sample_self.memory = 4709
nan
can_sample_self.memory = 4710
can_sample_self.memory = 4710
nan
can_sample_self.memory = 4711
can_sample_self.memory = 4711
nan
can_sample_self.memory = 4712
can_sample_self.memory = 4712
nan
can_sample_self.memory = 4713
can_sample_self.memory = 4713
nan
can_sample_self.memory = 4714
can_sample_self.memory = 4714
nan
can_sample_self.memory = 4715
can_sample_self.memory = 4715
nan
can_sample_self.memory = 4716
can_sample_self.memory = 4716
nan
can_sample_self.memory = 4717
can_sample_self.memory = 4717
nan
can_sample_self.memory = 4718
can_sample_self.memory = 

 42%|██████████████████████████████████                                               | 84/200 [03:00<04:48,  2.48s/it]

can_sample_self.memory = 4739
can_sample_self.memory = 4739
nan
can_sample_self.memory = 4740
can_sample_self.memory = 4740
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4741
can_sample_self.memory = 4741
nan
can_sample_self.memory = 4742
can_sample_self.memory = 4742
nan
can_sample_self.memory = 4743
can_sample_self.memory = 4743
nan
can_sample_self.memory = 4744
can_sample_self.memory = 4744
nan
can_sample_self.memory = 4745
can_sample_self.memory = 4745
nan
can_sample_self.memory = 4746
can_sample_self.memory = 4746
nan
can_sample_self.memory = 4747
can_sample_self.memory = 4747
nan
can_sample_self.memory = 4748
can_sample_self.memory = 4748
nan
can_sample_self.memory = 4749
can_sample_self.memory = 4749
nan
can_sample_self.memory = 4750
can_sample_self.memory = 4750
nan
can_sample_self.memory = 4751
can_sample_self.memory = 4751
nan
can_sample_self.memory = 4752
can_sample_self.memory = 4752
nan
can_sample_self.memory = 4753
can_sample_self.memory = 4753

 42%|██████████████████████████████████▍                                              | 85/200 [03:02<04:55,  2.57s/it]

can_sample_self.memory = 4812
can_sample_self.memory = 4812
nan
can_sample_self.memory = 4813
can_sample_self.memory = 4813
nan
can_sample_self.memory = 4814
can_sample_self.memory = 4814
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4815
can_sample_self.memory = 4815
nan
can_sample_self.memory = 4816
can_sample_self.memory = 4816
nan
can_sample_self.memory = 4817
can_sample_self.memory = 4817
nan
can_sample_self.memory = 4818
can_sample_self.memory = 4818
nan
can_sample_self.memory = 4819
can_sample_self.memory = 4819
nan
can_sample_self.memory = 4820
can_sample_self.memory = 4820
nan
can_sample_self.memory = 4821
can_sample_self.memory = 4821
nan
can_sample_self.memory = 4822
can_sample_self.memory = 4822
nan
can_sample_self.memory = 4823
can_sample_self.memory = 4823
nan
can_sample_self.memory = 4824
can_sample_self.memory = 4824
nan
can_sample_self.memory = 4825
can_sample_self.memory = 4825
nan
can_sample_self.memory = 4826
can_sample_self.memory = 4826

 43%|██████████████████████████████████▊                                              | 86/200 [03:06<05:13,  2.75s/it]

nan
can_sample_self.memory = 4896
can_sample_self.memory = 4896
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4897
can_sample_self.memory = 4897
nan
can_sample_self.memory = 4898
can_sample_self.memory = 4898
nan
can_sample_self.memory = 4899
can_sample_self.memory = 4899
nan
can_sample_self.memory = 4900
can_sample_self.memory = 4900
nan
can_sample_self.memory = 4901
can_sample_self.memory = 4901
nan
can_sample_self.memory = 4902
can_sample_self.memory = 4902
nan
can_sample_self.memory = 4903
can_sample_self.memory = 4903
nan
can_sample_self.memory = 4904
can_sample_self.memory = 4904
nan
can_sample_self.memory = 4905
can_sample_self.memory = 4905
nan
can_sample_self.memory = 4906
can_sample_self.memory = 4906
nan
can_sample_self.memory = 4907
can_sample_self.memory = 4907
nan
can_sample_self.memory = 4908
can_sample_self.memory = 4908
nan
can_sample_self.memory = 4909
can_sample_self.memory = 4909
nan
can_sample_self.memory = 4910
can_sample_self.memory = 

 44%|███████████████████████████████████▏                                             | 87/200 [03:09<05:47,  3.07s/it]

can_sample_self.memory = 4994
can_sample_self.memory = 4994
nan
can_sample_self.memory = 4995
can_sample_self.memory = 4995
nan
can_sample_self.memory = 4996
can_sample_self.memory = 4996
nan
can_sample_self.memory = 4997
can_sample_self.memory = 4997
nan
can_sample_self.memory = 4998
can_sample_self.memory = 4998
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 4999
can_sample_self.memory = 4999
nan
can_sample_self.memory = 5000
can_sample_self.memory = 5000
nan
can_sample_self.memory = 5001
can_sample_self.memory = 5001
nan
can_sample_self.memory = 5002
can_sample_self.memory = 5002
nan
can_sample_self.memory = 5003
can_sample_self.memory = 5003
nan
can_sample_self.memory = 5004
can_sample_self.memory = 5004


 44%|███████████████████████████████████▋                                             | 88/200 [03:10<04:15,  2.28s/it]

nan
can_sample_self.memory = 5005
can_sample_self.memory = 5005
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5006
can_sample_self.memory = 5006
nan
can_sample_self.memory = 5007
can_sample_self.memory = 5007
nan
can_sample_self.memory = 5008
can_sample_self.memory = 5008
nan
can_sample_self.memory = 5009
can_sample_self.memory = 5009
nan
can_sample_self.memory = 5010
can_sample_self.memory = 5010
nan
can_sample_self.memory = 5011
can_sample_self.memory = 5011
nan
can_sample_self.memory = 5012
can_sample_self.memory = 5012
nan
can_sample_self.memory = 5013
can_sample_self.memory = 5013
nan
can_sample_self.memory = 5014
can_sample_self.memory = 5014
nan
can_sample_self.memory = 5015
can_sample_self.memory = 5015
nan
can_sample_self.memory = 5016
can_sample_self.memory = 5016
nan
can_sample_self.memory = 5017
can_sample_self.memory = 5017
nan
can_sample_self.memory = 5018
can_sample_self.memory = 5018
nan
can_sample_self.memory = 5019
can_sample_self.memory = 

 44%|████████████████████████████████████                                             | 89/200 [03:11<03:40,  1.99s/it]

nan
can_sample_self.memory = 5035
can_sample_self.memory = 5035
nan
can_sample_self.memory = 5036
can_sample_self.memory = 5036
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5037
can_sample_self.memory = 5037
nan
can_sample_self.memory = 5038
can_sample_self.memory = 5038
nan
can_sample_self.memory = 5039
can_sample_self.memory = 5039
nan
can_sample_self.memory = 5040
can_sample_self.memory = 5040
nan
can_sample_self.memory = 5041
can_sample_self.memory = 5041
nan
can_sample_self.memory = 5042
can_sample_self.memory = 5042
nan
can_sample_self.memory = 5043
can_sample_self.memory = 5043
nan
can_sample_self.memory = 5044
can_sample_self.memory = 5044
nan
can_sample_self.memory = 5045
can_sample_self.memory = 5045
nan
can_sample_self.memory = 5046
can_sample_self.memory = 5046
nan
can_sample_self.memory = 5047
can_sample_self.memory = 5047
nan
can_sample_self.memory = 5048
can_sample_self.memory = 5048
nan
can_sample_self.memory = 5049
can_sample_self.memory = 

can_sample_self.memory = 5166
can_sample_self.memory = 5166
nan
can_sample_self.memory = 5167
can_sample_self.memory = 5167
nan
can_sample_self.memory = 5168
can_sample_self.memory = 5168
nan
can_sample_self.memory = 5169
can_sample_self.memory = 5169
nan
can_sample_self.memory = 5170
can_sample_self.memory = 5170
nan
can_sample_self.memory = 5171
can_sample_self.memory = 5171


 45%|████████████████████████████████████▍                                            | 90/200 [03:16<05:19,  2.91s/it]

nan
can_sample_self.memory = 5172
can_sample_self.memory = 5172
nan
can_sample_self.memory = 5173
can_sample_self.memory = 5173
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5174
can_sample_self.memory = 5174
nan
can_sample_self.memory = 5175
can_sample_self.memory = 5175
nan
can_sample_self.memory = 5176
can_sample_self.memory = 5176
nan
can_sample_self.memory = 5177
can_sample_self.memory = 5177
nan
can_sample_self.memory = 5178
can_sample_self.memory = 5178
nan
can_sample_self.memory = 5179
can_sample_self.memory = 5179
nan
can_sample_self.memory = 5180
can_sample_self.memory = 5180
nan
can_sample_self.memory = 5181
can_sample_self.memory = 5181
nan
can_sample_self.memory = 5182
can_sample_self.memory = 5182
nan
can_sample_self.memory = 5183
can_sample_self.memory = 5183
nan
can_sample_self.memory = 5184
can_sample_self.memory = 5184
nan
can_sample_self.memory = 5185
can_sample_self.memory = 5185
nan
can_sample_self.memory = 5186
can_sample_self.memory = 

 46%|████████████████████████████████████▊                                            | 91/200 [03:19<05:28,  3.01s/it]

can_sample_self.memory = 5255
can_sample_self.memory = 5255
nan
can_sample_self.memory = 5256
can_sample_self.memory = 5256
nan
can_sample_self.memory = 5257
can_sample_self.memory = 5257
nan
can_sample_self.memory = 5258
can_sample_self.memory = 5258
nan
can_sample_self.memory = 5259
can_sample_self.memory = 5259
nan
can_sample_self.memory = 5260
can_sample_self.memory = 5260
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5261
can_sample_self.memory = 5261
nan
can_sample_self.memory = 5262
can_sample_self.memory = 5262
nan
can_sample_self.memory = 5263
can_sample_self.memory = 5263
nan
can_sample_self.memory = 5264
can_sample_self.memory = 5264
nan
can_sample_self.memory = 5265
can_sample_self.memory = 5265
nan
can_sample_self.memory = 5266
can_sample_self.memory = 5266
nan
can_sample_self.memory = 5267
can_sample_self.memory = 5267
nan
can_sample_self.memory = 5268
can_sample_self.memory = 5268
nan
can_sample_self.memory = 5269
can_sample_self.memory = 5269

 46%|█████████████████████████████████████▎                                           | 92/200 [03:20<04:19,  2.41s/it]

nan
can_sample_self.memory = 5281
can_sample_self.memory = 5281
nan
can_sample_self.memory = 5282
can_sample_self.memory = 5282
nan
can_sample_self.memory = 5283
can_sample_self.memory = 5283
nan
can_sample_self.memory = 5284
can_sample_self.memory = 5284
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5285
can_sample_self.memory = 5285
nan
can_sample_self.memory = 5286
can_sample_self.memory = 5286
nan
can_sample_self.memory = 5287
can_sample_self.memory = 5287
nan
can_sample_self.memory = 5288
can_sample_self.memory = 5288
nan
can_sample_self.memory = 5289
can_sample_self.memory = 5289
nan
can_sample_self.memory = 5290
can_sample_self.memory = 5290


 46%|█████████████████████████████████████▋                                           | 93/200 [03:21<03:17,  1.85s/it]

nan
can_sample_self.memory = 5291
can_sample_self.memory = 5291
nan
can_sample_self.memory = 5292
can_sample_self.memory = 5292
nan
can_sample_self.memory = 5293
can_sample_self.memory = 5293
nan
can_sample_self.memory = 5294
can_sample_self.memory = 5294
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5295
can_sample_self.memory = 5295
nan
can_sample_self.memory = 5296
can_sample_self.memory = 5296
nan
can_sample_self.memory = 5297
can_sample_self.memory = 5297
nan
can_sample_self.memory = 5298
can_sample_self.memory = 5298
nan
can_sample_self.memory = 5299
can_sample_self.memory = 5299
nan
can_sample_self.memory = 5300
can_sample_self.memory = 5300
nan
can_sample_self.memory = 5301
can_sample_self.memory = 5301
nan
can_sample_self.memory = 5302
can_sample_self.memory = 5302
nan
can_sample_self.memory = 5303
can_sample_self.memory = 5303
nan
can_sample_self.memory = 5304
can_sample_self.memory = 5304
nan
can_sample_self.memory = 5305
can_sample_self.memory = 

nan
can_sample_self.memory = 5424
can_sample_self.memory = 5424
nan
can_sample_self.memory = 5425
can_sample_self.memory = 5425
nan
can_sample_self.memory = 5426
can_sample_self.memory = 5426
nan
can_sample_self.memory = 5427
can_sample_self.memory = 5427
nan
can_sample_self.memory = 5428
can_sample_self.memory = 5428
nan
can_sample_self.memory = 5429
can_sample_self.memory = 5429
nan
can_sample_self.memory = 5430
can_sample_self.memory = 5430
nan
can_sample_self.memory = 5431
can_sample_self.memory = 5431
nan
can_sample_self.memory = 5432
can_sample_self.memory = 5432
nan
can_sample_self.memory = 5433
can_sample_self.memory = 5433
nan
can_sample_self.memory = 5434
can_sample_self.memory = 5434
nan
can_sample_self.memory = 5435
can_sample_self.memory = 5435
nan
can_sample_self.memory = 5436
can_sample_self.memory = 5436
nan
can_sample_self.memory = 5437
can_sample_self.memory = 5437
nan
can_sample_self.memory = 5438
can_sample_self.memory = 5438
nan
can_sample_self.memory = 5439
can_sa

 47%|██████████████████████████████████████                                           | 94/200 [03:31<07:23,  4.18s/it]

nan
can_sample_self.memory = 5556
can_sample_self.memory = 5556
nan
can_sample_self.memory = 5557
can_sample_self.memory = 5557
nan
can_sample_self.memory = 5558
can_sample_self.memory = 5558
nan
can_sample_self.memory = 5559
can_sample_self.memory = 5559
nan
can_sample_self.memory = 5560
can_sample_self.memory = 5560
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5561
can_sample_self.memory = 5561
nan
can_sample_self.memory = 5562
can_sample_self.memory = 5562
nan
can_sample_self.memory = 5563
can_sample_self.memory = 5563
nan
can_sample_self.memory = 5564
can_sample_self.memory = 5564
nan
can_sample_self.memory = 5565
can_sample_self.memory = 5565
nan
can_sample_self.memory = 5566
can_sample_self.memory = 5566
nan
can_sample_self.memory = 5567
can_sample_self.memory = 5567
nan
can_sample_self.memory = 5568
can_sample_self.memory = 5568
nan
can_sample_self.memory = 5569
can_sample_self.memory = 5569
nan
can_sample_self.memory = 5570
can_sample_self.memory = 

 48%|██████████████████████████████████████▍                                          | 95/200 [03:32<05:40,  3.25s/it]

nan
can_sample_self.memory = 5585
can_sample_self.memory = 5585
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5586
can_sample_self.memory = 5586
nan
can_sample_self.memory = 5587
can_sample_self.memory = 5587
nan
can_sample_self.memory = 5588
can_sample_self.memory = 5588
nan
can_sample_self.memory = 5589
can_sample_self.memory = 5589
nan
can_sample_self.memory = 5590
can_sample_self.memory = 5590
nan
can_sample_self.memory = 5591
can_sample_self.memory = 5591
nan
can_sample_self.memory = 5592
can_sample_self.memory = 5592
nan
can_sample_self.memory = 5593
can_sample_self.memory = 5593
nan
can_sample_self.memory = 5594
can_sample_self.memory = 5594
nan
can_sample_self.memory = 5595
can_sample_self.memory = 5595
nan
can_sample_self.memory = 5596
can_sample_self.memory = 5596
nan
can_sample_self.memory = 5597
can_sample_self.memory = 5597
nan
can_sample_self.memory = 5598
can_sample_self.memory = 5598
nan
can_sample_self.memory = 5599
can_sample_self.memory = 

 48%|██████████████████████████████████████▉                                          | 96/200 [03:34<05:16,  3.05s/it]

nan
can_sample_self.memory = 5651
can_sample_self.memory = 5651
nan
can_sample_self.memory = 5652
can_sample_self.memory = 5652
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5653
can_sample_self.memory = 5653
nan
can_sample_self.memory = 5654
can_sample_self.memory = 5654
nan
can_sample_self.memory = 5655
can_sample_self.memory = 5655
nan
can_sample_self.memory = 5656
can_sample_self.memory = 5656
nan
can_sample_self.memory = 5657
can_sample_self.memory = 5657
nan
can_sample_self.memory = 5658
can_sample_self.memory = 5658
nan
can_sample_self.memory = 5659
can_sample_self.memory = 5659
nan
can_sample_self.memory = 5660
can_sample_self.memory = 5660
nan
can_sample_self.memory = 5661
can_sample_self.memory = 5661
nan
can_sample_self.memory = 5662
can_sample_self.memory = 5662
nan
can_sample_self.memory = 5663
can_sample_self.memory = 5663
nan
can_sample_self.memory = 5664
can_sample_self.memory = 5664
nan
can_sample_self.memory = 5665
can_sample_self.memory = 

 48%|███████████████████████████████████████▎                                         | 97/200 [03:35<04:05,  2.38s/it]

can_sample_self.memory = 5670
can_sample_self.memory = 5670
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5671
can_sample_self.memory = 5671
nan
can_sample_self.memory = 5672
can_sample_self.memory = 5672
nan
can_sample_self.memory = 5673
can_sample_self.memory = 5673
nan
can_sample_self.memory = 5674
can_sample_self.memory = 5674
nan
can_sample_self.memory = 5675
can_sample_self.memory = 5675
nan
can_sample_self.memory = 5676
can_sample_self.memory = 5676
nan
can_sample_self.memory = 5677
can_sample_self.memory = 5677
nan
can_sample_self.memory = 5678
can_sample_self.memory = 5678
nan
can_sample_self.memory = 5679
can_sample_self.memory = 5679
nan
can_sample_self.memory = 5680
can_sample_self.memory = 5680
nan
can_sample_self.memory = 5681
can_sample_self.memory = 5681
nan
can_sample_self.memory = 5682
can_sample_self.memory = 5682
nan
can_sample_self.memory = 5683
can_sample_self.memory = 5683
nan
can_sample_self.memory = 5684
can_sample_self.memory = 5684

 49%|███████████████████████████████████████▋                                         | 98/200 [03:38<04:33,  2.68s/it]

nan
can_sample_self.memory = 5761
can_sample_self.memory = 5761
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5762
can_sample_self.memory = 5762
nan
can_sample_self.memory = 5763
can_sample_self.memory = 5763
nan
can_sample_self.memory = 5764
can_sample_self.memory = 5764
nan
can_sample_self.memory = 5765
can_sample_self.memory = 5765
nan
can_sample_self.memory = 5766
can_sample_self.memory = 5766
nan
can_sample_self.memory = 5767
can_sample_self.memory = 5767
nan
can_sample_self.memory = 5768
can_sample_self.memory = 5768
nan
can_sample_self.memory = 5769
can_sample_self.memory = 5769
nan
can_sample_self.memory = 5770
can_sample_self.memory = 5770
nan
can_sample_self.memory = 5771
can_sample_self.memory = 5771
nan
can_sample_self.memory = 5772
can_sample_self.memory = 5772
nan
can_sample_self.memory = 5773
can_sample_self.memory = 5773
nan


 50%|████████████████████████████████████████                                         | 99/200 [03:39<03:34,  2.12s/it]

can_sample_self.memory = 5774
can_sample_self.memory = 5774
nan
can_sample_self.memory = 5775
can_sample_self.memory = 5775
nan
can_sample_self.memory = 5776
can_sample_self.memory = 5776
nan
can_sample_self.memory = 5777
can_sample_self.memory = 5777
nan
can_sample_self.memory = 5778
can_sample_self.memory = 5778
nan
can_sample_self.memory = 5779
can_sample_self.memory = 5779
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5780
can_sample_self.memory = 5780
nan
can_sample_self.memory = 5781
can_sample_self.memory = 5781
nan
can_sample_self.memory = 5782
can_sample_self.memory = 5782
nan
can_sample_self.memory = 5783
can_sample_self.memory = 5783
nan
can_sample_self.memory = 5784
can_sample_self.memory = 5784
nan
can_sample_self.memory = 5785
can_sample_self.memory = 5785
nan
can_sample_self.memory = 5786
can_sample_self.memory = 5786
nan
can_sample_self.memory = 5787
can_sample_self.memory = 5787
nan
can_sample_self.memory = 5788
can_sample_self.memory = 5788

 50%|████████████████████████████████████████                                        | 100/200 [03:43<04:22,  2.63s/it]

can_sample_self.memory = 5880
can_sample_self.memory = 5880
nan
can_sample_self.memory = 5881
can_sample_self.memory = 5881
nan
can_sample_self.memory = 5882
can_sample_self.memory = 5882
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5883
can_sample_self.memory = 5883
nan
can_sample_self.memory = 5884
can_sample_self.memory = 5884
nan
can_sample_self.memory = 5885
can_sample_self.memory = 5885
nan
can_sample_self.memory = 5886
can_sample_self.memory = 5886
nan
can_sample_self.memory = 5887
can_sample_self.memory = 5887
nan
can_sample_self.memory = 5888
can_sample_self.memory = 5888
nan
can_sample_self.memory = 5889
can_sample_self.memory = 5889
nan
can_sample_self.memory = 5890
can_sample_self.memory = 5890
nan
can_sample_self.memory = 5891
can_sample_self.memory = 5891
nan
can_sample_self.memory = 5892
can_sample_self.memory = 5892
nan
can_sample_self.memory = 5893
can_sample_self.memory = 5893
nan
can_sample_self.memory = 5894
can_sample_self.memory = 5894

 50%|████████████████████████████████████████▍                                       | 101/200 [03:45<04:02,  2.45s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5935
can_sample_self.memory = 5935
nan
can_sample_self.memory = 5936
can_sample_self.memory = 5936
nan
can_sample_self.memory = 5937
can_sample_self.memory = 5937
nan
can_sample_self.memory = 5938
can_sample_self.memory = 5938
nan
can_sample_self.memory = 5939
can_sample_self.memory = 5939
nan
can_sample_self.memory = 5940
can_sample_self.memory = 5940
nan
can_sample_self.memory = 5941
can_sample_self.memory = 5941
nan
can_sample_self.memory = 5942
can_sample_self.memory = 5942
nan
can_sample_self.memory = 5943
can_sample_self.memory = 5943
nan
can_sample_self.memory = 5944
can_sample_self.memory = 5944
nan
can_sample_self.memory = 5945
can_sample_self.memory = 5945
nan
can_sample_self.memory = 5946
can_sample_self.memory = 5946
nan
can_sample_self.memory = 5947
can_sample_self.memory = 5947
nan
can_sample_self.memory = 5948
can_sample_self.memory = 5948
nan
can_sample_self.memory = 5949
can_sample_self.memory = 

 51%|████████████████████████████████████████▊                                       | 102/200 [03:46<03:28,  2.13s/it]

can_sample_self.memory = 5966
can_sample_self.memory = 5966
nan
can_sample_self.memory = 5967
can_sample_self.memory = 5967
nan
can_sample_self.memory = 5968
can_sample_self.memory = 5968
nan
can_sample_self.memory = 5969
can_sample_self.memory = 5969
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 5970
can_sample_self.memory = 5970
nan
can_sample_self.memory = 5971
can_sample_self.memory = 5971
nan
can_sample_self.memory = 5972
can_sample_self.memory = 5972
nan
can_sample_self.memory = 5973
can_sample_self.memory = 5973
nan
can_sample_self.memory = 5974
can_sample_self.memory = 5974
nan
can_sample_self.memory = 5975
can_sample_self.memory = 5975
nan
can_sample_self.memory = 5976
can_sample_self.memory = 5976
nan
can_sample_self.memory = 5977
can_sample_self.memory = 5977
nan
can_sample_self.memory = 5978
can_sample_self.memory = 5978
nan
can_sample_self.memory = 5979
can_sample_self.memory = 5979
nan
can_sample_self.memory = 5980
can_sample_self.memory = 5980

 52%|█████████████████████████████████████████▏                                      | 103/200 [03:49<03:25,  2.12s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6022
can_sample_self.memory = 6022
nan
can_sample_self.memory = 6023
can_sample_self.memory = 6023
nan
can_sample_self.memory = 6024
can_sample_self.memory = 6024
nan
can_sample_self.memory = 6025
can_sample_self.memory = 6025
nan
can_sample_self.memory = 6026
can_sample_self.memory = 6026
nan
can_sample_self.memory = 6027
can_sample_self.memory = 6027
nan
can_sample_self.memory = 6028
can_sample_self.memory = 6028
nan
can_sample_self.memory = 6029
can_sample_self.memory = 6029
nan
can_sample_self.memory = 6030
can_sample_self.memory = 6030
nan
can_sample_self.memory = 6031
can_sample_self.memory = 6031
nan
can_sample_self.memory = 6032
can_sample_self.memory = 6032
nan
can_sample_self.memory = 6033
can_sample_self.memory = 6033
nan
can_sample_self.memory = 6034
can_sample_self.memory = 6034
nan
can_sample_self.memory = 6035
can_sample_self.memory = 6035
nan
can_sample_self.memory = 6036
can_sample_self.memory = 

 52%|█████████████████████████████████████████▌                                      | 104/200 [03:53<04:28,  2.79s/it]

can_sample_self.memory = 6139
can_sample_self.memory = 6139
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6140
can_sample_self.memory = 6140
nan
can_sample_self.memory = 6141
can_sample_self.memory = 6141
nan
can_sample_self.memory = 6142
can_sample_self.memory = 6142
nan
can_sample_self.memory = 6143
can_sample_self.memory = 6143
nan
can_sample_self.memory = 6144
can_sample_self.memory = 6144
nan
can_sample_self.memory = 6145
can_sample_self.memory = 6145
nan
can_sample_self.memory = 6146
can_sample_self.memory = 6146
nan
can_sample_self.memory = 6147
can_sample_self.memory = 6147
nan
can_sample_self.memory = 6148
can_sample_self.memory = 6148
nan
can_sample_self.memory = 6149
can_sample_self.memory = 6149
nan
can_sample_self.memory = 6150
can_sample_self.memory = 6150
nan
can_sample_self.memory = 6151
can_sample_self.memory = 6151
nan
can_sample_self.memory = 6152
can_sample_self.memory = 6152
nan
can_sample_self.memory = 6153
can_sample_self.memory = 6153

nan
can_sample_self.memory = 6271
can_sample_self.memory = 6271
nan
can_sample_self.memory = 6272
can_sample_self.memory = 6272
nan
can_sample_self.memory = 6273
can_sample_self.memory = 6273
nan
can_sample_self.memory = 6274
can_sample_self.memory = 6274
nan
can_sample_self.memory = 6275
can_sample_self.memory = 6275
nan
can_sample_self.memory = 6276
can_sample_self.memory = 6276
nan
can_sample_self.memory = 6277
can_sample_self.memory = 6277
nan
can_sample_self.memory = 6278
can_sample_self.memory = 6278
nan
can_sample_self.memory = 6279
can_sample_self.memory = 6279
nan
can_sample_self.memory = 6280
can_sample_self.memory = 6280
nan
can_sample_self.memory = 6281
can_sample_self.memory = 6281
nan
can_sample_self.memory = 6282
can_sample_self.memory = 6282
nan
can_sample_self.memory = 6283
can_sample_self.memory = 6283
nan
can_sample_self.memory = 6284
can_sample_self.memory = 6284
nan
can_sample_self.memory = 6285
can_sample_self.memory = 6285
nan
can_sample_self.memory = 6286
can_sa

 52%|██████████████████████████████████████████                                      | 105/200 [04:01<06:47,  4.29s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6353
can_sample_self.memory = 6353
nan
can_sample_self.memory = 6354
can_sample_self.memory = 6354
nan
can_sample_self.memory = 6355
can_sample_self.memory = 6355
nan
can_sample_self.memory = 6356
can_sample_self.memory = 6356
nan
can_sample_self.memory = 6357
can_sample_self.memory = 6357
nan
can_sample_self.memory = 6358
can_sample_self.memory = 6358
nan
can_sample_self.memory = 6359
can_sample_self.memory = 6359
nan
can_sample_self.memory = 6360
can_sample_self.memory = 6360
nan
can_sample_self.memory = 6361
can_sample_self.memory = 6361
nan
can_sample_self.memory = 6362
can_sample_self.memory = 6362
nan
can_sample_self.memory = 6363
can_sample_self.memory = 6363
nan
can_sample_self.memory = 6364
can_sample_self.memory = 6364
nan
can_sample_self.memory = 6365
can_sample_self.memory = 6365
nan
can_sample_self.memory = 6366
can_sample_self.memory = 6366
nan
can_sample_self.memory = 6367
can_sample_self.memory = 

 53%|██████████████████████████████████████████▍                                     | 106/200 [04:04<06:11,  3.95s/it]

nan
can_sample_self.memory = 6433
can_sample_self.memory = 6433
nan
can_sample_self.memory = 6434
can_sample_self.memory = 6434
nan
can_sample_self.memory = 6435
can_sample_self.memory = 6435
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6436
can_sample_self.memory = 6436
nan
can_sample_self.memory = 6437
can_sample_self.memory = 6437
nan
can_sample_self.memory = 6438
can_sample_self.memory = 6438
nan
can_sample_self.memory = 6439
can_sample_self.memory = 6439
nan
can_sample_self.memory = 6440
can_sample_self.memory = 6440
nan
can_sample_self.memory = 6441
can_sample_self.memory = 6441
nan
can_sample_self.memory = 6442
can_sample_self.memory = 6442
nan
can_sample_self.memory = 6443
can_sample_self.memory = 6443
nan
can_sample_self.memory = 6444
can_sample_self.memory = 6444
nan
can_sample_self.memory = 6445
can_sample_self.memory = 6445
nan
can_sample_self.memory = 6446
can_sample_self.memory = 6446
nan
can_sample_self.memory = 6447
can_sample_self.memory = 

 54%|██████████████████████████████████████████▊                                     | 107/200 [04:05<04:55,  3.18s/it]

nan
can_sample_self.memory = 6465
can_sample_self.memory = 6465
nan
can_sample_self.memory = 6466
can_sample_self.memory = 6466
nan
can_sample_self.memory = 6467
can_sample_self.memory = 6467
nan
can_sample_self.memory = 6468
can_sample_self.memory = 6468
nan
can_sample_self.memory = 6469
can_sample_self.memory = 6469
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6470
can_sample_self.memory = 6470
nan
can_sample_self.memory = 6471
can_sample_self.memory = 6471
nan
can_sample_self.memory = 6472
can_sample_self.memory = 6472
nan
can_sample_self.memory = 6473
can_sample_self.memory = 6473
nan
can_sample_self.memory = 6474
can_sample_self.memory = 6474
nan
can_sample_self.memory = 6475
can_sample_self.memory = 6475
nan
can_sample_self.memory = 6476
can_sample_self.memory = 6476
nan
can_sample_self.memory = 6477
can_sample_self.memory = 6477
nan
can_sample_self.memory = 6478
can_sample_self.memory = 6478
nan
can_sample_self.memory = 6479
can_sample_self.memory = 

 54%|███████████████████████████████████████████▏                                    | 108/200 [04:06<03:50,  2.50s/it]

nan
can_sample_self.memory = 6488
can_sample_self.memory = 6488
nan
can_sample_self.memory = 6489
can_sample_self.memory = 6489
nan
can_sample_self.memory = 6490
can_sample_self.memory = 6490
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6491
can_sample_self.memory = 6491
nan
can_sample_self.memory = 6492
can_sample_self.memory = 6492
nan
can_sample_self.memory = 6493
can_sample_self.memory = 6493
nan
can_sample_self.memory = 6494
can_sample_self.memory = 6494
nan
can_sample_self.memory = 6495
can_sample_self.memory = 6495
nan
can_sample_self.memory = 6496
can_sample_self.memory = 6496
nan
can_sample_self.memory = 6497
can_sample_self.memory = 6497
nan
can_sample_self.memory = 6498
can_sample_self.memory = 6498
nan
can_sample_self.memory = 6499
can_sample_self.memory = 6499
nan
can_sample_self.memory = 6500
can_sample_self.memory = 6500
nan
can_sample_self.memory = 6501
can_sample_self.memory = 6501
nan
can_sample_self.memory = 6502
can_sample_self.memory = 

 55%|███████████████████████████████████████████▌                                    | 109/200 [04:09<04:07,  2.72s/it]

nan
can_sample_self.memory = 6572
can_sample_self.memory = 6572
nan
can_sample_self.memory = 6573
can_sample_self.memory = 6573
nan
can_sample_self.memory = 6574
can_sample_self.memory = 6574
nan
can_sample_self.memory = 6575
can_sample_self.memory = 6575
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6576
can_sample_self.memory = 6576
nan
can_sample_self.memory = 6577
can_sample_self.memory = 6577
nan
can_sample_self.memory = 6578
can_sample_self.memory = 6578
nan
can_sample_self.memory = 6579
can_sample_self.memory = 6579
nan
can_sample_self.memory = 6580
can_sample_self.memory = 6580
nan
can_sample_self.memory = 6581
can_sample_self.memory = 6581
nan
can_sample_self.memory = 6582
can_sample_self.memory = 6582
nan
can_sample_self.memory = 6583
can_sample_self.memory = 6583
nan
can_sample_self.memory = 6584
can_sample_self.memory = 6584
nan
can_sample_self.memory = 6585
can_sample_self.memory = 6585
nan
can_sample_self.memory = 6586
can_sample_self.memory = 

 55%|████████████████████████████████████████████                                    | 110/200 [04:11<03:33,  2.37s/it]

can_sample_self.memory = 6609
can_sample_self.memory = 6609
nan
can_sample_self.memory = 6610
can_sample_self.memory = 6610
nan
can_sample_self.memory = 6611
can_sample_self.memory = 6611
nan
can_sample_self.memory = 6612
can_sample_self.memory = 6612
nan
can_sample_self.memory = 6613
can_sample_self.memory = 6613
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6614
can_sample_self.memory = 6614
nan
can_sample_self.memory = 6615
can_sample_self.memory = 6615
nan
can_sample_self.memory = 6616
can_sample_self.memory = 6616
nan
can_sample_self.memory = 6617
can_sample_self.memory = 6617
nan
can_sample_self.memory = 6618
can_sample_self.memory = 6618
nan
can_sample_self.memory = 6619
can_sample_self.memory = 6619
nan
can_sample_self.memory = 6620
can_sample_self.memory = 6620
nan
can_sample_self.memory = 6621
can_sample_self.memory = 6621
nan
can_sample_self.memory = 6622
can_sample_self.memory = 6622
nan
can_sample_self.memory = 6623
can_sample_self.memory = 6623

 56%|████████████████████████████████████████████▍                                   | 111/200 [04:12<02:43,  1.83s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6626
can_sample_self.memory = 6626
nan
can_sample_self.memory = 6627
can_sample_self.memory = 6627
nan
can_sample_self.memory = 6628
can_sample_self.memory = 6628
nan
can_sample_self.memory = 6629
can_sample_self.memory = 6629
nan
can_sample_self.memory = 6630
can_sample_self.memory = 6630
nan
can_sample_self.memory = 6631
can_sample_self.memory = 6631
nan
can_sample_self.memory = 6632
can_sample_self.memory = 6632
nan
can_sample_self.memory = 6633
can_sample_self.memory = 6633
nan
can_sample_self.memory = 6634
can_sample_self.memory = 6634
nan
can_sample_self.memory = 6635
can_sample_self.memory = 6635
nan
can_sample_self.memory = 6636
can_sample_self.memory = 6636
nan
can_sample_self.memory = 6637
can_sample_self.memory = 6637
nan
can_sample_self.memory = 6638
can_sample_self.memory = 6638
nan
can_sample_self.memory = 6639
can_sample_self.memory = 6639
nan
can_sample_self.memory = 6640
can_sample_self.memory = 

 56%|████████████████████████████████████████████▊                                   | 112/200 [04:13<02:18,  1.58s/it]

nan
can_sample_self.memory = 6645
can_sample_self.memory = 6645
nan
can_sample_self.memory = 6646
can_sample_self.memory = 6646
nan
can_sample_self.memory = 6647
can_sample_self.memory = 6647
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6648
can_sample_self.memory = 6648
nan
can_sample_self.memory = 6649
can_sample_self.memory = 6649
nan
can_sample_self.memory = 6650
can_sample_self.memory = 6650
nan
can_sample_self.memory = 6651
can_sample_self.memory = 6651
nan
can_sample_self.memory = 6652
can_sample_self.memory = 6652
nan
can_sample_self.memory = 6653
can_sample_self.memory = 6653
nan
can_sample_self.memory = 6654
can_sample_self.memory = 6654
nan
can_sample_self.memory = 6655
can_sample_self.memory = 6655
nan
can_sample_self.memory = 6656
can_sample_self.memory = 6656
nan
can_sample_self.memory = 6657
can_sample_self.memory = 6657
nan
can_sample_self.memory = 6658
can_sample_self.memory = 6658
nan
can_sample_self.memory = 6659
can_sample_self.memory = 

 56%|█████████████████████████████████████████████▏                                  | 113/200 [04:16<03:04,  2.12s/it]

can_sample_self.memory = 6734
can_sample_self.memory = 6734
nan
can_sample_self.memory = 6735
can_sample_self.memory = 6735
nan
can_sample_self.memory = 6736
can_sample_self.memory = 6736
nan
can_sample_self.memory = 6737
can_sample_self.memory = 6737
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6738
can_sample_self.memory = 6738
nan
can_sample_self.memory = 6739
can_sample_self.memory = 6739
nan
can_sample_self.memory = 6740
can_sample_self.memory = 6740
nan
can_sample_self.memory = 6741
can_sample_self.memory = 6741
nan
can_sample_self.memory = 6742
can_sample_self.memory = 6742
nan
can_sample_self.memory = 6743
can_sample_self.memory = 6743
nan
can_sample_self.memory = 6744
can_sample_self.memory = 6744
nan
can_sample_self.memory = 6745
can_sample_self.memory = 6745
nan
can_sample_self.memory = 6746
can_sample_self.memory = 6746
nan
can_sample_self.memory = 6747
can_sample_self.memory = 6747
nan
can_sample_self.memory = 6748
can_sample_self.memory = 6748

 57%|█████████████████████████████████████████████▌                                  | 114/200 [04:21<04:08,  2.89s/it]

can_sample_self.memory = 6862
can_sample_self.memory = 6862
nan
can_sample_self.memory = 6863
can_sample_self.memory = 6863
nan
can_sample_self.memory = 6864
can_sample_self.memory = 6864
nan
can_sample_self.memory = 6865
can_sample_self.memory = 6865
nan
can_sample_self.memory = 6866
can_sample_self.memory = 6866
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 6867
can_sample_self.memory = 6867
nan
can_sample_self.memory = 6868
can_sample_self.memory = 6868
nan
can_sample_self.memory = 6869
can_sample_self.memory = 6869
nan
can_sample_self.memory = 6870
can_sample_self.memory = 6870
nan
can_sample_self.memory = 6871
can_sample_self.memory = 6871
nan
can_sample_self.memory = 6872
can_sample_self.memory = 6872
nan
can_sample_self.memory = 6873
can_sample_self.memory = 6873
nan
can_sample_self.memory = 6874
can_sample_self.memory = 6874
nan
can_sample_self.memory = 6875
can_sample_self.memory = 6875
nan
can_sample_self.memory = 6876
can_sample_self.memory = 6876

nan
can_sample_self.memory = 6991
can_sample_self.memory = 6991
nan
can_sample_self.memory = 6992
can_sample_self.memory = 6992
nan
can_sample_self.memory = 6993
can_sample_self.memory = 6993
nan
can_sample_self.memory = 6994
can_sample_self.memory = 6994
nan
can_sample_self.memory = 6995
can_sample_self.memory = 6995
nan
can_sample_self.memory = 6996
can_sample_self.memory = 6996
nan
can_sample_self.memory = 6997
can_sample_self.memory = 6997
nan
can_sample_self.memory = 6998
can_sample_self.memory = 6998
nan
can_sample_self.memory = 6999
can_sample_self.memory = 6999
nan
can_sample_self.memory = 7000
can_sample_self.memory = 7000
nan
can_sample_self.memory = 7001
can_sample_self.memory = 7001
nan
can_sample_self.memory = 7002
can_sample_self.memory = 7002
nan
can_sample_self.memory = 7003
can_sample_self.memory = 7003
nan
can_sample_self.memory = 7004
can_sample_self.memory = 7004
nan
can_sample_self.memory = 7005
can_sample_self.memory = 7005
nan
can_sample_self.memory = 7006
can_sa

 57%|██████████████████████████████████████████████                                  | 115/200 [04:26<05:17,  3.74s/it]

nan
can_sample_self.memory = 7020
can_sample_self.memory = 7020
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7021
can_sample_self.memory = 7021
nan
can_sample_self.memory = 7022
can_sample_self.memory = 7022
nan
can_sample_self.memory = 7023
can_sample_self.memory = 7023
nan
can_sample_self.memory = 7024
can_sample_self.memory = 7024
nan
can_sample_self.memory = 7025
can_sample_self.memory = 7025
nan
can_sample_self.memory = 7026
can_sample_self.memory = 7026
nan
can_sample_self.memory = 7027
can_sample_self.memory = 7027
nan
can_sample_self.memory = 7028
can_sample_self.memory = 7028
nan
can_sample_self.memory = 7029
can_sample_self.memory = 7029
nan
can_sample_self.memory = 7030
can_sample_self.memory = 7030
nan
can_sample_self.memory = 7031
can_sample_self.memory = 7031
nan
can_sample_self.memory = 7032
can_sample_self.memory = 7032
nan
can_sample_self.memory = 7033
can_sample_self.memory = 7033
nan
can_sample_self.memory = 7034
can_sample_self.memory = 

can_sample_self.memory = 7148
can_sample_self.memory = 7148
nan
can_sample_self.memory = 7149
can_sample_self.memory = 7149
nan
can_sample_self.memory = 7150
can_sample_self.memory = 7150
nan
can_sample_self.memory = 7151
can_sample_self.memory = 7151
nan
can_sample_self.memory = 7152
can_sample_self.memory = 7152
nan
can_sample_self.memory = 7153
can_sample_self.memory = 7153
nan
can_sample_self.memory = 7154
can_sample_self.memory = 7154
nan
can_sample_self.memory = 7155
can_sample_self.memory = 7155
nan
can_sample_self.memory = 7156
can_sample_self.memory = 7156
nan
can_sample_self.memory = 7157
can_sample_self.memory = 7157
nan
can_sample_self.memory = 7158
can_sample_self.memory = 7158
nan
can_sample_self.memory = 7159
can_sample_self.memory = 7159
nan
can_sample_self.memory = 7160
can_sample_self.memory = 7160


 58%|██████████████████████████████████████████████▍                                 | 116/200 [04:32<05:51,  4.19s/it]

nan
can_sample_self.memory = 7161
can_sample_self.memory = 7161
nan
can_sample_self.memory = 7162
can_sample_self.memory = 7162
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7163
can_sample_self.memory = 7163
nan
can_sample_self.memory = 7164
can_sample_self.memory = 7164
nan
can_sample_self.memory = 7165
can_sample_self.memory = 7165
nan
can_sample_self.memory = 7166
can_sample_self.memory = 7166
nan
can_sample_self.memory = 7167
can_sample_self.memory = 7167
nan
can_sample_self.memory = 7168
can_sample_self.memory = 7168
nan
can_sample_self.memory = 7169
can_sample_self.memory = 7169
nan
can_sample_self.memory = 7170
can_sample_self.memory = 7170
nan
can_sample_self.memory = 7171
can_sample_self.memory = 7171
nan
can_sample_self.memory = 7172
can_sample_self.memory = 7172
nan
can_sample_self.memory = 7173
can_sample_self.memory = 7173
nan
can_sample_self.memory = 7174
can_sample_self.memory = 7174


 58%|██████████████████████████████████████████████▊                                 | 117/200 [04:32<04:23,  3.17s/it]

nan
can_sample_self.memory = 7175
can_sample_self.memory = 7175
nan
can_sample_self.memory = 7176
can_sample_self.memory = 7176
nan
can_sample_self.memory = 7177
can_sample_self.memory = 7177
nan
can_sample_self.memory = 7178
can_sample_self.memory = 7178
nan
can_sample_self.memory = 7179
can_sample_self.memory = 7179
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7180
can_sample_self.memory = 7180
nan
can_sample_self.memory = 7181
can_sample_self.memory = 7181
nan
can_sample_self.memory = 7182
can_sample_self.memory = 7182
nan
can_sample_self.memory = 7183
can_sample_self.memory = 7183
nan
can_sample_self.memory = 7184
can_sample_self.memory = 7184
nan
can_sample_self.memory = 7185
can_sample_self.memory = 7185
nan
can_sample_self.memory = 7186
can_sample_self.memory = 7186
nan
can_sample_self.memory = 7187
can_sample_self.memory = 7187
nan
can_sample_self.memory = 7188
can_sample_self.memory = 7188
nan
can_sample_self.memory = 7189
can_sample_self.memory = 

 59%|███████████████████████████████████████████████▏                                | 118/200 [04:35<04:04,  2.98s/it]

can_sample_self.memory = 7243
can_sample_self.memory = 7243
nan
can_sample_self.memory = 7244
can_sample_self.memory = 7244
nan
can_sample_self.memory = 7245
can_sample_self.memory = 7245
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7246
can_sample_self.memory = 7246
nan
can_sample_self.memory = 7247
can_sample_self.memory = 7247
nan
can_sample_self.memory = 7248
can_sample_self.memory = 7248
nan
can_sample_self.memory = 7249
can_sample_self.memory = 7249
nan
can_sample_self.memory = 7250
can_sample_self.memory = 7250
nan
can_sample_self.memory = 7251
can_sample_self.memory = 7251
nan
can_sample_self.memory = 7252
can_sample_self.memory = 7252
nan
can_sample_self.memory = 7253
can_sample_self.memory = 7253
nan
can_sample_self.memory = 7254
can_sample_self.memory = 7254
nan
can_sample_self.memory = 7255
can_sample_self.memory = 7255
nan
can_sample_self.memory = 7256
can_sample_self.memory = 7256
nan
can_sample_self.memory = 7257
can_sample_self.memory = 7257

nan
can_sample_self.memory = 7376
can_sample_self.memory = 7376
nan
can_sample_self.memory = 7377
can_sample_self.memory = 7377
nan
can_sample_self.memory = 7378
can_sample_self.memory = 7378
nan
can_sample_self.memory = 7379
can_sample_self.memory = 7379
nan
can_sample_self.memory = 7380
can_sample_self.memory = 7380
nan
can_sample_self.memory = 7381
can_sample_self.memory = 7381
nan
can_sample_self.memory = 7382
can_sample_self.memory = 7382
nan
can_sample_self.memory = 7383
can_sample_self.memory = 7383
nan
can_sample_self.memory = 7384
can_sample_self.memory = 7384
nan
can_sample_self.memory = 7385
can_sample_self.memory = 7385
nan
can_sample_self.memory = 7386
can_sample_self.memory = 7386
nan
can_sample_self.memory = 7387
can_sample_self.memory = 7387


 60%|███████████████████████████████████████████████▌                                | 119/200 [04:40<04:59,  3.70s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7388
can_sample_self.memory = 7388
nan
can_sample_self.memory = 7389
can_sample_self.memory = 7389
nan
can_sample_self.memory = 7390
can_sample_self.memory = 7390
nan
can_sample_self.memory = 7391
can_sample_self.memory = 7391
nan
can_sample_self.memory = 7392
can_sample_self.memory = 7392
nan
can_sample_self.memory = 7393
can_sample_self.memory = 7393
nan
can_sample_self.memory = 7394
can_sample_self.memory = 7394
nan
can_sample_self.memory = 7395
can_sample_self.memory = 7395
nan
can_sample_self.memory = 7396
can_sample_self.memory = 7396
nan
can_sample_self.memory = 7397
can_sample_self.memory = 7397
nan
can_sample_self.memory = 7398
can_sample_self.memory = 7398
nan
can_sample_self.memory = 7399
can_sample_self.memory = 7399
nan
can_sample_self.memory = 7400
can_sample_self.memory = 7400
nan
can_sample_self.memory = 7401
can_sample_self.memory = 7401
nan
can_sample_self.memory = 7402
can_sample_self.memory = 

 60%|████████████████████████████████████████████████                                | 120/200 [04:41<03:51,  2.90s/it]

nan
can_sample_self.memory = 7407
can_sample_self.memory = 7407
nan
can_sample_self.memory = 7408
can_sample_self.memory = 7408
nan
can_sample_self.memory = 7409
can_sample_self.memory = 7409
nan
can_sample_self.memory = 7410
can_sample_self.memory = 7410
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7411
can_sample_self.memory = 7411
nan
can_sample_self.memory = 7412
can_sample_self.memory = 7412
nan
can_sample_self.memory = 7413
can_sample_self.memory = 7413
nan
can_sample_self.memory = 7414
can_sample_self.memory = 7414
nan
can_sample_self.memory = 7415
can_sample_self.memory = 7415


 60%|████████████████████████████████████████████████▍                               | 121/200 [04:42<02:50,  2.16s/it]

nan
can_sample_self.memory = 7416
can_sample_self.memory = 7416
nan
can_sample_self.memory = 7417
can_sample_self.memory = 7417
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7418
can_sample_self.memory = 7418
nan
can_sample_self.memory = 7419
can_sample_self.memory = 7419
nan
can_sample_self.memory = 7420
can_sample_self.memory = 7420
nan
can_sample_self.memory = 7421
can_sample_self.memory = 7421
nan
can_sample_self.memory = 7422
can_sample_self.memory = 7422
nan
can_sample_self.memory = 7423
can_sample_self.memory = 7423
nan
can_sample_self.memory = 7424
can_sample_self.memory = 7424
nan
can_sample_self.memory = 7425
can_sample_self.memory = 7425
nan
can_sample_self.memory = 7426
can_sample_self.memory = 7426
nan
can_sample_self.memory = 7427
can_sample_self.memory = 7427


 61%|████████████████████████████████████████████████▊                               | 122/200 [04:42<02:11,  1.68s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7428
can_sample_self.memory = 7428
nan
can_sample_self.memory = 7429
can_sample_self.memory = 7429
nan
can_sample_self.memory = 7430
can_sample_self.memory = 7430
nan
can_sample_self.memory = 7431
can_sample_self.memory = 7431
nan
can_sample_self.memory = 7432
can_sample_self.memory = 7432
nan
can_sample_self.memory = 7433
can_sample_self.memory = 7433
nan
can_sample_self.memory = 7434
can_sample_self.memory = 7434
nan
can_sample_self.memory = 7435
can_sample_self.memory = 7435
nan
can_sample_self.memory = 7436
can_sample_self.memory = 7436
nan
can_sample_self.memory = 7437
can_sample_self.memory = 7437
nan
can_sample_self.memory = 7438
can_sample_self.memory = 7438
nan
can_sample_self.memory = 7439
can_sample_self.memory = 7439
nan
can_sample_self.memory = 7440
can_sample_self.memory = 7440
nan
can_sample_self.memory = 7441
can_sample_self.memory = 7441
nan
can_sample_self.memory = 7442
can_sample_self.memory = 

 62%|█████████████████████████████████████████████████▏                              | 123/200 [04:46<02:48,  2.18s/it]

can_sample_self.memory = 7512
can_sample_self.memory = 7512
nan
can_sample_self.memory = 7513
can_sample_self.memory = 7513
nan
can_sample_self.memory = 7514
can_sample_self.memory = 7514
nan
can_sample_self.memory = 7515
can_sample_self.memory = 7515
nan
can_sample_self.memory = 7516
can_sample_self.memory = 7516
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7517
can_sample_self.memory = 7517
nan
can_sample_self.memory = 7518
can_sample_self.memory = 7518
nan
can_sample_self.memory = 7519
can_sample_self.memory = 7519
nan
can_sample_self.memory = 7520
can_sample_self.memory = 7520
nan
can_sample_self.memory = 7521
can_sample_self.memory = 7521
nan
can_sample_self.memory = 7522
can_sample_self.memory = 7522
nan
can_sample_self.memory = 7523
can_sample_self.memory = 7523
nan
can_sample_self.memory = 7524
can_sample_self.memory = 7524
nan
can_sample_self.memory = 7525
can_sample_self.memory = 7525
nan
can_sample_self.memory = 7526
can_sample_self.memory = 7526

 62%|█████████████████████████████████████████████████▌                              | 124/200 [04:47<02:29,  1.97s/it]

nan
can_sample_self.memory = 7552
can_sample_self.memory = 7552
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7553
can_sample_self.memory = 7553
nan
can_sample_self.memory = 7554
can_sample_self.memory = 7554
nan
can_sample_self.memory = 7555
can_sample_self.memory = 7555
nan
can_sample_self.memory = 7556
can_sample_self.memory = 7556
nan
can_sample_self.memory = 7557
can_sample_self.memory = 7557
nan
can_sample_self.memory = 7558
can_sample_self.memory = 7558
nan
can_sample_self.memory = 7559
can_sample_self.memory = 7559
nan
can_sample_self.memory = 7560
can_sample_self.memory = 7560
nan
can_sample_self.memory = 7561
can_sample_self.memory = 7561
nan
can_sample_self.memory = 7562
can_sample_self.memory = 7562
nan
can_sample_self.memory = 7563
can_sample_self.memory = 7563
nan
can_sample_self.memory = 7564
can_sample_self.memory = 7564
nan
can_sample_self.memory = 7565
can_sample_self.memory = 7565
nan
can_sample_self.memory = 7566
can_sample_self.memory = 

 62%|██████████████████████████████████████████████████                              | 125/200 [04:51<03:06,  2.48s/it]

nan
can_sample_self.memory = 7647
can_sample_self.memory = 7647
nan
can_sample_self.memory = 7648
can_sample_self.memory = 7648
nan
can_sample_self.memory = 7649
can_sample_self.memory = 7649
nan
can_sample_self.memory = 7650
can_sample_self.memory = 7650
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7651
can_sample_self.memory = 7651
nan
can_sample_self.memory = 7652
can_sample_self.memory = 7652
nan
can_sample_self.memory = 7653
can_sample_self.memory = 7653
nan
can_sample_self.memory = 7654
can_sample_self.memory = 7654
nan
can_sample_self.memory = 7655
can_sample_self.memory = 7655
nan
can_sample_self.memory = 7656
can_sample_self.memory = 7656
nan
can_sample_self.memory = 7657
can_sample_self.memory = 7657
nan
can_sample_self.memory = 7658
can_sample_self.memory = 7658
nan
can_sample_self.memory = 7659
can_sample_self.memory = 7659
nan
can_sample_self.memory = 7660
can_sample_self.memory = 7660
nan
can_sample_self.memory = 7661
can_sample_self.memory = 

 63%|██████████████████████████████████████████████████▍                             | 126/200 [04:55<03:41,  3.00s/it]

nan
can_sample_self.memory = 7755
can_sample_self.memory = 7755
nan
can_sample_self.memory = 7756
can_sample_self.memory = 7756
nan
can_sample_self.memory = 7757
can_sample_self.memory = 7757
nan
can_sample_self.memory = 7758
can_sample_self.memory = 7758
nan
can_sample_self.memory = 7759
can_sample_self.memory = 7759
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7760
can_sample_self.memory = 7760
nan
can_sample_self.memory = 7761
can_sample_self.memory = 7761
nan
can_sample_self.memory = 7762
can_sample_self.memory = 7762
nan
can_sample_self.memory = 7763
can_sample_self.memory = 7763
nan
can_sample_self.memory = 7764
can_sample_self.memory = 7764
nan
can_sample_self.memory = 7765
can_sample_self.memory = 7765
nan
can_sample_self.memory = 7766
can_sample_self.memory = 7766
nan
can_sample_self.memory = 7767
can_sample_self.memory = 7767
nan
can_sample_self.memory = 7768
can_sample_self.memory = 7768
nan
can_sample_self.memory = 7769
can_sample_self.memory = 

 64%|██████████████████████████████████████████████████▊                             | 127/200 [04:56<02:52,  2.37s/it]

can_sample_self.memory = 7778
can_sample_self.memory = 7778
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7779
can_sample_self.memory = 7779
nan
can_sample_self.memory = 7780
can_sample_self.memory = 7780
nan
can_sample_self.memory = 7781
can_sample_self.memory = 7781
nan
can_sample_self.memory = 7782
can_sample_self.memory = 7782
nan
can_sample_self.memory = 7783
can_sample_self.memory = 7783
nan
can_sample_self.memory = 7784
can_sample_self.memory = 7784
nan
can_sample_self.memory = 7785
can_sample_self.memory = 7785
nan
can_sample_self.memory = 7786
can_sample_self.memory = 7786
nan
can_sample_self.memory = 7787
can_sample_self.memory = 7787
nan
can_sample_self.memory = 7788
can_sample_self.memory = 7788
nan
can_sample_self.memory = 7789
can_sample_self.memory = 7789
nan
can_sample_self.memory = 7790
can_sample_self.memory = 7790
nan
can_sample_self.memory = 7791
can_sample_self.memory = 7791
nan
can_sample_self.memory = 7792
can_sample_self.memory = 7792

 64%|███████████████████████████████████████████████████▏                            | 128/200 [05:00<03:34,  2.99s/it]

nan
can_sample_self.memory = 7892
can_sample_self.memory = 7892
nan
can_sample_self.memory = 7893
can_sample_self.memory = 7893
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7894
can_sample_self.memory = 7894
nan
can_sample_self.memory = 7895
can_sample_self.memory = 7895
nan
can_sample_self.memory = 7896
can_sample_self.memory = 7896
nan
can_sample_self.memory = 7897
can_sample_self.memory = 7897
nan
can_sample_self.memory = 7898
can_sample_self.memory = 7898
nan
can_sample_self.memory = 7899
can_sample_self.memory = 7899


 64%|███████████████████████████████████████████████████▌                            | 129/200 [05:01<02:37,  2.22s/it]

nan
can_sample_self.memory = 7900
can_sample_self.memory = 7900
nan
can_sample_self.memory = 7901
can_sample_self.memory = 7901
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7902
can_sample_self.memory = 7902
nan
can_sample_self.memory = 7903
can_sample_self.memory = 7903
nan
can_sample_self.memory = 7904
can_sample_self.memory = 7904
nan
can_sample_self.memory = 7905
can_sample_self.memory = 7905
nan
can_sample_self.memory = 7906
can_sample_self.memory = 7906
nan
can_sample_self.memory = 7907
can_sample_self.memory = 7907
nan
can_sample_self.memory = 7908
can_sample_self.memory = 7908
nan
can_sample_self.memory = 7909
can_sample_self.memory = 7909
nan
can_sample_self.memory = 7910
can_sample_self.memory = 7910
nan
can_sample_self.memory = 7911
can_sample_self.memory = 7911
nan
can_sample_self.memory = 7912
can_sample_self.memory = 7912
nan
can_sample_self.memory = 7913
can_sample_self.memory = 7913
nan
can_sample_self.memory = 7914
can_sample_self.memory = 

 65%|████████████████████████████████████████████████████                            | 130/200 [05:02<02:25,  2.07s/it]

can_sample_self.memory = 7942
can_sample_self.memory = 7942
nan
can_sample_self.memory = 7943
can_sample_self.memory = 7943
nan
can_sample_self.memory = 7944
can_sample_self.memory = 7944
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 7945
can_sample_self.memory = 7945
nan
can_sample_self.memory = 7946
can_sample_self.memory = 7946
nan
can_sample_self.memory = 7947
can_sample_self.memory = 7947
nan
can_sample_self.memory = 7948
can_sample_self.memory = 7948
nan
can_sample_self.memory = 7949
can_sample_self.memory = 7949
nan
can_sample_self.memory = 7950
can_sample_self.memory = 7950
nan
can_sample_self.memory = 7951
can_sample_self.memory = 7951
nan
can_sample_self.memory = 7952
can_sample_self.memory = 7952
nan
can_sample_self.memory = 7953
can_sample_self.memory = 7953
nan
can_sample_self.memory = 7954
can_sample_self.memory = 7954
nan
can_sample_self.memory = 7955
can_sample_self.memory = 7955
nan
can_sample_self.memory = 7956
can_sample_self.memory = 7956

 66%|████████████████████████████████████████████████████▍                           | 131/200 [05:05<02:39,  2.31s/it]

nan
can_sample_self.memory = 8018
can_sample_self.memory = 8018
nan
can_sample_self.memory = 8019
can_sample_self.memory = 8019
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8020
can_sample_self.memory = 8020
nan
can_sample_self.memory = 8021
can_sample_self.memory = 8021
nan
can_sample_self.memory = 8022
can_sample_self.memory = 8022
nan
can_sample_self.memory = 8023
can_sample_self.memory = 8023
nan
can_sample_self.memory = 8024
can_sample_self.memory = 8024
nan
can_sample_self.memory = 8025
can_sample_self.memory = 8025
nan
can_sample_self.memory = 8026
can_sample_self.memory = 8026
nan
can_sample_self.memory = 8027
can_sample_self.memory = 8027
nan
can_sample_self.memory = 8028
can_sample_self.memory = 8028
nan
can_sample_self.memory = 8029
can_sample_self.memory = 8029
nan
can_sample_self.memory = 8030
can_sample_self.memory = 8030
nan
can_sample_self.memory = 8031
can_sample_self.memory = 8031
nan
can_sample_self.memory = 8032
can_sample_self.memory = 

 66%|████████████████████████████████████████████████████▊                           | 132/200 [05:06<02:09,  1.91s/it]

can_sample_self.memory = 8036
can_sample_self.memory = 8036
nan
can_sample_self.memory = 8037
can_sample_self.memory = 8037
nan
can_sample_self.memory = 8038
can_sample_self.memory = 8038
nan
can_sample_self.memory = 8039
can_sample_self.memory = 8039
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8040
can_sample_self.memory = 8040
nan
can_sample_self.memory = 8041
can_sample_self.memory = 8041
nan
can_sample_self.memory = 8042
can_sample_self.memory = 8042
nan
can_sample_self.memory = 8043
can_sample_self.memory = 8043
nan
can_sample_self.memory = 8044
can_sample_self.memory = 8044
nan
can_sample_self.memory = 8045
can_sample_self.memory = 8045
nan
can_sample_self.memory = 8046
can_sample_self.memory = 8046
nan
can_sample_self.memory = 8047
can_sample_self.memory = 8047
nan
can_sample_self.memory = 8048
can_sample_self.memory = 8048
nan
can_sample_self.memory = 8049
can_sample_self.memory = 8049
nan
can_sample_self.memory = 8050
can_sample_self.memory = 8050

 66%|█████████████████████████████████████████████████████▏                          | 133/200 [05:08<02:10,  1.94s/it]

nan
can_sample_self.memory = 8087
can_sample_self.memory = 8087
nan
can_sample_self.memory = 8088
can_sample_self.memory = 8088
nan
can_sample_self.memory = 8089
can_sample_self.memory = 8089
nan
can_sample_self.memory = 8090
can_sample_self.memory = 8090
nan
can_sample_self.memory = 8091
can_sample_self.memory = 8091
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8092
can_sample_self.memory = 8092


 67%|█████████████████████████████████████████████████████▌                          | 134/200 [05:09<01:35,  1.45s/it]

nan
can_sample_self.memory = 8093
can_sample_self.memory = 8093
nan
can_sample_self.memory = 8094
can_sample_self.memory = 8094
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8095
can_sample_self.memory = 8095
nan
can_sample_self.memory = 8096
can_sample_self.memory = 8096
nan
can_sample_self.memory = 8097
can_sample_self.memory = 8097
nan
can_sample_self.memory = 8098
can_sample_self.memory = 8098
nan
can_sample_self.memory = 8099
can_sample_self.memory = 8099
nan
can_sample_self.memory = 8100
can_sample_self.memory = 8100
nan
can_sample_self.memory = 8101
can_sample_self.memory = 8101
nan
can_sample_self.memory = 8102
can_sample_self.memory = 8102
nan
can_sample_self.memory = 8103
can_sample_self.memory = 8103
nan
can_sample_self.memory = 8104
can_sample_self.memory = 8104
nan
can_sample_self.memory = 8105
can_sample_self.memory = 8105
nan
can_sample_self.memory = 8106
can_sample_self.memory = 8106
nan
can_sample_self.memory = 8107
can_sample_self.memory = 

 68%|██████████████████████████████████████████████████████                          | 135/200 [05:10<01:41,  1.56s/it]

can_sample_self.memory = 8140
can_sample_self.memory = 8140
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8141
can_sample_self.memory = 8141
nan
can_sample_self.memory = 8142
can_sample_self.memory = 8142
nan
can_sample_self.memory = 8143
can_sample_self.memory = 8143
nan
can_sample_self.memory = 8144
can_sample_self.memory = 8144
nan
can_sample_self.memory = 8145
can_sample_self.memory = 8145
nan
can_sample_self.memory = 8146
can_sample_self.memory = 8146
nan
can_sample_self.memory = 8147
can_sample_self.memory = 8147
nan
can_sample_self.memory = 8148
can_sample_self.memory = 8148
nan
can_sample_self.memory = 8149
can_sample_self.memory = 8149
nan
can_sample_self.memory = 8150
can_sample_self.memory = 8150
nan
can_sample_self.memory = 8151
can_sample_self.memory = 8151
nan
can_sample_self.memory = 8152
can_sample_self.memory = 8152
nan
can_sample_self.memory = 8153
can_sample_self.memory = 8153
nan
can_sample_self.memory = 8154
can_sample_self.memory = 8154

can_sample_self.memory = 8270
can_sample_self.memory = 8270
nan
can_sample_self.memory = 8271
can_sample_self.memory = 8271
nan
can_sample_self.memory = 8272
can_sample_self.memory = 8272
nan
can_sample_self.memory = 8273
can_sample_self.memory = 8273
nan
can_sample_self.memory = 8274
can_sample_self.memory = 8274
nan
can_sample_self.memory = 8275
can_sample_self.memory = 8275
nan
can_sample_self.memory = 8276
can_sample_self.memory = 8276


 68%|██████████████████████████████████████████████████████▍                         | 136/200 [05:16<02:49,  2.65s/it]

nan
can_sample_self.memory = 8277
can_sample_self.memory = 8277
nan
can_sample_self.memory = 8278
can_sample_self.memory = 8278
nan
can_sample_self.memory = 8279
can_sample_self.memory = 8279
nan
can_sample_self.memory = 8280
can_sample_self.memory = 8280
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8281
can_sample_self.memory = 8281
nan
can_sample_self.memory = 8282
can_sample_self.memory = 8282
nan
can_sample_self.memory = 8283
can_sample_self.memory = 8283
nan
can_sample_self.memory = 8284
can_sample_self.memory = 8284
nan
can_sample_self.memory = 8285
can_sample_self.memory = 8285
nan
can_sample_self.memory = 8286
can_sample_self.memory = 8286
nan
can_sample_self.memory = 8287
can_sample_self.memory = 8287
nan
can_sample_self.memory = 8288
can_sample_self.memory = 8288
nan
can_sample_self.memory = 8289
can_sample_self.memory = 8289
nan
can_sample_self.memory = 8290
can_sample_self.memory = 8290
nan
can_sample_self.memory = 8291
can_sample_self.memory = 

 68%|██████████████████████████████████████████████████████▊                         | 137/200 [05:20<03:28,  3.31s/it]

can_sample_self.memory = 8407
can_sample_self.memory = 8407
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8408
can_sample_self.memory = 8408
nan
can_sample_self.memory = 8409
can_sample_self.memory = 8409
nan
can_sample_self.memory = 8410
can_sample_self.memory = 8410
nan
can_sample_self.memory = 8411
can_sample_self.memory = 8411
nan
can_sample_self.memory = 8412
can_sample_self.memory = 8412
nan
can_sample_self.memory = 8413
can_sample_self.memory = 8413
nan
can_sample_self.memory = 8414
can_sample_self.memory = 8414
nan
can_sample_self.memory = 8415
can_sample_self.memory = 8415
nan
can_sample_self.memory = 8416
can_sample_self.memory = 8416
nan
can_sample_self.memory = 8417
can_sample_self.memory = 8417
nan


 69%|███████████████████████████████████████████████████████▏                        | 138/200 [05:21<02:37,  2.55s/it]

can_sample_self.memory = 8418
can_sample_self.memory = 8418
nan
can_sample_self.memory = 8419
can_sample_self.memory = 8419
nan
can_sample_self.memory = 8420
can_sample_self.memory = 8420
nan
can_sample_self.memory = 8421
can_sample_self.memory = 8421
nan
can_sample_self.memory = 8422
can_sample_self.memory = 8422
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8423
can_sample_self.memory = 8423
nan
can_sample_self.memory = 8424
can_sample_self.memory = 8424
nan
can_sample_self.memory = 8425
can_sample_self.memory = 8425
nan
can_sample_self.memory = 8426
can_sample_self.memory = 8426
nan
can_sample_self.memory = 8427
can_sample_self.memory = 8427
nan
can_sample_self.memory = 8428
can_sample_self.memory = 8428
nan
can_sample_self.memory = 8429
can_sample_self.memory = 8429
nan
can_sample_self.memory = 8430
can_sample_self.memory = 8430
nan
can_sample_self.memory = 8431
can_sample_self.memory = 8431
nan
can_sample_self.memory = 8432
can_sample_self.memory = 8432

 70%|███████████████████████████████████████████████████████▌                        | 139/200 [05:22<02:09,  2.12s/it]

can_sample_self.memory = 8444
can_sample_self.memory = 8444
nan
can_sample_self.memory = 8445
can_sample_self.memory = 8445
nan
can_sample_self.memory = 8446
can_sample_self.memory = 8446
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8447
can_sample_self.memory = 8447
nan
can_sample_self.memory = 8448
can_sample_self.memory = 8448
nan
can_sample_self.memory = 8449
can_sample_self.memory = 8449
nan
can_sample_self.memory = 8450
can_sample_self.memory = 8450
nan
can_sample_self.memory = 8451
can_sample_self.memory = 8451
nan
can_sample_self.memory = 8452
can_sample_self.memory = 8452
nan
can_sample_self.memory = 8453
can_sample_self.memory = 8453
nan
can_sample_self.memory = 8454
can_sample_self.memory = 8454
nan
can_sample_self.memory = 8455
can_sample_self.memory = 8455
nan
can_sample_self.memory = 8456
can_sample_self.memory = 8456
nan
can_sample_self.memory = 8457
can_sample_self.memory = 8457
nan
can_sample_self.memory = 8458
can_sample_self.memory = 8458

nan
can_sample_self.memory = 8576
can_sample_self.memory = 8576
nan
can_sample_self.memory = 8577
can_sample_self.memory = 8577
nan
can_sample_self.memory = 8578
can_sample_self.memory = 8578
nan
can_sample_self.memory = 8579
can_sample_self.memory = 8579
nan
can_sample_self.memory = 8580
can_sample_self.memory = 8580
nan
can_sample_self.memory = 8581
can_sample_self.memory = 8581
nan
can_sample_self.memory = 8582
can_sample_self.memory = 8582
nan
can_sample_self.memory = 8583
can_sample_self.memory = 8583
nan
can_sample_self.memory = 8584
can_sample_self.memory = 8584
nan
can_sample_self.memory = 8585
can_sample_self.memory = 8585
nan
can_sample_self.memory = 8586
can_sample_self.memory = 8586
nan
can_sample_self.memory = 8587
can_sample_self.memory = 8587
nan
can_sample_self.memory = 8588
can_sample_self.memory = 8588
nan
can_sample_self.memory = 8589
can_sample_self.memory = 8589
nan
can_sample_self.memory = 8590
can_sample_self.memory = 8590
nan
can_sample_self.memory = 8591
can_sa

 70%|████████████████████████████████████████████████████████                        | 140/200 [05:29<03:24,  3.41s/it]

nan
can_sample_self.memory = 8606
can_sample_self.memory = 8606
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8607
can_sample_self.memory = 8607
nan
can_sample_self.memory = 8608
can_sample_self.memory = 8608
nan
can_sample_self.memory = 8609
can_sample_self.memory = 8609
nan
can_sample_self.memory = 8610
can_sample_self.memory = 8610
nan
can_sample_self.memory = 8611
can_sample_self.memory = 8611


 70%|████████████████████████████████████████████████████████▍                       | 141/200 [05:29<02:28,  2.52s/it]

nan
can_sample_self.memory = 8612
can_sample_self.memory = 8612
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8613
can_sample_self.memory = 8613
nan
can_sample_self.memory = 8614
can_sample_self.memory = 8614
nan
can_sample_self.memory = 8615
can_sample_self.memory = 8615
nan
can_sample_self.memory = 8616
can_sample_self.memory = 8616
nan
can_sample_self.memory = 8617
can_sample_self.memory = 8617
nan
can_sample_self.memory = 8618
can_sample_self.memory = 8618
nan
can_sample_self.memory = 8619
can_sample_self.memory = 8619
nan
can_sample_self.memory = 8620
can_sample_self.memory = 8620
nan
can_sample_self.memory = 8621
can_sample_self.memory = 8621
nan
can_sample_self.memory = 8622
can_sample_self.memory = 8622
nan
can_sample_self.memory = 8623
can_sample_self.memory = 8623
nan
can_sample_self.memory = 8624
can_sample_self.memory = 8624
nan
can_sample_self.memory = 8625
can_sample_self.memory = 8625
nan
can_sample_self.memory = 8626
can_sample_self.memory = 

 71%|████████████████████████████████████████████████████████▊                       | 142/200 [05:30<01:58,  2.05s/it]

nan
can_sample_self.memory = 8628
can_sample_self.memory = 8628
nan
can_sample_self.memory = 8629
can_sample_self.memory = 8629
nan
can_sample_self.memory = 8630
can_sample_self.memory = 8630
nan
can_sample_self.memory = 8631
can_sample_self.memory = 8631
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8632
can_sample_self.memory = 8632
nan
can_sample_self.memory = 8633
can_sample_self.memory = 8633
nan
can_sample_self.memory = 8634
can_sample_self.memory = 8634
nan
can_sample_self.memory = 8635
can_sample_self.memory = 8635
nan
can_sample_self.memory = 8636
can_sample_self.memory = 8636
nan
can_sample_self.memory = 8637
can_sample_self.memory = 8637
nan
can_sample_self.memory = 8638
can_sample_self.memory = 8638
nan
can_sample_self.memory = 8639
can_sample_self.memory = 8639
nan
can_sample_self.memory = 8640
can_sample_self.memory = 8640
nan
can_sample_self.memory = 8641
can_sample_self.memory = 8641
nan
can_sample_self.memory = 8642
can_sample_self.memory = 

can_sample_self.memory = 8756
can_sample_self.memory = 8756
nan
can_sample_self.memory = 8757
can_sample_self.memory = 8757
nan
can_sample_self.memory = 8758
can_sample_self.memory = 8758
nan
can_sample_self.memory = 8759
can_sample_self.memory = 8759
nan
can_sample_self.memory = 8760
can_sample_self.memory = 8760
nan
can_sample_self.memory = 8761
can_sample_self.memory = 8761


 72%|█████████████████████████████████████████████████████████▏                      | 143/200 [05:35<02:47,  2.94s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>


 72%|█████████████████████████████████████████████████████████▌                      | 144/200 [05:36<02:01,  2.17s/it]

can_sample_self.memory = 8762
can_sample_self.memory = 8762
nan
can_sample_self.memory = 8763
can_sample_self.memory = 8763
nan
can_sample_self.memory = 8764
can_sample_self.memory = 8764
nan
can_sample_self.memory = 8765
can_sample_self.memory = 8765
nan
can_sample_self.memory = 8766
can_sample_self.memory = 8766
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8767
can_sample_self.memory = 8767
nan
can_sample_self.memory = 8768
can_sample_self.memory = 8768
nan
can_sample_self.memory = 8769
can_sample_self.memory = 8769
nan
can_sample_self.memory = 8770
can_sample_self.memory = 8770
nan
can_sample_self.memory = 8771
can_sample_self.memory = 8771
nan
can_sample_self.memory = 8772
can_sample_self.memory = 8772
nan
can_sample_self.memory = 8773
can_sample_self.memory = 8773
nan
can_sample_self.memory = 8774
can_sample_self.memory = 8774
nan
can_sample_self.memory = 8775
can_sample_self.memory = 8775
nan
can_sample_self.memory = 8776
can_sample_self.memory = 8776

 72%|██████████████████████████████████████████████████████████                      | 145/200 [05:39<02:25,  2.65s/it]

nan
can_sample_self.memory = 8863
can_sample_self.memory = 8863
nan
can_sample_self.memory = 8864
can_sample_self.memory = 8864
nan
can_sample_self.memory = 8865
can_sample_self.memory = 8865
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8866
can_sample_self.memory = 8866
nan
can_sample_self.memory = 8867
can_sample_self.memory = 8867
nan
can_sample_self.memory = 8868
can_sample_self.memory = 8868
nan
can_sample_self.memory = 8869
can_sample_self.memory = 8869
nan
can_sample_self.memory = 8870
can_sample_self.memory = 8870
nan
can_sample_self.memory = 8871
can_sample_self.memory = 8871
nan
can_sample_self.memory = 8872
can_sample_self.memory = 8872
nan
can_sample_self.memory = 8873
can_sample_self.memory = 8873
nan
can_sample_self.memory = 8874
can_sample_self.memory = 8874
nan
can_sample_self.memory = 8875
can_sample_self.memory = 8875
nan
can_sample_self.memory = 8876
can_sample_self.memory = 8876
nan
can_sample_self.memory = 8877
can_sample_self.memory = 

 73%|██████████████████████████████████████████████████████████▍                     | 146/200 [05:44<02:53,  3.20s/it]

can_sample_self.memory = 8979
can_sample_self.memory = 8979
nan
can_sample_self.memory = 8980
can_sample_self.memory = 8980
nan
can_sample_self.memory = 8981
can_sample_self.memory = 8981
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8982
can_sample_self.memory = 8982
nan
can_sample_self.memory = 8983
can_sample_self.memory = 8983
nan
can_sample_self.memory = 8984
can_sample_self.memory = 8984
nan
can_sample_self.memory = 8985
can_sample_self.memory = 8985
nan
can_sample_self.memory = 8986
can_sample_self.memory = 8986
nan


 74%|██████████████████████████████████████████████████████████▊                     | 147/200 [05:44<02:06,  2.39s/it]

can_sample_self.memory = 8987
can_sample_self.memory = 8987
nan
can_sample_self.memory = 8988
can_sample_self.memory = 8988
nan
can_sample_self.memory = 8989
can_sample_self.memory = 8989
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 8990
can_sample_self.memory = 8990
nan
can_sample_self.memory = 8991
can_sample_self.memory = 8991
nan
can_sample_self.memory = 8992
can_sample_self.memory = 8992
nan
can_sample_self.memory = 8993
can_sample_self.memory = 8993
nan
can_sample_self.memory = 8994
can_sample_self.memory = 8994
nan
can_sample_self.memory = 8995
can_sample_self.memory = 8995
nan
can_sample_self.memory = 8996
can_sample_self.memory = 8996
nan
can_sample_self.memory = 8997
can_sample_self.memory = 8997
nan
can_sample_self.memory = 8998
can_sample_self.memory = 8998
nan
can_sample_self.memory = 8999
can_sample_self.memory = 8999
nan
can_sample_self.memory = 9000
can_sample_self.memory = 9000
nan
can_sample_self.memory = 9001
can_sample_self.memory = 9001

nan
can_sample_self.memory = 9117
can_sample_self.memory = 9117
nan
can_sample_self.memory = 9118
can_sample_self.memory = 9118
nan
can_sample_self.memory = 9119
can_sample_self.memory = 9119
nan
can_sample_self.memory = 9120
can_sample_self.memory = 9120
nan
can_sample_self.memory = 9121
can_sample_self.memory = 9121
nan
can_sample_self.memory = 9122
can_sample_self.memory = 9122
nan
can_sample_self.memory = 9123
can_sample_self.memory = 9123
nan
can_sample_self.memory = 9124
can_sample_self.memory = 9124
nan
can_sample_self.memory = 9125
can_sample_self.memory = 9125
nan
can_sample_self.memory = 9126
can_sample_self.memory = 9126
nan
can_sample_self.memory = 9127
can_sample_self.memory = 9127
nan
can_sample_self.memory = 9128
can_sample_self.memory = 9128
nan
can_sample_self.memory = 9129
can_sample_self.memory = 9129
nan
can_sample_self.memory = 9130
can_sample_self.memory = 9130
nan
can_sample_self.memory = 9131
can_sample_self.memory = 9131
nan
can_sample_self.memory = 9132
can_sa

 74%|███████████████████████████████████████████████████████████▏                    | 148/200 [05:51<03:04,  3.56s/it]

can_sample_self.memory = 9148
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9149
can_sample_self.memory = 9149
nan
can_sample_self.memory = 9150
can_sample_self.memory = 9150
nan
can_sample_self.memory = 9151
can_sample_self.memory = 9151
nan
can_sample_self.memory = 9152
can_sample_self.memory = 9152
nan
can_sample_self.memory = 9153
can_sample_self.memory = 9153
nan
can_sample_self.memory = 9154
can_sample_self.memory = 9154


 74%|███████████████████████████████████████████████████████████▌                    | 149/200 [05:51<02:14,  2.63s/it]

nan
can_sample_self.memory = 9155
can_sample_self.memory = 9155
nan
can_sample_self.memory = 9156
can_sample_self.memory = 9156
nan
<<< episode >>>


 75%|████████████████████████████████████████████████████████████                    | 150/200 [05:51<01:35,  1.91s/it]

type=<class 'numpy.ndarray'>
can_sample_self.memory = 9157
can_sample_self.memory = 9157
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9158
can_sample_self.memory = 9158
nan
can_sample_self.memory = 9159
can_sample_self.memory = 9159
nan
can_sample_self.memory = 9160
can_sample_self.memory = 9160
nan
can_sample_self.memory = 9161
can_sample_self.memory = 9161
nan
can_sample_self.memory = 9162
can_sample_self.memory = 9162
nan
can_sample_self.memory = 9163
can_sample_self.memory = 9163
nan
can_sample_self.memory = 9164
can_sample_self.memory = 9164
nan
can_sample_self.memory = 9165
can_sample_self.memory = 9165
nan
can_sample_self.memory = 9166
can_sample_self.memory = 9166
nan
can_sample_self.memory = 9167
can_sample_self.memory = 9167
nan
can_sample_self.memory = 9168
can_sample_self.memory = 9168
nan
can_sample_self.memory = 9169
can_sample_self.memory = 9169
nan
can_sample_self.memory = 9170
can_sample_self.memory = 9170
nan
can_sample_self.memory = 9171


 76%|████████████████████████████████████████████████████████████▍                   | 151/200 [05:55<02:04,  2.55s/it]

nan
can_sample_self.memory = 9257
can_sample_self.memory = 9257
nan
can_sample_self.memory = 9258
can_sample_self.memory = 9258
nan
can_sample_self.memory = 9259
can_sample_self.memory = 9259
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9260
can_sample_self.memory = 9260
nan
can_sample_self.memory = 9261
can_sample_self.memory = 9261
nan
can_sample_self.memory = 9262
can_sample_self.memory = 9262
nan
can_sample_self.memory = 9263
can_sample_self.memory = 9263
nan
can_sample_self.memory = 9264
can_sample_self.memory = 9264
nan
can_sample_self.memory = 9265
can_sample_self.memory = 9265
nan
can_sample_self.memory = 9266
can_sample_self.memory = 9266
nan
can_sample_self.memory = 9267
can_sample_self.memory = 9267
nan
can_sample_self.memory = 9268
can_sample_self.memory = 9268
nan
can_sample_self.memory = 9269
can_sample_self.memory = 9269
nan


 76%|████████████████████████████████████████████████████████████▊                   | 152/200 [05:56<01:36,  2.02s/it]

can_sample_self.memory = 9270
can_sample_self.memory = 9270
nan
can_sample_self.memory = 9271
can_sample_self.memory = 9271
nan
can_sample_self.memory = 9272
can_sample_self.memory = 9272
nan
can_sample_self.memory = 9273
can_sample_self.memory = 9273
nan
can_sample_self.memory = 9274
can_sample_self.memory = 9274
nan
can_sample_self.memory = 9275
can_sample_self.memory = 9275
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9276
can_sample_self.memory = 9276
nan
can_sample_self.memory = 9277
can_sample_self.memory = 9277
nan
can_sample_self.memory = 9278
can_sample_self.memory = 9278
nan
can_sample_self.memory = 9279
can_sample_self.memory = 9279
nan
can_sample_self.memory = 9280
can_sample_self.memory = 9280
nan
can_sample_self.memory = 9281
can_sample_self.memory = 9281
nan
can_sample_self.memory = 9282
can_sample_self.memory = 9282
nan
can_sample_self.memory = 9283
can_sample_self.memory = 9283
nan
can_sample_self.memory = 9284
can_sample_self.memory = 9284

 76%|█████████████████████████████████████████████████████████████▏                  | 153/200 [05:59<01:48,  2.31s/it]

nan
can_sample_self.memory = 9343
can_sample_self.memory = 9343
nan
can_sample_self.memory = 9344
can_sample_self.memory = 9344
nan
can_sample_self.memory = 9345
can_sample_self.memory = 9345
nan
can_sample_self.memory = 9346
can_sample_self.memory = 9346
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9347
can_sample_self.memory = 9347
nan
can_sample_self.memory = 9348
can_sample_self.memory = 9348
nan
can_sample_self.memory = 9349
can_sample_self.memory = 9349
nan
can_sample_self.memory = 9350
can_sample_self.memory = 9350
nan


 77%|█████████████████████████████████████████████████████████████▌                  | 154/200 [06:00<01:21,  1.77s/it]

can_sample_self.memory = 9351
can_sample_self.memory = 9351
nan
can_sample_self.memory = 9352
can_sample_self.memory = 9352
nan
can_sample_self.memory = 9353
can_sample_self.memory = 9353
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9354
can_sample_self.memory = 9354
nan
can_sample_self.memory = 9355
can_sample_self.memory = 9355
nan
can_sample_self.memory = 9356
can_sample_self.memory = 9356
nan
can_sample_self.memory = 9357
can_sample_self.memory = 9357
nan
can_sample_self.memory = 9358
can_sample_self.memory = 9358
nan
can_sample_self.memory = 9359
can_sample_self.memory = 9359
nan
can_sample_self.memory = 9360
can_sample_self.memory = 9360
nan
can_sample_self.memory = 9361
can_sample_self.memory = 9361
nan
can_sample_self.memory = 9362
can_sample_self.memory = 9362
nan
can_sample_self.memory = 9363
can_sample_self.memory = 9363
nan
can_sample_self.memory = 9364
can_sample_self.memory = 9364
nan
can_sample_self.memory = 9365
can_sample_self.memory = 9365

 78%|██████████████████████████████████████████████████████████████                  | 155/200 [06:04<01:49,  2.44s/it]

nan
can_sample_self.memory = 9454
can_sample_self.memory = 9454
nan
can_sample_self.memory = 9455
can_sample_self.memory = 9455
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9456
can_sample_self.memory = 9456
nan
can_sample_self.memory = 9457
can_sample_self.memory = 9457
nan
can_sample_self.memory = 9458
can_sample_self.memory = 9458
nan
can_sample_self.memory = 9459
can_sample_self.memory = 9459
nan
can_sample_self.memory = 9460
can_sample_self.memory = 9460
nan
can_sample_self.memory = 9461
can_sample_self.memory = 9461
nan
can_sample_self.memory = 9462
can_sample_self.memory = 9462
nan
can_sample_self.memory = 9463
can_sample_self.memory = 9463
nan
can_sample_self.memory = 9464
can_sample_self.memory = 9464
nan
can_sample_self.memory = 9465
can_sample_self.memory = 9465
nan
can_sample_self.memory = 9466
can_sample_self.memory = 9466
nan
can_sample_self.memory = 9467
can_sample_self.memory = 9467
nan
can_sample_self.memory = 9468
can_sample_self.memory = 

 78%|██████████████████████████████████████████████████████████████▍                 | 156/200 [06:07<02:01,  2.77s/it]

nan
can_sample_self.memory = 9545
can_sample_self.memory = 9545
nan
can_sample_self.memory = 9546
can_sample_self.memory = 9546
nan
can_sample_self.memory = 9547
can_sample_self.memory = 9547
nan
can_sample_self.memory = 9548
can_sample_self.memory = 9548
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9549
can_sample_self.memory = 9549
nan
can_sample_self.memory = 9550
can_sample_self.memory = 9550
nan
can_sample_self.memory = 9551
can_sample_self.memory = 9551
nan
can_sample_self.memory = 9552
can_sample_self.memory = 9552
nan
can_sample_self.memory = 9553
can_sample_self.memory = 9553
nan
can_sample_self.memory = 9554
can_sample_self.memory = 9554
nan
can_sample_self.memory = 9555
can_sample_self.memory = 9555
nan
can_sample_self.memory = 9556
can_sample_self.memory = 9556
nan
can_sample_self.memory = 9557
can_sample_self.memory = 9557
nan
can_sample_self.memory = 9558
can_sample_self.memory = 9558
nan
can_sample_self.memory = 9559
can_sample_self.memory = 

 78%|██████████████████████████████████████████████████████████████▊                 | 157/200 [06:10<02:00,  2.81s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9625
can_sample_self.memory = 9625
nan
can_sample_self.memory = 9626
can_sample_self.memory = 9626
nan
can_sample_self.memory = 9627
can_sample_self.memory = 9627
nan
can_sample_self.memory = 9628
can_sample_self.memory = 9628
nan
can_sample_self.memory = 9629
can_sample_self.memory = 9629
nan
can_sample_self.memory = 9630
can_sample_self.memory = 9630
nan
can_sample_self.memory = 9631
can_sample_self.memory = 9631
nan
can_sample_self.memory = 9632
can_sample_self.memory = 9632
nan
can_sample_self.memory = 9633
can_sample_self.memory = 9633
nan
can_sample_self.memory = 9634
can_sample_self.memory = 9634
nan
can_sample_self.memory = 9635
can_sample_self.memory = 9635
nan
can_sample_self.memory = 9636
can_sample_self.memory = 9636
nan
can_sample_self.memory = 9637
can_sample_self.memory = 9637
nan
can_sample_self.memory = 9638
can_sample_self.memory = 9638
nan
can_sample_self.memory = 9639
can_sample_self.memory = 

 79%|███████████████████████████████████████████████████████████████▏                | 158/200 [06:12<01:50,  2.64s/it]

nan
can_sample_self.memory = 9678
can_sample_self.memory = 9678
nan
can_sample_self.memory = 9679
can_sample_self.memory = 9679
nan
can_sample_self.memory = 9680
can_sample_self.memory = 9680
nan
can_sample_self.memory = 9681
can_sample_self.memory = 9681
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9682
can_sample_self.memory = 9682
nan
can_sample_self.memory = 9683
can_sample_self.memory = 9683
nan
can_sample_self.memory = 9684
can_sample_self.memory = 9684
nan
can_sample_self.memory = 9685
can_sample_self.memory = 9685
nan
can_sample_self.memory = 9686
can_sample_self.memory = 9686
nan
can_sample_self.memory = 9687
can_sample_self.memory = 9687
nan
can_sample_self.memory = 9688
can_sample_self.memory = 9688
nan
can_sample_self.memory = 9689
can_sample_self.memory = 9689
nan
can_sample_self.memory = 9690
can_sample_self.memory = 9690
nan
can_sample_self.memory = 9691
can_sample_self.memory = 9691
nan
can_sample_self.memory = 9692
can_sample_self.memory = 

 80%|███████████████████████████████████████████████████████████████▌                | 159/200 [06:14<01:34,  2.31s/it]

nan
can_sample_self.memory = 9716
can_sample_self.memory = 9716
nan
can_sample_self.memory = 9717
can_sample_self.memory = 9717
nan
<<< episode >>>


 80%|████████████████████████████████████████████████████████████████                | 160/200 [06:14<01:09,  1.73s/it]

type=<class 'numpy.ndarray'>
can_sample_self.memory = 9718
can_sample_self.memory = 9718
nan
can_sample_self.memory = 9719
can_sample_self.memory = 9719
nan
can_sample_self.memory = 9720
can_sample_self.memory = 9720
nan
can_sample_self.memory = 9721
can_sample_self.memory = 9721
nan
can_sample_self.memory = 9722
can_sample_self.memory = 9722
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9723
can_sample_self.memory = 9723
nan
can_sample_self.memory = 9724
can_sample_self.memory = 9724
nan
can_sample_self.memory = 9725
can_sample_self.memory = 9725
nan
can_sample_self.memory = 9726
can_sample_self.memory = 9726
nan
can_sample_self.memory = 9727
can_sample_self.memory = 9727
nan
can_sample_self.memory = 9728
can_sample_self.memory = 9728
nan
can_sample_self.memory = 9729
can_sample_self.memory = 9729
nan
can_sample_self.memory = 9730
can_sample_self.memory = 9730
nan
can_sample_self.memory = 9731
can_sample_self.memory = 9731
nan
can_sample_self.memory = 9732


 80%|████████████████████████████████████████████████████████████████▍               | 161/200 [06:18<01:27,  2.24s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 9813
can_sample_self.memory = 9813
nan
can_sample_self.memory = 9814
can_sample_self.memory = 9814
nan
can_sample_self.memory = 9815
can_sample_self.memory = 9815
nan
can_sample_self.memory = 9816
can_sample_self.memory = 9816
nan
can_sample_self.memory = 9817
can_sample_self.memory = 9817
nan
can_sample_self.memory = 9818
can_sample_self.memory = 9818
nan
can_sample_self.memory = 9819
can_sample_self.memory = 9819
nan
can_sample_self.memory = 9820
can_sample_self.memory = 9820
nan
can_sample_self.memory = 9821
can_sample_self.memory = 9821
nan
can_sample_self.memory = 9822
can_sample_self.memory = 9822
nan
can_sample_self.memory = 9823
can_sample_self.memory = 9823
nan
can_sample_self.memory = 9824
can_sample_self.memory = 9824
nan
can_sample_self.memory = 9825
can_sample_self.memory = 9825
nan
can_sample_self.memory = 9826
can_sample_self.memory = 9826
nan
can_sample_self.memory = 9827
can_sample_self.memory = 

nan
can_sample_self.memory = 9941
can_sample_self.memory = 9941
nan
can_sample_self.memory = 9942
can_sample_self.memory = 9942
nan
can_sample_self.memory = 9943
can_sample_self.memory = 9943
nan
can_sample_self.memory = 9944
can_sample_self.memory = 9944
nan
can_sample_self.memory = 9945
can_sample_self.memory = 9945
nan
can_sample_self.memory = 9946
can_sample_self.memory = 9946
nan
can_sample_self.memory = 9947
can_sample_self.memory = 9947
nan
can_sample_self.memory = 9948
can_sample_self.memory = 9948
nan
can_sample_self.memory = 9949
can_sample_self.memory = 9949
nan
can_sample_self.memory = 9950
can_sample_self.memory = 9950
nan
can_sample_self.memory = 9951
can_sample_self.memory = 9951
nan
can_sample_self.memory = 9952
can_sample_self.memory = 9952
nan
can_sample_self.memory = 9953
can_sample_self.memory = 9953
nan
can_sample_self.memory = 9954
can_sample_self.memory = 9954
nan
can_sample_self.memory = 9955
can_sample_self.memory = 9955
nan
can_sample_self.memory = 9956
can_sa

nan
can_sample_self.memory = 10069
can_sample_self.memory = 10069
nan
can_sample_self.memory = 10070
can_sample_self.memory = 10070
nan
can_sample_self.memory = 10071
can_sample_self.memory = 10071
nan
can_sample_self.memory = 10072
can_sample_self.memory = 10072
nan
can_sample_self.memory = 10073
can_sample_self.memory = 10073
nan
can_sample_self.memory = 10074
can_sample_self.memory = 10074
nan
can_sample_self.memory = 10075
can_sample_self.memory = 10075
nan
can_sample_self.memory = 10076
can_sample_self.memory = 10076
nan
can_sample_self.memory = 10077
can_sample_self.memory = 10077
nan
can_sample_self.memory = 10078
can_sample_self.memory = 10078
nan
can_sample_self.memory = 10079
can_sample_self.memory = 10079
nan
can_sample_self.memory = 10080
can_sample_self.memory = 10080
nan
can_sample_self.memory = 10081
can_sample_self.memory = 10081
nan
can_sample_self.memory = 10082
can_sample_self.memory = 10082
nan
can_sample_self.memory = 10083
can_sample_self.memory = 10083
nan
can_sa

 81%|████████████████████████████████████████████████████████████████▊               | 162/200 [06:28<02:58,  4.71s/it]

nan
can_sample_self.memory = 10097
can_sample_self.memory = 10097
nan
can_sample_self.memory = 10098
can_sample_self.memory = 10098
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10099
can_sample_self.memory = 10099
nan
can_sample_self.memory = 10100
can_sample_self.memory = 10100
nan
can_sample_self.memory = 10101
can_sample_self.memory = 10101
nan
can_sample_self.memory = 10102
can_sample_self.memory = 10102
nan
can_sample_self.memory = 10103
can_sample_self.memory = 10103
nan
can_sample_self.memory = 10104
can_sample_self.memory = 10104
nan
can_sample_self.memory = 10105
can_sample_self.memory = 10105
nan
can_sample_self.memory = 10106
can_sample_self.memory = 10106
nan
can_sample_self.memory = 10107
can_sample_self.memory = 10107
nan
can_sample_self.memory = 10108
can_sample_self.memory = 10108
nan
can_sample_self.memory = 10109
can_sample_self.memory = 10109
nan
can_sample_self.memory = 10110
can_sample_self.memory = 10110
nan
can_sample_self.memory = 10

 82%|█████████████████████████████████████████████████████████████████▏              | 163/200 [06:29<02:14,  3.64s/it]

nan
can_sample_self.memory = 10121
can_sample_self.memory = 10121
nan
can_sample_self.memory = 10122
can_sample_self.memory = 10122
nan
can_sample_self.memory = 10123
can_sample_self.memory = 10123
nan
can_sample_self.memory = 10124
can_sample_self.memory = 10124
nan
can_sample_self.memory = 10125
can_sample_self.memory = 10125
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10126
can_sample_self.memory = 10126
nan
can_sample_self.memory = 10127
can_sample_self.memory = 10127
nan
can_sample_self.memory = 10128
can_sample_self.memory = 10128
nan
can_sample_self.memory = 10129
can_sample_self.memory = 10129
nan
can_sample_self.memory = 10130
can_sample_self.memory = 10130
nan
can_sample_self.memory = 10131
can_sample_self.memory = 10131
nan
can_sample_self.memory = 10132
can_sample_self.memory = 10132
nan
can_sample_self.memory = 10133
can_sample_self.memory = 10133
nan
can_sample_self.memory = 10134
can_sample_self.memory = 10134
nan
can_sample_self.memory = 10

nan
can_sample_self.memory = 10246
can_sample_self.memory = 10246
nan
can_sample_self.memory = 10247
can_sample_self.memory = 10247
nan
can_sample_self.memory = 10248
can_sample_self.memory = 10248
nan
can_sample_self.memory = 10249
can_sample_self.memory = 10249
nan
can_sample_self.memory = 10250
can_sample_self.memory = 10250
nan
can_sample_self.memory = 10251
can_sample_self.memory = 10251
nan
can_sample_self.memory = 10252
can_sample_self.memory = 10252
nan
can_sample_self.memory = 10253
can_sample_self.memory = 10253
nan
can_sample_self.memory = 10254
can_sample_self.memory = 10254
nan
can_sample_self.memory = 10255
can_sample_self.memory = 10255
nan
can_sample_self.memory = 10256
can_sample_self.memory = 10256
nan
can_sample_self.memory = 10257
can_sample_self.memory = 10257
nan
can_sample_self.memory = 10258
can_sample_self.memory = 10258
nan
can_sample_self.memory = 10259
can_sample_self.memory = 10259
nan
can_sample_self.memory = 10260
can_sample_self.memory = 10260
nan
can_sa

 82%|█████████████████████████████████████████████████████████████████▌              | 164/200 [06:38<03:08,  5.24s/it]

nan
can_sample_self.memory = 10369
can_sample_self.memory = 10369
nan
can_sample_self.memory = 10370
can_sample_self.memory = 10370
nan
can_sample_self.memory = 10371
can_sample_self.memory = 10371
nan
can_sample_self.memory = 10372
can_sample_self.memory = 10372
nan
can_sample_self.memory = 10373
can_sample_self.memory = 10373
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10374
can_sample_self.memory = 10374
nan
can_sample_self.memory = 10375
can_sample_self.memory = 10375
nan
can_sample_self.memory = 10376
can_sample_self.memory = 10376
nan
can_sample_self.memory = 10377
can_sample_self.memory = 10377
nan
can_sample_self.memory = 10378
can_sample_self.memory = 10378
nan
can_sample_self.memory = 10379
can_sample_self.memory = 10379
nan
can_sample_self.memory = 10380
can_sample_self.memory = 10380
nan
can_sample_self.memory = 10381
can_sample_self.memory = 10381
nan
can_sample_self.memory = 10382
can_sample_self.memory = 10382
nan
can_sample_self.memory = 10

 82%|██████████████████████████████████████████████████████████████████              | 165/200 [06:39<02:17,  3.92s/it]

can_sample_self.memory = 10391
can_sample_self.memory = 10391
nan
can_sample_self.memory = 10392
can_sample_self.memory = 10392
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10393
can_sample_self.memory = 10393
nan
can_sample_self.memory = 10394
can_sample_self.memory = 10394
nan
can_sample_self.memory = 10395
can_sample_self.memory = 10395
nan
can_sample_self.memory = 10396
can_sample_self.memory = 10396
nan
can_sample_self.memory = 10397
can_sample_self.memory = 10397
nan
can_sample_self.memory = 10398
can_sample_self.memory = 10398
nan
can_sample_self.memory = 10399
can_sample_self.memory = 10399
nan
can_sample_self.memory = 10400
can_sample_self.memory = 10400
nan
can_sample_self.memory = 10401
can_sample_self.memory = 10401
nan
can_sample_self.memory = 10402
can_sample_self.memory = 10402
nan
can_sample_self.memory = 10403
can_sample_self.memory = 10403
nan
can_sample_self.memory = 10404
can_sample_self.memory = 10404
nan
can_sample_self.memory = 10405


 83%|██████████████████████████████████████████████████████████████████▍             | 166/200 [06:41<01:55,  3.39s/it]

nan
can_sample_self.memory = 10445
can_sample_self.memory = 10445
nan
can_sample_self.memory = 10446
can_sample_self.memory = 10446
nan
can_sample_self.memory = 10447
can_sample_self.memory = 10447
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10448
can_sample_self.memory = 10448
nan
can_sample_self.memory = 10449
can_sample_self.memory = 10449
nan
can_sample_self.memory = 10450
can_sample_self.memory = 10450
nan
can_sample_self.memory = 10451
can_sample_self.memory = 10451
nan
can_sample_self.memory = 10452
can_sample_self.memory = 10452
nan
can_sample_self.memory = 10453
can_sample_self.memory = 10453
nan
can_sample_self.memory = 10454
can_sample_self.memory = 10454
nan
can_sample_self.memory = 10455
can_sample_self.memory = 10455
nan
can_sample_self.memory = 10456
can_sample_self.memory = 10456
nan
can_sample_self.memory = 10457
can_sample_self.memory = 10457
nan
can_sample_self.memory = 10458
can_sample_self.memory = 10458
nan
can_sample_self.memory = 10

nan
can_sample_self.memory = 10570
can_sample_self.memory = 10570
nan
can_sample_self.memory = 10571
can_sample_self.memory = 10571
nan
can_sample_self.memory = 10572
can_sample_self.memory = 10572
nan
can_sample_self.memory = 10573
can_sample_self.memory = 10573
nan
can_sample_self.memory = 10574
can_sample_self.memory = 10574
nan
can_sample_self.memory = 10575
can_sample_self.memory = 10575
nan
can_sample_self.memory = 10576
can_sample_self.memory = 10576
nan
can_sample_self.memory = 10577
can_sample_self.memory = 10577
nan
can_sample_self.memory = 10578
can_sample_self.memory = 10578
nan
can_sample_self.memory = 10579
can_sample_self.memory = 10579
nan


 84%|██████████████████████████████████████████████████████████████████▊             | 167/200 [06:46<02:07,  3.87s/it]

can_sample_self.memory = 10580
can_sample_self.memory = 10580
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10581
can_sample_self.memory = 10581
nan
can_sample_self.memory = 10582
can_sample_self.memory = 10582
nan
can_sample_self.memory = 10583
can_sample_self.memory = 10583
nan
can_sample_self.memory = 10584
can_sample_self.memory = 10584
nan
can_sample_self.memory = 10585
can_sample_self.memory = 10585
nan
can_sample_self.memory = 10586
can_sample_self.memory = 10586
nan
can_sample_self.memory = 10587
can_sample_self.memory = 10587
nan
can_sample_self.memory = 10588
can_sample_self.memory = 10588
nan
can_sample_self.memory = 10589
can_sample_self.memory = 10589
nan
can_sample_self.memory = 10590
can_sample_self.memory = 10590
nan
can_sample_self.memory = 10591
can_sample_self.memory = 10591
nan
can_sample_self.memory = 10592
can_sample_self.memory = 10592
nan
can_sample_self.memory = 10593
can_sample_self.memory = 10593
nan
can_sample_self.memory = 10594


 84%|███████████████████████████████████████████████████████████████████▏            | 168/200 [06:48<01:42,  3.19s/it]

nan
can_sample_self.memory = 10617
can_sample_self.memory = 10617
nan
can_sample_self.memory = 10618
can_sample_self.memory = 10618
nan
can_sample_self.memory = 10619
can_sample_self.memory = 10619
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10620
can_sample_self.memory = 10620
nan
can_sample_self.memory = 10621
can_sample_self.memory = 10621
nan
can_sample_self.memory = 10622
can_sample_self.memory = 10622
nan
can_sample_self.memory = 10623
can_sample_self.memory = 10623
nan
can_sample_self.memory = 10624
can_sample_self.memory = 10624
nan
can_sample_self.memory = 10625
can_sample_self.memory = 10625


 84%|███████████████████████████████████████████████████████████████████▌            | 169/200 [06:48<01:13,  2.37s/it]

nan
can_sample_self.memory = 10626
can_sample_self.memory = 10626
nan
can_sample_self.memory = 10627
can_sample_self.memory = 10627
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10628
can_sample_self.memory = 10628
nan
can_sample_self.memory = 10629
can_sample_self.memory = 10629
nan
can_sample_self.memory = 10630
can_sample_self.memory = 10630
nan
can_sample_self.memory = 10631
can_sample_self.memory = 10631
nan
can_sample_self.memory = 10632
can_sample_self.memory = 10632
nan
can_sample_self.memory = 10633
can_sample_self.memory = 10633
nan
can_sample_self.memory = 10634
can_sample_self.memory = 10634
nan
can_sample_self.memory = 10635
can_sample_self.memory = 10635
nan
can_sample_self.memory = 10636
can_sample_self.memory = 10636
nan
can_sample_self.memory = 10637
can_sample_self.memory = 10637
nan
can_sample_self.memory = 10638
can_sample_self.memory = 10638
nan
can_sample_self.memory = 10639
can_sample_self.memory = 10639
nan
can_sample_self.memory = 10

 85%|████████████████████████████████████████████████████████████████████            | 170/200 [06:50<01:03,  2.12s/it]

can_sample_self.memory = 10663
can_sample_self.memory = 10663
nan
can_sample_self.memory = 10664
can_sample_self.memory = 10664
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10665
can_sample_self.memory = 10665
nan
can_sample_self.memory = 10666
can_sample_self.memory = 10666
nan
can_sample_self.memory = 10667
can_sample_self.memory = 10667
nan
can_sample_self.memory = 10668
can_sample_self.memory = 10668
nan
can_sample_self.memory = 10669
can_sample_self.memory = 10669
nan
can_sample_self.memory = 10670
can_sample_self.memory = 10670
nan
can_sample_self.memory = 10671
can_sample_self.memory = 10671
nan
can_sample_self.memory = 10672
can_sample_self.memory = 10672
nan
can_sample_self.memory = 10673
can_sample_self.memory = 10673
nan
can_sample_self.memory = 10674
can_sample_self.memory = 10674
nan
can_sample_self.memory = 10675
can_sample_self.memory = 10675
nan
can_sample_self.memory = 10676
can_sample_self.memory = 10676
nan
can_sample_self.memory = 10677


 86%|████████████████████████████████████████████████████████████████████▍           | 171/200 [06:54<01:22,  2.83s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10786
can_sample_self.memory = 10786
nan
can_sample_self.memory = 10787
can_sample_self.memory = 10787
nan
can_sample_self.memory = 10788
can_sample_self.memory = 10788
nan
can_sample_self.memory = 10789
can_sample_self.memory = 10789
nan
can_sample_self.memory = 10790
can_sample_self.memory = 10790
nan
can_sample_self.memory = 10791
can_sample_self.memory = 10791
nan
can_sample_self.memory = 10792
can_sample_self.memory = 10792
nan
can_sample_self.memory = 10793
can_sample_self.memory = 10793
nan
can_sample_self.memory = 10794
can_sample_self.memory = 10794
nan
can_sample_self.memory = 10795
can_sample_self.memory = 10795
nan
can_sample_self.memory = 10796
can_sample_self.memory = 10796
nan
can_sample_self.memory = 10797
can_sample_self.memory = 10797
nan


 86%|████████████████████████████████████████████████████████████████████▊           | 172/200 [06:55<01:00,  2.18s/it]

can_sample_self.memory = 10798
can_sample_self.memory = 10798
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10799
can_sample_self.memory = 10799
nan
can_sample_self.memory = 10800
can_sample_self.memory = 10800
nan
can_sample_self.memory = 10801
can_sample_self.memory = 10801
nan
can_sample_self.memory = 10802
can_sample_self.memory = 10802
nan
can_sample_self.memory = 10803
can_sample_self.memory = 10803
nan
can_sample_self.memory = 10804
can_sample_self.memory = 10804
nan
can_sample_self.memory = 10805
can_sample_self.memory = 10805
nan
can_sample_self.memory = 10806
can_sample_self.memory = 10806
nan
can_sample_self.memory = 10807
can_sample_self.memory = 10807
nan
can_sample_self.memory = 10808
can_sample_self.memory = 10808
nan
can_sample_self.memory = 10809
can_sample_self.memory = 10809
nan
can_sample_self.memory = 10810
can_sample_self.memory = 10810
nan
can_sample_self.memory = 10811
can_sample_self.memory = 10811
nan
can_sample_self.memory = 10812


 86%|█████████████████████████████████████████████████████████████████████▏          | 173/200 [06:59<01:10,  2.60s/it]

can_sample_self.memory = 10892
can_sample_self.memory = 10892
nan
can_sample_self.memory = 10893
can_sample_self.memory = 10893
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10894
can_sample_self.memory = 10894
nan
can_sample_self.memory = 10895
can_sample_self.memory = 10895
nan
can_sample_self.memory = 10896
can_sample_self.memory = 10896
nan
can_sample_self.memory = 10897
can_sample_self.memory = 10897
nan
can_sample_self.memory = 10898
can_sample_self.memory = 10898
nan
can_sample_self.memory = 10899
can_sample_self.memory = 10899
nan
can_sample_self.memory = 10900
can_sample_self.memory = 10900
nan
can_sample_self.memory = 10901
can_sample_self.memory = 10901
nan
can_sample_self.memory = 10902
can_sample_self.memory = 10902
nan
can_sample_self.memory = 10903
can_sample_self.memory = 10903
nan
can_sample_self.memory = 10904
can_sample_self.memory = 10904
nan
can_sample_self.memory = 10905
can_sample_self.memory = 10905
nan
can_sample_self.memory = 10906


 87%|█████████████████████████████████████████████████████████████████████▌          | 174/200 [07:01<01:05,  2.50s/it]

<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10951
can_sample_self.memory = 10951
nan
can_sample_self.memory = 10952
can_sample_self.memory = 10952
nan
can_sample_self.memory = 10953
can_sample_self.memory = 10953
nan
can_sample_self.memory = 10954
can_sample_self.memory = 10954
nan
can_sample_self.memory = 10955
can_sample_self.memory = 10955
nan
can_sample_self.memory = 10956
can_sample_self.memory = 10956
nan
can_sample_self.memory = 10957
can_sample_self.memory = 10957
nan
can_sample_self.memory = 10958
can_sample_self.memory = 10958
nan
can_sample_self.memory = 10959
can_sample_self.memory = 10959
nan
can_sample_self.memory = 10960
can_sample_self.memory = 10960
nan
can_sample_self.memory = 10961
can_sample_self.memory = 10961
nan
can_sample_self.memory = 10962
can_sample_self.memory = 10962
nan
can_sample_self.memory = 10963
can_sample_self.memory = 10963
nan
can_sample_self.memory = 10964
can_sample_self.memory = 10964
nan
can_sample_self.memory = 10965


 88%|██████████████████████████████████████████████████████████████████████          | 175/200 [07:02<00:53,  2.13s/it]

can_sample_self.memory = 10980
can_sample_self.memory = 10980
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10981
can_sample_self.memory = 10981
nan
can_sample_self.memory = 10982
can_sample_self.memory = 10982
nan
can_sample_self.memory = 10983
can_sample_self.memory = 10983
nan
can_sample_self.memory = 10984
can_sample_self.memory = 10984
nan
can_sample_self.memory = 10985
can_sample_self.memory = 10985
nan
can_sample_self.memory = 10986
can_sample_self.memory = 10986


 88%|██████████████████████████████████████████████████████████████████████▍         | 176/200 [07:03<00:39,  1.63s/it]

nan
can_sample_self.memory = 10987
can_sample_self.memory = 10987
nan
can_sample_self.memory = 10988
can_sample_self.memory = 10988
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 10989
can_sample_self.memory = 10989
nan
can_sample_self.memory = 10990
can_sample_self.memory = 10990
nan
can_sample_self.memory = 10991
can_sample_self.memory = 10991
nan
can_sample_self.memory = 10992
can_sample_self.memory = 10992
nan
can_sample_self.memory = 10993
can_sample_self.memory = 10993
nan
can_sample_self.memory = 10994
can_sample_self.memory = 10994
nan
can_sample_self.memory = 10995
can_sample_self.memory = 10995
nan
can_sample_self.memory = 10996
can_sample_self.memory = 10996
nan
can_sample_self.memory = 10997
can_sample_self.memory = 10997
nan
can_sample_self.memory = 10998
can_sample_self.memory = 10998
nan
can_sample_self.memory = 10999
can_sample_self.memory = 10999
nan


 88%|██████████████████████████████████████████████████████████████████████▊         | 177/200 [07:03<00:30,  1.34s/it]

can_sample_self.memory = 11000
can_sample_self.memory = 11000
nan
can_sample_self.memory = 11001
can_sample_self.memory = 11001
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11002
can_sample_self.memory = 11002
nan
can_sample_self.memory = 11003
can_sample_self.memory = 11003
nan
can_sample_self.memory = 11004
can_sample_self.memory = 11004
nan
can_sample_self.memory = 11005
can_sample_self.memory = 11005
nan
can_sample_self.memory = 11006
can_sample_self.memory = 11006
nan
can_sample_self.memory = 11007
can_sample_self.memory = 11007
nan
can_sample_self.memory = 11008
can_sample_self.memory = 11008
nan
can_sample_self.memory = 11009
can_sample_self.memory = 11009
nan
can_sample_self.memory = 11010
can_sample_self.memory = 11010
nan
can_sample_self.memory = 11011
can_sample_self.memory = 11011
nan
can_sample_self.memory = 11012
can_sample_self.memory = 11012
nan
can_sample_self.memory = 11013
can_sample_self.memory = 11013
nan
can_sample_self.memory = 11014


 89%|███████████████████████████████████████████████████████████████████████▏        | 178/200 [07:06<00:36,  1.67s/it]

can_sample_self.memory = 11063
can_sample_self.memory = 11063
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11064
can_sample_self.memory = 11064
nan
can_sample_self.memory = 11065
can_sample_self.memory = 11065
nan
can_sample_self.memory = 11066
can_sample_self.memory = 11066
nan
can_sample_self.memory = 11067
can_sample_self.memory = 11067
nan
can_sample_self.memory = 11068
can_sample_self.memory = 11068
nan
can_sample_self.memory = 11069
can_sample_self.memory = 11069
nan
can_sample_self.memory = 11070
can_sample_self.memory = 11070
nan
can_sample_self.memory = 11071
can_sample_self.memory = 11071
nan
can_sample_self.memory = 11072
can_sample_self.memory = 11072
nan
can_sample_self.memory = 11073
can_sample_self.memory = 11073
nan
can_sample_self.memory = 11074
can_sample_self.memory = 11074
nan
can_sample_self.memory = 11075
can_sample_self.memory = 11075
nan
can_sample_self.memory = 11076
can_sample_self.memory = 11076
nan
can_sample_self.memory = 11077


nan
can_sample_self.memory = 11188
can_sample_self.memory = 11188
nan
can_sample_self.memory = 11189
can_sample_self.memory = 11189
nan
can_sample_self.memory = 11190
can_sample_self.memory = 11190
nan
can_sample_self.memory = 11191
can_sample_self.memory = 11191
nan
can_sample_self.memory = 11192
can_sample_self.memory = 11192


 90%|███████████████████████████████████████████████████████████████████████▌        | 179/200 [07:10<00:54,  2.61s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11193
can_sample_self.memory = 11193
nan
can_sample_self.memory = 11194
can_sample_self.memory = 11194
nan
can_sample_self.memory = 11195
can_sample_self.memory = 11195
nan
can_sample_self.memory = 11196
can_sample_self.memory = 11196
nan
can_sample_self.memory = 11197
can_sample_self.memory = 11197
nan
can_sample_self.memory = 11198
can_sample_self.memory = 11198
nan
can_sample_self.memory = 11199
can_sample_self.memory = 11199
nan
can_sample_self.memory = 11200
can_sample_self.memory = 11200
nan
can_sample_self.memory = 11201
can_sample_self.memory = 11201
nan
can_sample_self.memory = 11202
can_sample_self.memory = 11202
nan
can_sample_self.memory = 11203
can_sample_self.memory = 11203
nan
can_sample_self.memory = 11204
can_sample_self.memory = 11204
nan
can_sample_self.memory = 11205
can_sample_self.memory = 11205
nan
can_sample_self.memory = 11206
can_sample_self.memory = 11206
nan
can_sample_self.memory = 11

 90%|████████████████████████████████████████████████████████████████████████        | 180/200 [07:14<00:55,  2.77s/it]

can_sample_self.memory = 11272
can_sample_self.memory = 11272
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11273
can_sample_self.memory = 11273
nan
can_sample_self.memory = 11274
can_sample_self.memory = 11274
nan
can_sample_self.memory = 11275
can_sample_self.memory = 11275
nan
can_sample_self.memory = 11276
can_sample_self.memory = 11276
nan
can_sample_self.memory = 11277
can_sample_self.memory = 11277
nan
can_sample_self.memory = 11278
can_sample_self.memory = 11278
nan
can_sample_self.memory = 11279
can_sample_self.memory = 11279
nan
can_sample_self.memory = 11280
can_sample_self.memory = 11280
nan
can_sample_self.memory = 11281
can_sample_self.memory = 11281
nan
can_sample_self.memory = 11282
can_sample_self.memory = 11282
nan
can_sample_self.memory = 11283
can_sample_self.memory = 11283
nan
can_sample_self.memory = 11284
can_sample_self.memory = 11284
nan
can_sample_self.memory = 11285
can_sample_self.memory = 11285
nan
can_sample_self.memory = 11286


 90%|████████████████████████████████████████████████████████████████████████▍       | 181/200 [07:18<01:00,  3.17s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11384
can_sample_self.memory = 11384
nan
can_sample_self.memory = 11385
can_sample_self.memory = 11385
nan
can_sample_self.memory = 11386
can_sample_self.memory = 11386
nan
can_sample_self.memory = 11387
can_sample_self.memory = 11387
nan
can_sample_self.memory = 11388
can_sample_self.memory = 11388
nan
can_sample_self.memory = 11389
can_sample_self.memory = 11389
nan
can_sample_self.memory = 11390
can_sample_self.memory = 11390
nan
can_sample_self.memory = 11391
can_sample_self.memory = 11391
nan
can_sample_self.memory = 11392
can_sample_self.memory = 11392
nan
can_sample_self.memory = 11393
can_sample_self.memory = 11393
nan
can_sample_self.memory = 11394
can_sample_self.memory = 11394
nan
can_sample_self.memory = 11395
can_sample_self.memory = 11395
nan
can_sample_self.memory = 11396
can_sample_self.memory = 11396
nan
can_sample_self.memory = 11397
can_sample_self.memory = 11397
nan
can_sample_self.memory = 11

 91%|████████████████████████████████████████████████████████████████████████▊       | 182/200 [07:19<00:48,  2.67s/it]

nan
can_sample_self.memory = 11420
can_sample_self.memory = 11420
nan
can_sample_self.memory = 11421
can_sample_self.memory = 11421
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11422
can_sample_self.memory = 11422
nan
can_sample_self.memory = 11423
can_sample_self.memory = 11423
nan
can_sample_self.memory = 11424
can_sample_self.memory = 11424
nan
can_sample_self.memory = 11425
can_sample_self.memory = 11425
nan
can_sample_self.memory = 11426
can_sample_self.memory = 11426
nan
can_sample_self.memory = 11427
can_sample_self.memory = 11427
nan
can_sample_self.memory = 11428
can_sample_self.memory = 11428
nan
can_sample_self.memory = 11429
can_sample_self.memory = 11429
nan
can_sample_self.memory = 11430
can_sample_self.memory = 11430
nan
can_sample_self.memory = 11431
can_sample_self.memory = 11431
nan
can_sample_self.memory = 11432
can_sample_self.memory = 11432
nan
can_sample_self.memory = 11433
can_sample_self.memory = 11433
nan
can_sample_self.memory = 11

 92%|█████████████████████████████████████████████████████████████████████████▏      | 183/200 [07:24<00:53,  3.17s/it]

nan
can_sample_self.memory = 11533
can_sample_self.memory = 11533
nan
can_sample_self.memory = 11534
can_sample_self.memory = 11534
nan
can_sample_self.memory = 11535
can_sample_self.memory = 11535
nan
can_sample_self.memory = 11536
can_sample_self.memory = 11536
nan
can_sample_self.memory = 11537
can_sample_self.memory = 11537
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11538
can_sample_self.memory = 11538
nan
can_sample_self.memory = 11539
can_sample_self.memory = 11539
nan
can_sample_self.memory = 11540
can_sample_self.memory = 11540
nan
can_sample_self.memory = 11541
can_sample_self.memory = 11541
nan
can_sample_self.memory = 11542
can_sample_self.memory = 11542
nan
can_sample_self.memory = 11543
can_sample_self.memory = 11543
nan
can_sample_self.memory = 11544
can_sample_self.memory = 11544
nan
can_sample_self.memory = 11545
can_sample_self.memory = 11545
nan
can_sample_self.memory = 11546
can_sample_self.memory = 11546
nan
can_sample_self.memory = 11

 92%|█████████████████████████████████████████████████████████████████████████▌      | 184/200 [07:25<00:43,  2.72s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11580
can_sample_self.memory = 11580
nan
can_sample_self.memory = 11581
can_sample_self.memory = 11581
nan
can_sample_self.memory = 11582
can_sample_self.memory = 11582
nan
can_sample_self.memory = 11583
can_sample_self.memory = 11583
nan
can_sample_self.memory = 11584
can_sample_self.memory = 11584
nan
can_sample_self.memory = 11585
can_sample_self.memory = 11585
nan
can_sample_self.memory = 11586
can_sample_self.memory = 11586
nan
can_sample_self.memory = 11587
can_sample_self.memory = 11587
nan
can_sample_self.memory = 11588
can_sample_self.memory = 11588
nan
can_sample_self.memory = 11589
can_sample_self.memory = 11589
nan
can_sample_self.memory = 11590
can_sample_self.memory = 11590
nan
can_sample_self.memory = 11591
can_sample_self.memory = 11591
nan
can_sample_self.memory = 11592
can_sample_self.memory = 11592
nan
can_sample_self.memory = 11593
can_sample_self.memory = 11593
nan
can_sample_self.memory = 11

can_sample_self.memory = 11705
can_sample_self.memory = 11705
nan
can_sample_self.memory = 11706
can_sample_self.memory = 11706
nan
can_sample_self.memory = 11707
can_sample_self.memory = 11707
nan
can_sample_self.memory = 11708
can_sample_self.memory = 11708
nan
can_sample_self.memory = 11709
can_sample_self.memory = 11709
nan
can_sample_self.memory = 11710
can_sample_self.memory = 11710
nan
can_sample_self.memory = 11711
can_sample_self.memory = 11711
nan
can_sample_self.memory = 11712
can_sample_self.memory = 11712
nan
can_sample_self.memory = 11713
can_sample_self.memory = 11713
nan
can_sample_self.memory = 11714
can_sample_self.memory = 11714
nan
can_sample_self.memory = 11715
can_sample_self.memory = 11715
nan
can_sample_self.memory = 11716
can_sample_self.memory = 11716
nan
can_sample_self.memory = 11717
can_sample_self.memory = 11717
nan
can_sample_self.memory = 11718
can_sample_self.memory = 11718
nan
can_sample_self.memory = 11719
can_sample_self.memory = 11719
nan
can_sample

 92%|██████████████████████████████████████████████████████████████████████████      | 185/200 [07:33<01:02,  4.18s/it]

nan
can_sample_self.memory = 11781
can_sample_self.memory = 11781
nan
can_sample_self.memory = 11782
can_sample_self.memory = 11782
nan
can_sample_self.memory = 11783
can_sample_self.memory = 11783
nan
can_sample_self.memory = 11784
can_sample_self.memory = 11784
nan
can_sample_self.memory = 11785
can_sample_self.memory = 11785
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11786
can_sample_self.memory = 11786
nan
can_sample_self.memory = 11787
can_sample_self.memory = 11787
nan
can_sample_self.memory = 11788
can_sample_self.memory = 11788
nan
can_sample_self.memory = 11789
can_sample_self.memory = 11789
nan
can_sample_self.memory = 11790
can_sample_self.memory = 11790
nan
can_sample_self.memory = 11791
can_sample_self.memory = 11791
nan
can_sample_self.memory = 11792
can_sample_self.memory = 11792
nan
can_sample_self.memory = 11793
can_sample_self.memory = 11793
nan
can_sample_self.memory = 11794
can_sample_self.memory = 11794
nan
can_sample_self.memory = 11

 93%|██████████████████████████████████████████████████████████████████████████▍     | 186/200 [07:36<00:54,  3.91s/it]

can_sample_self.memory = 11866
can_sample_self.memory = 11866
nan
can_sample_self.memory = 11867
can_sample_self.memory = 11867
nan
can_sample_self.memory = 11868
can_sample_self.memory = 11868
nan
can_sample_self.memory = 11869
can_sample_self.memory = 11869
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11870
can_sample_self.memory = 11870
nan
can_sample_self.memory = 11871
can_sample_self.memory = 11871
nan
can_sample_self.memory = 11872
can_sample_self.memory = 11872
nan
can_sample_self.memory = 11873
can_sample_self.memory = 11873
nan
can_sample_self.memory = 11874
can_sample_self.memory = 11874
nan
can_sample_self.memory = 11875
can_sample_self.memory = 11875
nan
can_sample_self.memory = 11876
can_sample_self.memory = 11876
nan
can_sample_self.memory = 11877
can_sample_self.memory = 11877
nan
can_sample_self.memory = 11878
can_sample_self.memory = 11878
nan
can_sample_self.memory = 11879
can_sample_self.memory = 11879
nan
can_sample_self.memory = 11880


 94%|██████████████████████████████████████████████████████████████████████████▊     | 187/200 [07:37<00:39,  3.06s/it]

nan
can_sample_self.memory = 11893
can_sample_self.memory = 11893
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 11894
can_sample_self.memory = 11894
nan
can_sample_self.memory = 11895
can_sample_self.memory = 11895
nan
can_sample_self.memory = 11896
can_sample_self.memory = 11896
nan
can_sample_self.memory = 11897
can_sample_self.memory = 11897
nan
can_sample_self.memory = 11898
can_sample_self.memory = 11898
nan
can_sample_self.memory = 11899
can_sample_self.memory = 11899
nan
can_sample_self.memory = 11900
can_sample_self.memory = 11900
nan
can_sample_self.memory = 11901
can_sample_self.memory = 11901
nan
can_sample_self.memory = 11902
can_sample_self.memory = 11902
nan
can_sample_self.memory = 11903
can_sample_self.memory = 11903
nan
can_sample_self.memory = 11904
can_sample_self.memory = 11904
nan
can_sample_self.memory = 11905
can_sample_self.memory = 11905
nan
can_sample_self.memory = 11906
can_sample_self.memory = 11906
nan
can_sample_self.memory = 11

nan
can_sample_self.memory = 12018
can_sample_self.memory = 12018
nan
can_sample_self.memory = 12019
can_sample_self.memory = 12019
nan
can_sample_self.memory = 12020
can_sample_self.memory = 12020
nan
can_sample_self.memory = 12021
can_sample_self.memory = 12021
nan
can_sample_self.memory = 12022
can_sample_self.memory = 12022
nan
can_sample_self.memory = 12023
can_sample_self.memory = 12023
nan
can_sample_self.memory = 12024
can_sample_self.memory = 12024
nan
can_sample_self.memory = 12025
can_sample_self.memory = 12025
nan
can_sample_self.memory = 12026
can_sample_self.memory = 12026
nan
can_sample_self.memory = 12027
can_sample_self.memory = 12027
nan
can_sample_self.memory = 12028
can_sample_self.memory = 12028
nan
can_sample_self.memory = 12029
can_sample_self.memory = 12029
nan
can_sample_self.memory = 12030
can_sample_self.memory = 12030
nan
can_sample_self.memory = 12031
can_sample_self.memory = 12031
nan
can_sample_self.memory = 12032
can_sample_self.memory = 12032
nan
can_sa

 94%|███████████████████████████████████████████████████████████████████████████▏    | 188/200 [07:42<00:44,  3.73s/it]

can_sample_self.memory = 12035
can_sample_self.memory = 12035
nan
can_sample_self.memory = 12036
can_sample_self.memory = 12036
nan
can_sample_self.memory = 12037
can_sample_self.memory = 12037
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12038
can_sample_self.memory = 12038
nan
can_sample_self.memory = 12039
can_sample_self.memory = 12039
nan
can_sample_self.memory = 12040
can_sample_self.memory = 12040
nan
can_sample_self.memory = 12041
can_sample_self.memory = 12041
nan
can_sample_self.memory = 12042
can_sample_self.memory = 12042
nan
can_sample_self.memory = 12043
can_sample_self.memory = 12043
nan
can_sample_self.memory = 12044
can_sample_self.memory = 12044
nan
can_sample_self.memory = 12045
can_sample_self.memory = 12045
nan
can_sample_self.memory = 12046
can_sample_self.memory = 12046
nan
can_sample_self.memory = 12047
can_sample_self.memory = 12047
nan
can_sample_self.memory = 12048
can_sample_self.memory = 12048
nan
can_sample_self.memory = 12049


nan
can_sample_self.memory = 12159
can_sample_self.memory = 12159
nan
can_sample_self.memory = 12160
can_sample_self.memory = 12160
nan
can_sample_self.memory = 12161
can_sample_self.memory = 12161
nan
can_sample_self.memory = 12162
can_sample_self.memory = 12162
nan
can_sample_self.memory = 12163
can_sample_self.memory = 12163
nan
can_sample_self.memory = 12164
can_sample_self.memory = 12164
nan
can_sample_self.memory = 12165
can_sample_self.memory = 12165
nan
can_sample_self.memory = 12166
can_sample_self.memory = 12166
nan
can_sample_self.memory = 12167
can_sample_self.memory = 12167
nan
can_sample_self.memory = 12168
can_sample_self.memory = 12168
nan
can_sample_self.memory = 12169
can_sample_self.memory = 12169
nan
can_sample_self.memory = 12170
can_sample_self.memory = 12170
nan
can_sample_self.memory = 12171
can_sample_self.memory = 12171
nan
can_sample_self.memory = 12172
can_sample_self.memory = 12172
nan
can_sample_self.memory = 12173
can_sample_self.memory = 12173
nan
can_sa

 94%|███████████████████████████████████████████████████████████████████████████▌    | 189/200 [07:48<00:47,  4.33s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12194
can_sample_self.memory = 12194
nan
can_sample_self.memory = 12195
can_sample_self.memory = 12195
nan
can_sample_self.memory = 12196
can_sample_self.memory = 12196
nan
can_sample_self.memory = 12197
can_sample_self.memory = 12197
nan
can_sample_self.memory = 12198
can_sample_self.memory = 12198
nan
can_sample_self.memory = 12199
can_sample_self.memory = 12199
nan
can_sample_self.memory = 12200
can_sample_self.memory = 12200
nan
can_sample_self.memory = 12201
can_sample_self.memory = 12201
nan
can_sample_self.memory = 12202
can_sample_self.memory = 12202
nan
can_sample_self.memory = 12203
can_sample_self.memory = 12203
nan
can_sample_self.memory = 12204
can_sample_self.memory = 12204
nan
can_sample_self.memory = 12205
can_sample_self.memory = 12205
nan
can_sample_self.memory = 12206
can_sample_self.memory = 12206
nan
can_sample_self.memory = 12207
can_sample_self.memory = 12207
nan
can_sample_self.memory = 12

 95%|████████████████████████████████████████████████████████████████████████████    | 190/200 [07:50<00:37,  3.72s/it]

nan
can_sample_self.memory = 12253
can_sample_self.memory = 12253
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12254
can_sample_self.memory = 12254
nan
can_sample_self.memory = 12255
can_sample_self.memory = 12255
nan
can_sample_self.memory = 12256
can_sample_self.memory = 12256
nan
can_sample_self.memory = 12257
can_sample_self.memory = 12257
nan
can_sample_self.memory = 12258
can_sample_self.memory = 12258
nan
can_sample_self.memory = 12259
can_sample_self.memory = 12259
nan
can_sample_self.memory = 12260
can_sample_self.memory = 12260
nan
can_sample_self.memory = 12261
can_sample_self.memory = 12261
nan
can_sample_self.memory = 12262
can_sample_self.memory = 12262
nan
can_sample_self.memory = 12263
can_sample_self.memory = 12263
nan
can_sample_self.memory = 12264
can_sample_self.memory = 12264
nan
can_sample_self.memory = 12265
can_sample_self.memory = 12265


 96%|████████████████████████████████████████████████████████████████████████████▍   | 191/200 [07:51<00:25,  2.79s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12266
can_sample_self.memory = 12266
nan
can_sample_self.memory = 12267
can_sample_self.memory = 12267
nan
can_sample_self.memory = 12268
can_sample_self.memory = 12268
nan
can_sample_self.memory = 12269
can_sample_self.memory = 12269
nan
can_sample_self.memory = 12270
can_sample_self.memory = 12270
nan
can_sample_self.memory = 12271
can_sample_self.memory = 12271
nan
can_sample_self.memory = 12272
can_sample_self.memory = 12272
nan
can_sample_self.memory = 12273
can_sample_self.memory = 12273
nan
can_sample_self.memory = 12274
can_sample_self.memory = 12274
nan
can_sample_self.memory = 12275
can_sample_self.memory = 12275
nan
can_sample_self.memory = 12276
can_sample_self.memory = 12276
nan
can_sample_self.memory = 12277
can_sample_self.memory = 12277
nan
can_sample_self.memory = 12278
can_sample_self.memory = 12278
nan
can_sample_self.memory = 12279
can_sample_self.memory = 12279
nan
can_sample_self.memory = 12

 96%|████████████████████████████████████████████████████████████████████████████▊   | 192/200 [07:53<00:20,  2.51s/it]

nan
can_sample_self.memory = 12308
can_sample_self.memory = 12308
nan
can_sample_self.memory = 12309
can_sample_self.memory = 12309
nan
can_sample_self.memory = 12310
can_sample_self.memory = 12310
nan
can_sample_self.memory = 12311
can_sample_self.memory = 12311
nan
can_sample_self.memory = 12312
can_sample_self.memory = 12312
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12313
can_sample_self.memory = 12313
nan
can_sample_self.memory = 12314
can_sample_self.memory = 12314
nan
can_sample_self.memory = 12315
can_sample_self.memory = 12315
nan
can_sample_self.memory = 12316
can_sample_self.memory = 12316
nan
can_sample_self.memory = 12317
can_sample_self.memory = 12317
nan
can_sample_self.memory = 12318
can_sample_self.memory = 12318
nan
can_sample_self.memory = 12319
can_sample_self.memory = 12319
nan
can_sample_self.memory = 12320
can_sample_self.memory = 12320
nan
can_sample_self.memory = 12321
can_sample_self.memory = 12321
nan
can_sample_self.memory = 12

 96%|█████████████████████████████████████████████████████████████████████████████▏  | 193/200 [07:57<00:21,  3.05s/it]

nan
can_sample_self.memory = 12425
can_sample_self.memory = 12425
nan
can_sample_self.memory = 12426
can_sample_self.memory = 12426
nan
can_sample_self.memory = 12427
can_sample_self.memory = 12427
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12428
can_sample_self.memory = 12428
nan
can_sample_self.memory = 12429
can_sample_self.memory = 12429
nan
can_sample_self.memory = 12430
can_sample_self.memory = 12430
nan
can_sample_self.memory = 12431
can_sample_self.memory = 12431
nan
can_sample_self.memory = 12432
can_sample_self.memory = 12432
nan
can_sample_self.memory = 12433
can_sample_self.memory = 12433
nan
can_sample_self.memory = 12434
can_sample_self.memory = 12434
nan
can_sample_self.memory = 12435
can_sample_self.memory = 12435
nan
can_sample_self.memory = 12436
can_sample_self.memory = 12436
nan
can_sample_self.memory = 12437
can_sample_self.memory = 12437
nan
can_sample_self.memory = 12438
can_sample_self.memory = 12438
nan
can_sample_self.memory = 12

 97%|█████████████████████████████████████████████████████████████████████████████▌  | 194/200 [07:59<00:15,  2.58s/it]

nan
can_sample_self.memory = 12461
can_sample_self.memory = 12461
nan
can_sample_self.memory = 12462
can_sample_self.memory = 12462
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12463
can_sample_self.memory = 12463
nan
can_sample_self.memory = 12464
can_sample_self.memory = 12464
nan
can_sample_self.memory = 12465
can_sample_self.memory = 12465
nan
can_sample_self.memory = 12466
can_sample_self.memory = 12466
nan
can_sample_self.memory = 12467
can_sample_self.memory = 12467
nan
can_sample_self.memory = 12468
can_sample_self.memory = 12468
nan
can_sample_self.memory = 12469
can_sample_self.memory = 12469
nan
can_sample_self.memory = 12470
can_sample_self.memory = 12470
nan
can_sample_self.memory = 12471
can_sample_self.memory = 12471
nan
can_sample_self.memory = 12472
can_sample_self.memory = 12472
nan
can_sample_self.memory = 12473
can_sample_self.memory = 12473
nan
can_sample_self.memory = 12474
can_sample_self.memory = 12474
nan
can_sample_self.memory = 12

 98%|██████████████████████████████████████████████████████████████████████████████  | 195/200 [08:00<00:10,  2.15s/it]

nan
can_sample_self.memory = 12486
can_sample_self.memory = 12486
nan
can_sample_self.memory = 12487
can_sample_self.memory = 12487
nan
can_sample_self.memory = 12488
can_sample_self.memory = 12488
nan
can_sample_self.memory = 12489
can_sample_self.memory = 12489
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12490
can_sample_self.memory = 12490
nan
can_sample_self.memory = 12491
can_sample_self.memory = 12491
nan
can_sample_self.memory = 12492
can_sample_self.memory = 12492
nan
can_sample_self.memory = 12493
can_sample_self.memory = 12493
nan
can_sample_self.memory = 12494
can_sample_self.memory = 12494
nan
can_sample_self.memory = 12495
can_sample_self.memory = 12495
nan
can_sample_self.memory = 12496
can_sample_self.memory = 12496
nan
can_sample_self.memory = 12497
can_sample_self.memory = 12497
nan
can_sample_self.memory = 12498
can_sample_self.memory = 12498
nan
can_sample_self.memory = 12499
can_sample_self.memory = 12499
nan
can_sample_self.memory = 12

 98%|██████████████████████████████████████████████████████████████████████████████▍ | 196/200 [08:01<00:07,  1.98s/it]

nan
can_sample_self.memory = 12524
can_sample_self.memory = 12524
nan
can_sample_self.memory = 12525
can_sample_self.memory = 12525
nan
can_sample_self.memory = 12526
can_sample_self.memory = 12526
nan
can_sample_self.memory = 12527
can_sample_self.memory = 12527
nan
can_sample_self.memory = 12528
can_sample_self.memory = 12528
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12529
can_sample_self.memory = 12529
nan
can_sample_self.memory = 12530
can_sample_self.memory = 12530
nan
can_sample_self.memory = 12531
can_sample_self.memory = 12531
nan
can_sample_self.memory = 12532
can_sample_self.memory = 12532
nan
can_sample_self.memory = 12533
can_sample_self.memory = 12533
nan
can_sample_self.memory = 12534
can_sample_self.memory = 12534
nan
can_sample_self.memory = 12535
can_sample_self.memory = 12535
nan
can_sample_self.memory = 12536
can_sample_self.memory = 12536
nan
can_sample_self.memory = 12537
can_sample_self.memory = 12537
nan
can_sample_self.memory = 12

 98%|██████████████████████████████████████████████████████████████████████████████▊ | 197/200 [08:05<00:07,  2.41s/it]

can_sample_self.memory = 12617
can_sample_self.memory = 12617
nan
can_sample_self.memory = 12618
can_sample_self.memory = 12618
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12619
can_sample_self.memory = 12619
nan
can_sample_self.memory = 12620
can_sample_self.memory = 12620
nan
can_sample_self.memory = 12621
can_sample_self.memory = 12621
nan
can_sample_self.memory = 12622
can_sample_self.memory = 12622
nan
can_sample_self.memory = 12623
can_sample_self.memory = 12623
nan
can_sample_self.memory = 12624
can_sample_self.memory = 12624
nan
can_sample_self.memory = 12625
can_sample_self.memory = 12625
nan
can_sample_self.memory = 12626
can_sample_self.memory = 12626
nan
can_sample_self.memory = 12627
can_sample_self.memory = 12627
nan
can_sample_self.memory = 12628
can_sample_self.memory = 12628
nan
can_sample_self.memory = 12629
can_sample_self.memory = 12629
nan
can_sample_self.memory = 12630
can_sample_self.memory = 12630
nan
can_sample_self.memory = 12631


 99%|███████████████████████████████████████████████████████████████████████████████▏| 198/200 [08:08<00:04,  2.50s/it]

nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12688
can_sample_self.memory = 12688
nan
can_sample_self.memory = 12689
can_sample_self.memory = 12689
nan
can_sample_self.memory = 12690
can_sample_self.memory = 12690
nan
can_sample_self.memory = 12691
can_sample_self.memory = 12691
nan
can_sample_self.memory = 12692
can_sample_self.memory = 12692
nan
can_sample_self.memory = 12693
can_sample_self.memory = 12693
nan
can_sample_self.memory = 12694
can_sample_self.memory = 12694
nan
can_sample_self.memory = 12695
can_sample_self.memory = 12695
nan
can_sample_self.memory = 12696
can_sample_self.memory = 12696
nan
can_sample_self.memory = 12697
can_sample_self.memory = 12697
nan
can_sample_self.memory = 12698
can_sample_self.memory = 12698
nan
can_sample_self.memory = 12699
can_sample_self.memory = 12699
nan
can_sample_self.memory = 12700
can_sample_self.memory = 12700
nan
can_sample_self.memory = 12701
can_sample_self.memory = 12701
nan
can_sample_self.memory = 12

100%|███████████████████████████████████████████████████████████████████████████████▌| 199/200 [08:10<00:02,  2.48s/it]

nan
can_sample_self.memory = 12748
can_sample_self.memory = 12748
nan
can_sample_self.memory = 12749
can_sample_self.memory = 12749
nan
can_sample_self.memory = 12750
can_sample_self.memory = 12750
nan
<<< episode >>>
type=<class 'numpy.ndarray'>
can_sample_self.memory = 12751
can_sample_self.memory = 12751
nan
can_sample_self.memory = 12752
can_sample_self.memory = 12752
nan
can_sample_self.memory = 12753
can_sample_self.memory = 12753
nan
can_sample_self.memory = 12754
can_sample_self.memory = 12754
nan
can_sample_self.memory = 12755
can_sample_self.memory = 12755
nan
can_sample_self.memory = 12756
can_sample_self.memory = 12756
nan
can_sample_self.memory = 12757
can_sample_self.memory = 12757
nan
can_sample_self.memory = 12758
can_sample_self.memory = 12758
nan
can_sample_self.memory = 12759
can_sample_self.memory = 12759
nan
can_sample_self.memory = 12760
can_sample_self.memory = 12760
nan
can_sample_self.memory = 12761
can_sample_self.memory = 12761


100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [08:11<00:00,  2.46s/it]

nan
can_sample_self.memory = 12762
can_sample_self.memory = 12762
nan
can_sample_self.memory = 12763
can_sample_self.memory = 12763
nan





In [20]:
print(stats)

{'MSE Loss': [2375844.75, 5088436.5, 148934096.0, 6128829.0, 66793812.0, 86714056.0, 34752756.0, 122371416.0, 17602574.0, 45964020.0, 40935384.0, 30054684.0, 16265722.0, 4496816.5, 698920.75, 5575220.0, 43543744.0, 36385980.0, 14978548.0, 3181392.25, 7803361.5, 11161882.0, 11837208.0, 10103793.0, 7649597.5, 3984485.0, 2891495.75, 3287455.75, 4108750.5, 1361164.75, 572897.9375, 276383.90625, 342138.84375, 89853.2578125, 178701.875, 5055975.0, 6063861.0, 5776304.5, 3720219.25, 2239389.75, 1595274.125, 1590436.25, 34762232.0, 35345768.0, 30570076.0, 24857896.0, 16299360.0, 9290569.0, 10029487.0, 2752579.0, 3405988.25, 7997517.5, 12808483.0, 13855658.0, 11858585.0, 10494506.0, 5999563.0, 3672106.25, 672283.375, 1696216.0, 3149018.0, 2996269.25, 2642430.5, 2002696.625, 1489426.75, 1692573.375, 1415853.0, 17030168.0, 16680293.0, 14665839.0, 9290231.0, 7906792.0, 8511752.0, 9984770.0, 9117038.0, 8242476.5, 1481299.125, 950045.625, 608150.6875, 1207343.375, 1096141.125, 1478952.875, 1262674.12

# Here Lies the Problem
As you see in above output we are getting nan in the loss which is because we are getting nan in the actions that the neural network is predicting. We are unable to pinpoint the issue it could be because of incorrect gradients, faulty loss function, bad policy or learning rate parameter, or faulty inputs (well inputs are faulty because the previous output is fault, but unable to understand why the first output is coming as nan even when input is correct)

## Show results

### Plot execution stats

In [21]:
plot_stats(stats)

<IPython.core.display.Javascript object>

### Plot the cost to go: $ - \max_a \hat q(s,a|\theta)$

In [None]:
#plot_cost_to_go(gym_env, q_network, xlabel='Car Position', ylabel='Velocity')

### Show resulting policy: $\pi(s)$

In [None]:
#plot_max_q(gym_env, q_network, xlabel='Car Position', ylabel='Velocity', action_labels=['Back', 'Do nothing', 'Forward'])

### Test the resulting agent

In [None]:
def test_agent(env: gym.Env, policy: Callable, episodes: int = 10) -> None:
    print('<< test_agent >> ')
    #plt.figure(figsize=(8, 8))
    for episode in range(episodes):
        state = env.reset()
        done = False
        #img = plt.imshow(env.render(mode='rgb_array'))
        while not done:
            p = policy(state)
            if isinstance(p, np.ndarray):
                action = np.random.choice(4, p=p)
            else:
                action = p
            next_state, _, done, _ = env.step(action)
            #img.set_data(env.render(mode='rgb_array'))
            #plt.axis('off')
            #display.display(plt.gcf())
            #display.clear_output(wait=True)
            state = next_state

In [None]:
#test_agent(gym_env, q_network, episodes=2)