In [None]:
!pip install gym

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install gym[atari,accept-rom-license]==0.21.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym[accept-rom-license,atari]==0.21.0
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 2.9 MB/s 
Collecting autorom[accept-rom-license]~=0.4.2
  Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)
Collecting ale-py~=0.7.1
  Downloading ale_py-0.7.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 52.8 MB/s 
Collecting AutoROM.accept-rom-license
  Downloading AutoROM.accept-rom-license-0.4.2.tar.gz (9.8 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: gym, AutoROM.accept-rom-license
  Building wheel for gym (setup.py) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616821 sha256

In [1]:
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import pickle
import gym
import time
start_time=time.time()

# hyperparameters
H = 800 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
resume = False # resume from previous checkpoint?
render = False

# model initialization
D = 80 * 80 # input dimensionality: 80x80 grid
if resume:
    model = pickle.load(open('save.p', 'rb'))
else:
    model = {}
    model['W1'] = np.random.randn(H,D) / np.sqrt(D) # "Xavier" initialization
    model['W2'] = np.random.randn(H) / np.sqrt(H)

grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x)) # sigmoid "squashing" function to interval [0,1]

def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    I = I[35:195] # crop
    I = I[::2,::2,0] # downsample by factor of 2
    I[I == 144] = 0 # erase background (background type 1)
    I[I == 109] = 0 # erase background (background type 2)
    I[I != 0] = 1 # everything else (paddles, ball) just set to 1
    return I.astype(np.float).ravel()

def discount_rewards(r):
    """ take 1D float array of rewards and compute discounted reward """
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(0, r.size)):
        if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r

def policy_forward(x):
    h = np.dot(model['W1'], x)
    h[h<0] = 0 # ReLU nonlinearity
    logp = np.dot(model['W2'], h)
    p = sigmoid(logp)
    return p, h # return probability of taking action 2, and hidden state

def policy_backward(eph, epdlogp):
    """ backward pass. (eph is array of intermediate hidden states) """
    dW2 = np.dot(eph.T, epdlogp).ravel()
    dh = np.outer(epdlogp, model['W2'])
    dh[eph <= 0] = 0 # backpro prelu
    dW1 = np.dot(dh.T, epx)
    return {'W1':dW1, 'W2':dW2}

env = gym.make("Pong-v0")
observation = env.reset()
prev_x = None # used in computing the difference frame
xs,hs,dlogps,drs = [],[],[],[]
running_reward = None
reward_sum = 0
episode_number = 0
while True:
    if render: env.render()

  # preprocess the observation, set input to network to be difference image
    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

  # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

  # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

  # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
        episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
        epx = np.vstack(xs)
        eph = np.vstack(hs)
        epdlogp = np.vstack(dlogps)
        epr = np.vstack(drs)
        xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
        discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
        discounted_epr -= np.mean(discounted_epr)
        discounted_epr /= np.std(discounted_epr)

        epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
        grad = policy_backward(eph, epdlogp)
        for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
        if episode_number % batch_size == 0:
            for k,v in model.items():
                g = grad_buffer[k] # gradient
                rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
                model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
                grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping
        running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
        print('resetting env. episode reward total was {}. running mean: {}'.format(reward_sum, running_reward))
        if episode_number % 100 == 0: pickle.dump(model, open('save.p', 'wb'))
        reward_sum = 0
        observation = env.reset() # reset env
        prev_x = None
        if running_reward>=-19:
            break
    if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
        print ('ep {}: game finished, reward: {}'.format(episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))
end_time=time.time()
td_model=end_time-start_time
print('Time difference for model is %s seconds:' %(td_model))


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return I.astype(np.float).ravel()


ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
ep 0: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -21.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game finished, reward: -1.0
ep 1: game fini

ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
ep 10: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.961369446021934
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: game finished, reward: -1.0
ep 11: ga

ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
ep 20: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.916544358835576
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: game finished, reward: -1.0
ep 21: ga

ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: 1.0 !!!!!!!!
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
ep 30: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.886074240103678
ep 31: game finished, reward: -1.0
ep 31: game finished, reward: -1.0
ep 31: game finished, reward: -1.0
ep 31: game finished, reward: -1.0
ep 31: game finished, reward: -1.0
e

ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
ep 40: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.877840137923727
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: game finished, reward: -1.0
ep 41: ga

ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
ep 50: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.861929812023938
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: game finished, reward: -1.0
ep 51: ga

ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
ep 60: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.855911143422286
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: game finished, reward: -1.0
ep 61: ga

ep 69: game finished, reward: -1.0
ep 69: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.801016980260048
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: 1.0 !!!!!!!!
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
ep 70: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.793006

ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: -1.0
ep 79: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.762862746759197
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
ep 80: game finished, reward: -1.0
e

ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
ep 89: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.718385845564185
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: game finished, reward: -1.0
ep 90: ga

ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: 1.0 !!!!!!!!
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: 1.0 !!!!!!!!
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
ep 99: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.65892168960262
ep 100: game finished, reward: -1.0
ep 100: game finished, reward: -1.0
ep 100: game finished, reward: -1.0
ep 100: game finished, reward: -1.0
ep 100: game finished, re

ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
ep 108: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.59397910212198
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finished, reward: -1.0
ep 109: game finishe

resetting env. episode reward total was -20.0. running mean: -20.589882006450633
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
ep 118: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.593983186386126
ep 119: game finished, reward: -1.0
ep 119: game finished, reward: -1.0
ep 119: ga

ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: 1.0 !!!!!!!!
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: 1.0 !!!!!!!!
ep 127: game finished, reward: -1.0
ep 127: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.53185205570237
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 128: game finished, reward: -1.0
ep 1

ep 136: game finished, reward: -1.0
ep 136: game finished, reward: -1.0
ep 136: game finished, reward: -1.0
ep 136: game finished, reward: -1.0
ep 136: game finished, reward: -1.0
ep 136: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.495075670553973
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: 1.0 !!!!!!!!
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: 1.0 !!!!!!!!
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 137: game finished, reward: -1.0
ep 

ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
ep 146: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.468861494472527
ep 147: game finished, reward: -1.0
ep 147: game finished, reward: -1.0
ep 147: game finished, reward: -1.0
ep 147: game finished, reward: -1.0
ep 147: game finished, reward: -1.0
ep 147: game finished, reward: -1.0
ep 147: game finish

ep 155: game finished, reward: -1.0
ep 155: game finished, reward: -1.0
ep 155: game finished, reward: -1.0
ep 155: game finished, reward: -1.0
ep 155: game finished, reward: -1.0
ep 155: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.466455359920907
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: 1.0 !!!!!!!!
ep 156: game finished, reward: -1.0
ep 156: game finished, reward: -1.0
ep 156: gam

ep 165: game finished, reward: 1.0 !!!!!!!!
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
ep 165: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.46018446988332
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game finished, reward: -1.0
ep 166: game

resetting env. episode reward total was -21.0. running mean: -20.488039599790874
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
ep 175: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.493159203792967
ep 176: game finished, reward: -1.0
ep 176: game finished, reward: -1.0
ep 176: ga

ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
ep 184: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.479629180313182
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finished, reward: -1.0
ep 185: game finish

ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: 1.0 !!!!!!!!
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: 1.0 !!!!!!!!
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
ep 194: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.46248203194035
ep 195: game finished, reward: -1.0
ep 195: game finished, reward: -1.0
ep 195: game finished, reward: -1.0
ep 195: game finished, reward: -1.0
ep 1

ep 203: game finished, reward: -1.0
ep 203: game finished, reward: 1.0 !!!!!!!!
ep 203: game finished, reward: -1.0
ep 203: game finished, reward: -1.0
ep 203: game finished, reward: -1.0
ep 203: game finished, reward: -1.0
ep 203: game finished, reward: -1.0
ep 203: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.45004061840088
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game finished, reward: -1.0
ep 204: game

ep 213: game finished, reward: -1.0
ep 213: game finished, reward: 1.0 !!!!!!!!
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: 1.0 !!!!!!!!
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: 1.0 !!!!!!!!
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
ep 213: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -20.404497901305813
ep 214: game finished, reward: -1.0
ep 214: game finished, reward: -1.0
ep 214: game finished, reward: 

ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
ep 222: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.407762998251087
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: 1.0 !!!!!!!!
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: game finished, reward: -1.0
ep 223: gam

resetting env. episode reward total was -21.0. running mean: -20.382203569716815
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
ep 232: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.388381534019647
ep 233: game finished, reward: -1.0
ep 233: game finished, reward: -1.0
ep 233: ga

ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
ep 241: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.365084416764894
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finished, reward: -1.0
ep 242: game finish

resetting env. episode reward total was -19.0. running mean: -20.362521812592288
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: -1.0
ep 251: game finished, reward: 1.0 !!!!!!!!
ep 251: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.358896594466366
ep 252: game finished, reward: -1.0
ep

ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
ep 260: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.376089891201172
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: 1.0 !!!!!!!!
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: game finished, reward: -1.0
ep 261: gam

ep 269: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.335005807332948
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: 1.0 !!!!!!!!
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: 1.0 !!!!!!!!
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
ep 270: game finished, reward: -1.0
resetting env. episode reward total was

ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
ep 279: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.3226782352213
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished, reward: -1.0
ep 280: game finished

ep 288: game finished, reward: -1.0
ep 288: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.333209637340307
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
ep 289: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.339877540966903
ep 290: ga

ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
ep 298: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.35007248718912
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finished, reward: -1.0
ep 299: game finishe

ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: 1.0 !!!!!!!!
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
ep 308: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.37291122735879
ep 309: game finished, reward: -1.0
ep 309: game finished, reward: -1.0
ep 309: game finished, reward: -1.0
ep 309: game finished, reward: -1.0
ep 309: game finished, reward: -1.0
ep 309: game

ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
ep 317: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.368225798994903
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: -1.0
ep 318: game finished, reward: 1.0 !!!!!!!!
ep 318: gam

ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: -1.0
ep 327: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.399893604583145
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: game finished, reward: -1.0
ep 328: gam

resetting env. episode reward total was -21.0. running mean: -20.432865843882404
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
ep 337: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.43853718544358
ep 338: game finished, reward: -1.0
ep 338: game finished, reward: -1.0
ep 338: gam

ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
ep 346: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.353054027579656
ep 347: game finished, reward: -1.0
ep 347: game finished, reward: -1.0
ep 347: game finished, reward: -1.0
ep 347: game finished, reward: -1.0
ep 347: game finished, reward: -1.0
ep 347: game finished, reward: -1.0
ep 347: game finish

ep 355: game finished, reward: -1.0
ep 355: game finished, reward: -1.0
ep 355: game finished, reward: -1.0
ep 355: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.370571103015923
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
ep 356: game finished, reward: -1.0
resetting env. epis

ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
ep 365: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.383104452602957
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finished, reward: -1.0
ep 366: game finish

ep 374: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.389169829690314
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: 1.0 !!!!!!!!
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
ep 375: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.38527813139341
ep 

resetting env. episode reward total was -19.0. running mean: -20.297478641015424
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: 1.0 !!!!!!!!
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: 1.0 !!!!!!!!
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
ep 384: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.2845038546

ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
ep 393: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.29892077370776
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: 1.0 !!!!!!!!
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game finished, reward: -1.0
ep 394: game

ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
ep 403: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.337605540611417
ep 404: game finished, reward: -1.0
ep 404: game finished, reward: -1.0
ep 404: game finished, reward: -1.0
ep 404: game finished, reward: -1.0
ep 404: game finished, reward: -1.0
ep 404: game finish

ep 412: game finished, reward: -1.0
ep 412: game finished, reward: -1.0
ep 412: game finished, reward: -1.0
ep 412: game finished, reward: -1.0
ep 412: game finished, reward: -1.0
ep 412: game finished, reward: -1.0
ep 412: game finished, reward: 1.0 !!!!!!!!
ep 412: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.328203651325374
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: game finished, reward: -1.0
ep 413: gam

ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
ep 422: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.36456879675439
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finished, reward: -1.0
ep 423: game finishe

ep 431: game finished, reward: -1.0
ep 431: game finished, reward: -1.0
ep 431: game finished, reward: -1.0
ep 431: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.35197433870258
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: 1.0 !!!!!!!!
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game finished, reward: -1.0
ep 432: game

ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: -1.0
ep 441: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.318120399819485
ep 442: game finished, reward: -1.0
ep 442: game finished, reward: -1.0
ep 442: game finished, reward: -1.0
ep 442: game finished, reward: -1.0
ep 442: game finished, reward: -1.0
ep 442: gam

ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: -1.0
ep 450: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.300210531116484
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: 1.0 !!!!!!!!
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: 1.0 !!!!!!!!
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: -1.0
ep 451: game finished, reward: 

ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
ep 460: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.339342702613155
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finished, reward: -1.0
ep 461: game finish

ep 469: game finished, reward: -1.0
ep 469: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.35823089710305
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
ep 470: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.36464858813202
ep 471: game

ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
ep 479: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.37193365315696
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: -1.0
ep 480: game finished, reward: 1.0 !!!!!!!!
ep 480: game

ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
ep 489: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.385942161681598
ep 490: game finished, reward: -1.0
ep 490: game finished, reward: 1.0 !!!!!!!!
ep 490: game finished, reward: -1.0
ep 490: game finished, reward: -1.0
ep 490: game finished, reward: 1.0 !!!!!!!!
ep 490: game finished, reward: 1.0 !!!!

ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: 1.0 !!!!!!!!
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
ep 498: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.325746211365082
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: game finished, reward: -1.0
ep 499: gam

resetting env. episode reward total was -21.0. running mean: -20.336118942408806
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
ep 508: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.342757752984717
ep 509: game finished, reward: -1.0
ep 509: game finished, reward: -1.0
ep 509: ga

ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
ep 517: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.362125069160275
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finished, reward: -1.0
ep 518: game finish

ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
ep 527: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.38535824799892
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finished, reward: -1.0
ep 528: game finishe

ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: 1.0 !!!!!!!!
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: 1.0 !!!!!!!!
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
ep 537: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.41461911643922
ep 538: game finished, reward: -1.0
ep 538: game finished, reward: -1.0
ep 538: game finished, reward: -1.0
ep 538: game finished, reward: -1.0
ep 5

ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: 1.0 !!!!!!!!
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
ep 546: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.388946962110644
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: 1.0 !!!!!!!!
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 547: game finished, reward: -1.0
ep 

ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
ep 556: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.42891875969928
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finished, reward: -1.0
ep 557: game finishe

ep 565: game finished, reward: -1.0
ep 565: game finished, reward: -1.0
ep 565: game finished, reward: -1.0
ep 565: game finished, reward: -1.0
ep 565: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.421326782284382
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finished, reward: -1.0
ep 566: game finish

ep 575: game finished, reward: -1.0
ep 575: game finished, reward: 1.0 !!!!!!!!
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: 1.0 !!!!!!!!
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
ep 575: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -20.38938656012277
ep 576: game finished, reward: -1.0
ep 576: game finished, reward: -1.0
ep 576: game finished, reward: -1.0
ep 576: game finished, reward: -1.0
ep 576: game finished, reward: -1.0
ep 5

ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
ep 584: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.385698596221452
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: 1.0 !!!!!!!!
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: game finished, reward: -1.0
ep 585: gam

resetting env. episode reward total was -20.0. running mean: -20.35194447661332
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: 1.0 !!!!!!!!
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: 1.0 !!!!!!!!
ep 594: game finished, reward: -1.0
ep 594: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.33842503184

ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
ep 603: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.367676954171607
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finished, reward: -1.0
ep 604: game finish

ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: 1.0 !!!!!!!!
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: 1.0 !!!!!!!!
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: 1.0 !!!!!!!!
ep 613: game finished, reward: 1.0 !!!!!!!!
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
ep 613: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -20.331070323185685
ep 614: game finished, reward: -1.0
ep 614: game finished, 

ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
ep 622: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.32069936227747
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finished, reward: -1.0
ep 623: game finishe

ep 631: game finished, reward: -1.0
ep 631: game finished, reward: -1.0
ep 631: game finished, reward: -1.0
ep 631: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.30170664713576
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: 1.0 !!!!!!!!
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: 1.0 !!!!!!!!
ep 632: game finished, reward: -1.0
ep 632: game finished, reward: -1.0
ep 6

ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
ep 641: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.263536556556023
ep 642: game finished, reward: -1.0
ep 642: game finished, reward: -1.0
ep 642: game finished, reward: -1.0
ep 642: game finished, reward: -1.0
ep 642: game finished, reward: -1.0
ep 642: game finished, reward: -1.0
ep 642: game finish

ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
ep 650: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.203575161110148
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finished, reward: -1.0
ep 651: game finish

ep 659: game finished, reward: -1.0
ep 659: game finished, reward: -1.0
ep 659: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.215846072004048
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: 1.0 !!!!!!!!
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
ep 660: game finished, reward: -1.0
resetting e

ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
ep 669: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.214120375615312
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finished, reward: -1.0
ep 670: game finish

ep 678: game finished, reward: -1.0
ep 678: game finished, reward: -1.0
ep 678: game finished, reward: -1.0
ep 678: game finished, reward: -1.0
ep 678: game finished, reward: -1.0
ep 678: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.205604508643326
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finished, reward: -1.0
ep 679: game finish

ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
ep 688: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.23409126176665
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finished, reward: -1.0
ep 689: game finishe

ep 697: game finished, reward: -1.0
ep 697: game finished, reward: -1.0
ep 697: game finished, reward: -1.0
ep 697: game finished, reward: -1.0
ep 697: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.223854109768265
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: 1.0 !!!!!!!!
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: 1.0 !!!!!!!!
ep 698: game finished, reward: -1.0
ep 698: game finished, reward: -1.0
ep 

ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
ep 707: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.25144916135916
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finished, reward: -1.0
ep 708: game finishe

resetting env. episode reward total was -20.0. running mean: -20.278035641816164
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
ep 717: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.285255285398
ep 718: game finished, reward: -1.0
ep 718: game finished, reward: -1.0
ep 718: game 

ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
ep 726: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.271361921153936
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finished, reward: -1.0
ep 727: game finish

resetting env. episode reward total was -21.0. running mean: -20.286430250842642
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: 1.0 !!!!!!!!
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: 1.0 !!!!!!!!
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
ep 736: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.2735659483

ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
ep 745: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.269035736701607
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finished, reward: -1.0
ep 746: game finish

ep 754: game finished, reward: -1.0
ep 754: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.263727786588742
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
ep 755: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.271090508722857
ep 756: ga

ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
ep 764: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.3053097572648
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: 1.0 !!!!!!!!
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game finished, reward: -1.0
ep 765: game 

ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
ep 774: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.315510171413756
ep 775: game finished, reward: -1.0
ep 775: game finished, reward: -1.0
ep 775: game finished, reward: -1.0
ep 775: game finished, reward: -1.0
ep 775: game finished, reward: -1.0
ep 775: game finished, reward: -1.0
ep 775: game finish

ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
ep 783: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.30685848791707
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: 1.0 !!!!!!!!
ep 784: game finished, reward: -1.0
ep 784: game finished, reward: -1.0
ep 784: game

ep 792: game finished, reward: -1.0
ep 792: game finished, reward: -1.0
ep 792: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.260616668182788
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
ep 793: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running 

ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
ep 802: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.264257642871144
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finished, reward: -1.0
ep 803: game finish

ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
ep 811: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.233346805697963
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: 1.0 !!!!!!!!
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: game finished, reward: -1.0
ep 812: gam

ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
ep 821: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.23889663935615
ep 822: game finished, reward: -1.0
ep 822: game finished, reward: -1.0
ep 822: game finished, reward: -1.0
ep 822: game finished, reward: -1.0
ep 822: game finished, reward: -1.0
ep 822: game finished, reward: -1.0
ep 822: game finishe

ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
ep 830: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.236783369355006
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finished, reward: -1.0
ep 831: game finish

ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: 1.0 !!!!!!!!
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
ep 840: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.242492667978695
ep 841: game finished, reward: -1.0
ep 841: game finished, reward: 1.0 !!!!!!!!
ep 841: game finished, reward: -1.0
ep 841: game finished, reward: -1.0
ep 841: game finished, reward: -1.0
ep 

ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
ep 849: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.221130212914666
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finished, reward: -1.0
ep 850: game finish

ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
ep 859: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.24774906527075
ep 860: game finished, reward: -1.0
ep 860: game finished, reward: -1.0
ep 860: game finished, reward: -1.0
ep 860: game finished, reward: -1.0
ep 860: game finished, reward: -1.0
ep 860: game finished, reward: -1.0
ep 860: game finishe

ep 868: game finished, reward: -1.0
ep 868: game finished, reward: -1.0
ep 868: game finished, reward: -1.0
ep 868: game finished, reward: -1.0
ep 868: game finished, reward: -1.0
ep 868: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.256121996243984
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finished, reward: -1.0
ep 869: game finish

ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
ep 878: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.27930371193764
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finished, reward: -1.0
ep 879: game finishe

resetting env. episode reward total was -21.0. running mean: -20.28456151170645
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
ep 888: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.291715896589388
ep 889: game finished, reward: -1.0
ep 889: game finished, reward: -1.0
ep 889: gam

ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: 1.0 !!!!!!!!
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
ep 897: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.267352097527482
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: game finished, reward: -1.0
ep 898: gam

ep 906: game finished, reward: -1.0
ep 906: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.26335240572135
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: 1.0 !!!!!!!!
ep 907: game finished, reward: 1.0 !!!!!!!!
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -1.0
ep 907: game finished, reward: -

ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: 1.0 !!!!!!!!
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: -1.0
ep 916: game finished, reward: 1.0 !!!!!!!!
ep 916: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.20962116266205
ep 917: game finished, reward: -1.0
ep 917: game finished, reward: -1.0
ep 917: game finished, reward: -1.0
ep 9

ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
ep 925: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.21082478735371
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finished, reward: -1.0
ep 926: game finishe

ep 934: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -20.210062891861053
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: 1.0 !!!!!!!!
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
ep 935: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.2079622629

ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
ep 944: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.22900259086793
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: 1.0 !!!!!!!!
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: 1.0 !!!!!!!!
ep 945: game finished, reward: -1.0
ep 945: game finished, reward: -1.0
ep 9

ep 953: game finished, reward: 1.0 !!!!!!!!
ep 953: game finished, reward: -1.0
ep 953: game finished, reward: -1.0
ep 953: game finished, reward: -1.0
ep 953: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.19015778318673
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game finished, reward: -1.0
ep 954: game

ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: 1.0 !!!!!!!!
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
ep 963: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.181203728061057
ep 964: game finished, reward: -1.0
ep 964: game finished, reward: -1.0
ep 964: game finished, reward: -1.0
ep 964: game finished, reward: -1.0
ep 964: game finished, reward: -1.0
ep 964: gam

ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
ep 972: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.15544931615033
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finished, reward: -1.0
ep 973: game finishe

resetting env. episode reward total was -20.0. running mean: -20.180537185390932
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
ep 982: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.188731813537025
ep 983: game finished, reward: -1.0
ep 983: game finished, reward: -1.0
ep 983: ga

ep 991: game finished, reward: -1.0
ep 991: game finished, reward: -1.0
ep 991: game finished, reward: -1.0
ep 991: game finished, reward: -1.0
ep 991: game finished, reward: -1.0
ep 991: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.230062082387022
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: 1.0 !!!!!!!!
ep 992: game finished, reward: -1.0
ep 992: game finished, reward: -1.0
ep 992: gam

ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: 1.0 !!!!!!!!
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
ep 1001: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.207489558024104
ep 1002: game finished, reward: -1.0
ep 1002: game finished, reward: -1.0
ep 1002: game finished, reward: -1.0
ep 1002: game finished, reward: -1.0
ep 1002: game finished,

ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
ep 1010: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.257385794005845
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward: -1.0
ep 1011: game finished, reward:

ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
ep 1020: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.30042822906444
ep 1021: game finished, reward: -1.0
ep 1021: game finished, reward: -1.0
ep 1021: game finished, reward: -1.0
ep 1021: game finished, reward: -1.0
ep 1021: game finished, reward: -1.0
ep 1021: game finished, reward: 

ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: 1.0 !!!!!!!!
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
ep 1029: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.27434545729527
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: -1.0
ep 1030: game finished, reward: 1.0 !!!!!!!!
ep 1030: game fi

ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: -1.0
ep 1038: game finished, reward: 1.0 !!!!!!!!
ep 1038: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.223133840616565
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished, reward: -1.0
ep 1039: game finished,

ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
ep 1047: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.231705424316864
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: 1.0 !!!!!!!!
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: -1.0
ep 1048: game finished, reward: 1.0 !!!!!!!!
ep 1048: game finished, reward: 1.0 !!!!!!!!
ep 1048: game finished, reward: -1.0
ep 1048

ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: 1.0 !!!!!!!!
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
ep 1056: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.16458435457735
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, reward: -1.0
ep 1057: game finished, 

ep 1065: game finished, reward: -1.0
ep 1065: game finished, reward: -1.0
ep 1065: game finished, reward: 1.0 !!!!!!!!
ep 1065: game finished, reward: -1.0
ep 1065: game finished, reward: -1.0
ep 1065: game finished, reward: -1.0
ep 1065: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -20.15869980550966
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, reward: -1.0
ep 1066: game finished, 

resetting env. episode reward total was -19.0. running mean: -20.172544861522738
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
ep 1075: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.18081941290751
ep 1076: game finished, reward: -1.0
ep 1076: game finished, r

resetting env. episode reward total was -19.0. running mean: -20.128221521021956
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: 1.0 !!!!!!!!
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
ep 1084: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.126939305811735
ep 1085: game fi

ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: 1.0 !!!!!!!!
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
ep 1093: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.144689829120686
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished, reward: -1.0
ep 1094: game finished,

ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
ep 1102: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.169940556354565
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward: -1.0
ep 1103: game finished, reward:

ep 1111: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.184560323241072
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
ep 1112: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.192714720008663
ep 1113: game finished, 

ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: 1.0 !!!!!!!!
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: 1.0 !!!!!!!!
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
ep 1121: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.175857953850965
ep 1122: game finished, reward: -1.0
ep 1122: game finished, reward: -1.0
ep 1122: game finished, reward: -1.0
ep 1122: game f

ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: -1.0
ep 1130: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.179675829009263
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished, reward: -1.0
ep 1131: game finished,

ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: -1.0
ep 1139: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.16365752470515
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, reward: -1.0
ep 1140: game finished, 

ep 1148: game finished, reward: 1.0 !!!!!!!!
ep 1148: game finished, reward: -1.0
ep 1148: game finished, reward: -1.0
ep 1148: game finished, reward: -1.0
ep 1148: game finished, reward: 1.0 !!!!!!!!
ep 1148: game finished, reward: -1.0
ep 1148: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -20.148323822097634
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game finished, reward: -1.0
ep 1149: game f

ep 1157: game finished, reward: -1.0
ep 1157: game finished, reward: -1.0
ep 1157: game finished, reward: -1.0
ep 1157: game finished, reward: -1.0
ep 1157: game finished, reward: 1.0 !!!!!!!!
ep 1157: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.11522467823617
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, reward: -1.0
ep 1158: game finished, 

ep 1166: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.11429400993384
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
ep 1167: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.123151069834503
ep 1168: game finished, r

ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: 1.0 !!!!!!!!
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: 1.0 !!!!!!!!
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
ep 1176: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.130563508357238
ep 1177: game finished, reward: -1.0
ep 1177: game finished, reward: -1.0
ep 1177: game finished, reward: -1.0
ep 1177: game finished, reward: -1.0
ep 1177: game finished, reward: -1.0
ep 1177: game finished, reward: -1.0
ep 1177: game f

ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
ep 1185: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.129267213896245
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward: -1.0
ep 1186: game finished, reward:

ep 1194: game finished, reward: -1.0
ep 1194: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.175166591944738
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
ep 1195: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.183

ep 1203: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.15032726956872
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: 1.0 !!!!!!!!
ep 1204: game finished, reward: 1.0 !!!!!!!!
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
ep 1204: game finished, reward: -1.0
resetting env. e

ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
ep 1213: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.184945974135935
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward: -1.0
ep 1214: game finished, reward:

ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
ep 1222: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.18895408276817
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: 1.0 !!!!!!!!
ep 1223: game finished, reward: -1.0
ep 1223: game finished, reward: -1.0
ep 1223: game finished, 

ep 1231: game finished, reward: -1.0
ep 1231: game finished, reward: -1.0
ep 1231: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.19211877369117
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: 1.0 !!!!!!!!
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, reward: -1.0
ep 1232: game finished, 

ep 1241: game finished, reward: 1.0 !!!!!!!!
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
ep 1241: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.192677281248237
ep 1242: game finished, reward: -1.0
ep 1242: game finished, reward: -1.0
ep 1242: game finished, reward: -1.0
ep 1242: game finished,

ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: 1.0 !!!!!!!!
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
ep 1250: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.165832575567787
ep 1251: game finished, reward: -1.0
ep 1251: game finished, reward: -1.0
ep 1251: game finished, reward: -1.0
ep 1251: game finished, reward: -1.0
ep 1251: game finished,

ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
ep 1259: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.209342116514097
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward: -1.0
ep 1260: game finished, reward:

ep 1268: game finished, reward: 1.0 !!!!!!!!
ep 1268: game finished, reward: -1.0
ep 1268: game finished, reward: -1.0
ep 1268: game finished, reward: -1.0
ep 1268: game finished, reward: -1.0
ep 1268: game finished, reward: -1.0
ep 1268: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.199878009532434
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished, reward: -1.0
ep 1269: game finished,

ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: 1.0 !!!!!!!!
ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: -1.0
ep 1277: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.144071138800573
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: 1.0 !!!!!!!!
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: 1.0 !!!!!!!!
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278: game finished, reward: -1.0
ep 1278

ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
ep 1287: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.19832228201981
ep 1288: game finished, reward: -1.0
ep 1288: game finished, reward: -1.0
ep 1288: game finished, reward: -1.0
ep 1288: game finished, reward: 1.0 !!!!!!!!
ep 1288: game finished, reward: -1.0
ep 1288: game finished, 

ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: 1.0 !!!!!!!!
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: -1.0
ep 1296: game finished, reward: 1.0 !!!!!!!!
ep 1296: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.161745427346123
ep 1297: game finished, reward: -1.0
ep 1297: game finished, reward: -1.0
ep 1297: game finished, reward: -1.0
ep 1297: game finished, reward: -1.0
ep 1297: game finished, reward: -1.0
ep 1297: game finished, reward: -1.0
ep 1297: game f

ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
ep 1305: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.19580743612059
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: -1.0
ep 1306: game finished, reward: 

ep 1314: game finished, reward: -1.0
ep 1314: game finished, reward: -1.0
ep 1314: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.19819016404591
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
ep 1315: game finished, reward: -1.0
resetting env. episode reward to

ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: 1.0 !!!!!!!!
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: 1.0 !!!!!!!!
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
ep 1324: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.226035795203366
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game finished, reward: -1.0
ep 1325: game f

ep 1333: game finished, reward: 1.0 !!!!!!!!
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: 1.0 !!!!!!!!
ep 1333: game finished, reward: 1.0 !!!!!!!!
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
ep 1333: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -20.17687096260774
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334: game finished, reward: -1.0
ep 1334:

ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
ep 1342: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.161953109856075
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward: -1.0
ep 1343: game finished, reward:

ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
ep 1351: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.157187070056498
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward: -1.0
ep 1352: game finished, reward:

ep 1360: game finished, reward: -1.0
ep 1360: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.162711842062087
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: 1.0 !!!!!!!!
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: -1.0
ep 1361: game finished, reward: 1.0 !!!!!!!!
ep 1361: game finished, reward: -1.0
ep 1361: game f

ep 1369: game finished, reward: -1.0
ep 1369: game finished, reward: -1.0
ep 1369: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.120091081785148
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: 1.0 !!!!!!!!
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished, reward: -1.0
ep 1370: game finished,

ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: 1.0 !!!!!!!!
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: 1.0 !!!!!!!!
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
ep 1379: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.146554577206985
ep 1380: game finished, reward: -1.0
ep 1380: game finished, reward: -1.0
ep 1380: game finished, reward: -1.0
ep 1380: game finished, reward: -1.0
ep 1380: game finished, reward: -1.0
ep 1380: game finished, reward: -1.0
ep 1380: game f

ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: 1.0 !!!!!!!!
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
ep 1388: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.123212481915544
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished, reward: -1.0
ep 1389: game finished,

ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: 1.0 !!!!!!!!
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
ep 1397: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.11371725433323
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: 1.0 !!!!!!!!
ep 1398: game finished, reward: -1.0
ep 1398: game finished, reward: -1.0
ep 1398: game fi

ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
ep 1406: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.10377401830476
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: -1.0
ep 1407: game finished, reward: 

ep 1415: game finished, reward: -1.0
ep 1415: game finished, reward: -1.0
ep 1415: game finished, reward: -1.0
ep 1415: game finished, reward: -1.0
ep 1415: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.12372877280536
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: 1.0 !!!!!!!!
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, reward: -1.0
ep 1416: game finished, 

resetting env. episode reward total was -20.0. running mean: -20.132052011446742
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
ep 1425: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.140731491332275
ep 1426: game finished, reward: -1.0
ep 1426: game finished, 

ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: 1.0 !!!!!!!!
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: 1.0 !!!!!!!!
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
ep 1434: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.137774583023933
ep 1435: game finished, reward: -1.0
ep 1435: game finished, reward: -1.0
ep 1435: game finished, reward: -1.0
ep 1435: game finished, reward: -1.0
ep 1435: game finished, reward: -1.0
ep 1435: game finished, reward: -1.0
ep 1435: game f

ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
ep 1443: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.164689894801512
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward: -1.0
ep 1444: game finished, reward:

ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
ep 1452: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.15027335861579
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: -1.0
ep 1453: game finished, reward: 

ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
ep 1462: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.20289669577669
ep 1463: game finished, reward: -1.0
ep 1463: game finished, reward: -1.0
ep 1463: game finished, reward: -1.0
ep 1463: game finished, reward: -1.0
ep 1463: game finished, reward: -1.0
ep 1463: game finished, reward: 

ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: 1.0 !!!!!!!!
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
ep 1471: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.156435662136506
ep 1472: game finished, reward: -1.0
ep 1472: game finished, reward: -1.0
ep 1472: game finished, reward: -1.0
ep 1472: game finished, reward: -1.0
ep 1472: game finished,

ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
ep 1480: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.180959824420512
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward: -1.0
ep 1481: game finished, reward:

ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
ep 1489: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.16600469605703
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: -1.0
ep 1490: game finished, reward: 

ep 1498: game finished, reward: -1.0
ep 1498: game finished, reward: -1.0
ep 1498: game finished, reward: -1.0
ep 1498: game finished, reward: -1.0
ep 1498: game finished, reward: -1.0
ep 1498: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.16184415409253
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: 1.0 !!!!!!!!
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: 1.0 !!!!!!!!
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game finished, reward: -1.0
ep 1499: game fi

resetting env. episode reward total was -20.0. running mean: -20.17685363131666
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: 1.0 !!!!!!!!
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
ep 1508: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.175085095003492
ep 1509: game fin

ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
ep 1517: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.20828267913609
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: -1.0
ep 1518: game finished, reward: 

ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: -1.0
ep 1526: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -20.198906333820993
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished, reward: -1.0
ep 1527: game finished,

ep 1535: game finished, reward: 1.0 !!!!!!!!
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: 1.0 !!!!!!!!
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: 1.0 !!!!!!!!
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
ep 1535: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -20.095233083374346
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536: game finished, reward: -1.0
ep 1536

ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: 1.0 !!!!!!!!
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
ep 1544: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.066725422132805
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished, reward: -1.0
ep 1545: game finished,

ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: 1.0 !!!!!!!!
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
ep 1553: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.070463676534143
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished, reward: -1.0
ep 1554: game finished,

ep 1562: game finished, reward: -1.0
ep 1562: game finished, reward: -1.0
ep 1562: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -20.102702279884323
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: 1.0 !!!!!!!!
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished, reward: -1.0
ep 1563: game finished,

ep 1571: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -18.0. running mean: -20.083622274329215
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: 1.0 !!!!!!!!
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
ep 1572: game finished, reward: -1.0
resetting env. episode reward total was -20.0. runni

resetting env. episode reward total was -19.0. running mean: -20.055622903951623
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: 1.0 !!!!!!!!
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: 1.0 !!!!!!!!
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: 1.0 !!!!!!!!
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
ep 1581: game finished, reward: -1.0
resetti

ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
ep 1590: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.070285102767464
ep 1591: game finished, reward: -1.0
ep 1591: game finished, reward: -1.0
ep 1591: game finished, reward: -1.0
ep 1591: game finished, reward: -1.0
ep 1591: game finished, reward: -1.0
ep 1591: game finished, reward:

ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: 1.0 !!!!!!!!
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
ep 1599: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -20.082941060663515
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: -1.0
ep 1600: game finished, reward: 1.0 !!!!!!!!
ep 1600: game finished, reward: -1.0
ep 1600: game f

ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
ep 1608: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.046849424880495
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward: -1.0
ep 1609: game finished, reward:

ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
ep 1617: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -20.015515250195392
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward: -1.0
ep 1618: game finished, reward:

ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
ep 1626: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.967494422562613
ep 1627: game finished, reward: -1.0
ep 1627: game finished, reward: -1.0
ep 1627: game finished, reward: -1.0
ep 1627: game finished, reward: -1.0
ep 1627: game finished, reward: -1.0
ep 1627: game finished, reward:

ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
ep 1635: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.9901152697218
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -1.0
ep 1636: game finished, reward: -

ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
ep 1644: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.933251835835485
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward: -1.0
ep 1645: game finished, reward:

ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: 1.0 !!!!!!!!
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
ep 1653: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.88270851677108
ep 1654: game finished, reward: -1.0
ep 1654: game finished, reward: 1.0 !!!!!!!!
ep 1654: game finished, reward: -1.0
ep 1654: game finished, reward: -1.0
ep 1654: game fi

ep 1661: game finished, reward: -1.0
ep 1661: game finished, reward: 1.0 !!!!!!!!
ep 1661: game finished, reward: -1.0
ep 1661: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.806171136398785
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: 1.0 !!!!!!!!
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game finished, reward: -1.0
ep 1662: game f

ep 1670: game finished, reward: -1.0
ep 1670: game finished, reward: -1.0
ep 1670: game finished, reward: -1.0
ep 1670: game finished, reward: -1.0
ep 1670: game finished, reward: -1.0
ep 1670: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.785171068222777
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: -1.0
ep 1671: game finished, reward: 1.0 !!!!!!!!
ep 1671: game finished, reward: -1.0
ep 1671: game finished,

ep 1679: game finished, reward: -1.0
ep 1679: game finished, reward: -1.0
ep 1679: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.803367729042083
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: 1.0 !!!!!!!!
ep 1680: game finished, reward: -1.0
ep 1680: game finished, reward: -1.0
ep 1680: game finished,

ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
ep 1688: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.76156417600186
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: -1.0
ep 1689: game finished, reward: 

ep 1697: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.800441763282073
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
ep 1698: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.812437345649254
ep 1699: game finished, 

ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: 1.0 !!!!!!!!
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
ep 1707: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.83750633108913
ep 1708: game finished, reward: -1.0
ep 1708: game finished, reward: -1.0
ep 1708: game finished, reward: -1.0
ep 1708: game finished, reward: -1.0
ep 1708: game finished, 

ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
ep 1716: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.89990439086738
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: -1.0
ep 1717: game finished, reward: 1.0 !!!!!!!!
ep 1717: game finished, reward: -1.0
ep 1717: game finished, 

ep 1725: game finished, reward: -1.0
ep 1725: game finished, reward: -1.0
ep 1725: game finished, reward: 1.0 !!!!!!!!
ep 1725: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.91969927779134
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: 1.0 !!!!!!!!
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: 1.0 !!!!!!!!
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726: game finished, reward: -1.0
ep 1726:

ep 1734: game finished, reward: -1.0
ep 1734: game finished, reward: -1.0
ep 1734: game finished, reward: -1.0
ep 1734: game finished, reward: -1.0
ep 1734: game finished, reward: -1.0
ep 1734: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.88848314940338
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: -1.0
ep 1735: game finished, reward: 

ep 1743: game finished, reward: -1.0
ep 1743: game finished, reward: -1.0
ep 1743: game finished, reward: -1.0
ep 1743: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.88831471051487
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: -1.0
ep 1744: game finished, reward: 

ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: -1.0
ep 1753: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.917455425865597
ep 1754: game finished, reward: -1.0
ep 1754: game finished, reward: -1.0
ep 1754: game finished, reward: -1.0
ep 1754: game finished, reward: -1.0
ep 1754: game finished,

ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: 1.0 !!!!!!!!
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
ep 1762: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.904221534886307
ep 1763: game finished, reward: -1.0
ep 1763: game finished, reward: -1.0
ep 1763: game finished, reward: -1.0
ep 1763: game finished, reward: -1.0
ep 1763: game finished,

ep 1770: game finished, reward: 1.0 !!!!!!!!
ep 1770: game finished, reward: -1.0
ep 1770: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -18.0. running mean: -19.8340637621138
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: 1.0 !!!!!!!!
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: 1.0 !!!!!!!!
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: 1.0 !!!!!!!!
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward: -1.0
ep 1771: game finished, reward

resetting env. episode reward total was -21.0. running mean: -19.858883193403912
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: 1.0 !!!!!!!!
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: 1.0 !!!!!!!!
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
ep 1780: game finished, reward: -1.0
resetting env. episode reward total was -19.0. runni

ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: 1.0 !!!!!!!!
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
ep 1789: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.882751158551187
ep 1790: game finished, reward: -1.0
ep 1790: game finished, reward: -1.0
ep 1790: game finished, reward: -1.0
ep 1790: game finished, reward: -1.0
ep 1790: game finished,

ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: 1.0 !!!!!!!!
ep 1798: game finished, reward: 1.0 !!!!!!!!
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
ep 1798: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.891926566461123
ep 1799: game finished, reward: -1.0
ep 1799: game finished, reward: -1.0
ep 1799: game finished, reward: 1.0 !!!!!!!!
ep 1799: game finished, reward: -1.0
ep 1799: game finished, reward: -1.0
ep 1799: game finished, reward: -1.0
ep 1799: game finished, reward: -1.0
ep 1799

ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: -1.0
ep 1807: game finished, reward: 1.0 !!!!!!!!
ep 1807: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.902230855630172
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: 1.0 !!!!!!!!
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game finished, reward: -1.0
ep 1808: game f

ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
ep 1816: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.89233704081811
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: -1.0
ep 1817: game finished, reward: 

ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: 1.0 !!!!!!!!
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
ep 1825: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -19.85311628044642
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, reward: -1.0
ep 1826: game finished, 

ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
ep 1834: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.85514570427148
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: -1.0
ep 1835: game finished, reward: 

ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: 1.0 !!!!!!!!
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
ep 1843: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.866903451010245
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: 1.0 !!!!!!!!
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game finished, reward: -1.0
ep 1844: game f

ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: 1.0 !!!!!!!!
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
ep 1852: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.849498106914318
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished, reward: -1.0
ep 1853: game finished,

ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: 1.0 !!!!!!!!
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
ep 1861: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.82292451123257
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, reward: -1.0
ep 1862: game finished, 

ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: 1.0 !!!!!!!!
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
ep 1870: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.857078676828703
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished, reward: -1.0
ep 1871: game finished,

ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
ep 1879: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.868486927668197
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward: -1.0
ep 1880: game finished, reward:

ep 1888: game finished, reward: -1.0
ep 1888: game finished, reward: -1.0
ep 1888: game finished, reward: -1.0
ep 1888: game finished, reward: -1.0
ep 1888: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.880543631627535
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward: -1.0
ep 1889: game finished, reward:

ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: 1.0 !!!!!!!!
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
ep 1898: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.919551064596483
ep 1899: game finished, reward: -1.0
ep 1899: game finished, reward: -1.0
ep 1899: game finished, reward: -1.0
ep 1899: game finished, reward: -1.0
ep 1899: game finished,

ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
ep 1907: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.926417351361202
ep 1908: game finished, reward: -1.0
ep 1908: game finished, reward: -1.0
ep 1908: game finished, reward: 1.0 !!!!!!!!
ep 1908: game finished, reward: -1.0
ep 1908: game finished, reward: -1.0
ep 1908: game finished, reward: -1.0
ep 1908: game finished, reward: 1.0 !!!!!!!!
ep 1908: game finished, reward: -1.0
ep 1908: game f

ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: 1.0 !!!!!!!!
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: 1.0 !!!!!!!!
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
ep 1916: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.904143584899703
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game finished, reward: -1.0
ep 1917: game f

ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: 1.0 !!!!!!!!
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
ep 1925: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.88408833159958
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, reward: -1.0
ep 1926: game finished, 

ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: -1.0
ep 1934: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.85625843576053
ep 1935: game finished, reward: -1.0
ep 1935: game finished, reward: -1.0
ep 1935: game finished, reward: -1.0
ep 1935: game finished, reward: -1.0
ep 1935: game finished, reward: -1.0
ep 1935: game finished, reward: 1.0 !!!!!!!!
ep 1935: game fi

ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: 1.0 !!!!!!!!
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: 1.0 !!!!!!!!
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: 1.0 !!!!!!!!
ep 1943: game finished, reward: -1.0
ep 1943: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -19.828500472485967
ep 1944: game finished, reward: -1.0
ep 1944: game finished, reward: -1.0
ep 1944: game finished, reward: -1.0
ep 1944: game finished, reward: -1.0
ep 1944: game finished, reward: -1.0

ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: 1.0 !!!!!!!!
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
ep 1952: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.814709364556826
ep 1953: game finished, reward: -1.0
ep 1953: game finished, reward: -1.0
ep 1953: game finished, reward: -1.0
ep 1953: game finished, reward: -1.0
ep 1953: game finished,

ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
ep 1961: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.801043620219467
ep 1962: game finished, reward: -1.0
ep 1962: game finished, reward: -1.0
ep 1962: game finished, reward: -1.0
ep 1962: game finished, reward: -1.0
ep 1962: game finished, reward: -1.0
ep 1962: game finished, reward:

ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
ep 1970: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.796714807490936
ep 1971: game finished, reward: -1.0
ep 1971: game finished, reward: -1.0
ep 1971: game finished, reward: -1.0
ep 1971: game finished, reward: -1.0
ep 1971: game finished, reward: -1.0
ep 1971: game finished, reward:

resetting env. episode reward total was -21.0. running mean: -19.736243580090413
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: 1.0 !!!!!!!!
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: -1.0
ep 1979: game finished, reward: 1.0 !!!!!!!!
ep 1979: game finished, reward: -1.0
resetting env. episode reward total was -19.0. runni

ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: -1.0
ep 1988: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.75280859571136
ep 1989: game finished, reward: -1.0
ep 1989: game finished, reward: -1.0
ep 1989: game finished, reward: -1.0
ep 1989: game finished, reward: -1.0
ep 1989: game finished, 

ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
ep 1997: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.764490153012854
ep 1998: game finished, reward: -1.0
ep 1998: game finished, reward: -1.0
ep 1998: game finished, reward: -1.0
ep 1998: game finished, reward: -1.0
ep 1998: game finished, reward: -1.0
ep 1998: game finished, reward:

ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: 1.0 !!!!!!!!
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: 1.0 !!!!!!!!
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: 1.0 !!!!!!!!
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: -1.0
ep 2006: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -19.706728854097207

ep 2014: game finished, reward: -1.0
ep 2014: game finished, reward: -1.0
ep 2014: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.689108209967475
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: -1.0
ep 2015: game finished, reward: 1.0 !!!!!!!!
ep 2015: game finished,

ep 2023: game finished, reward: -1.0
ep 2023: game finished, reward: -1.0
ep 2023: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.696775343341017
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: 1.0 !!!!!!!!
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: 1.0 !!!!!!!!
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: 1.0 !!!!!!!!
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024: game finished, reward: -1.0
ep 2024

ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: 1.0 !!!!!!!!
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
ep 2032: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.616612528912086
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished, reward: -1.0
ep 2033: game finished,

ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: 1.0 !!!!!!!!
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
ep 2041: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.649288586756267
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished, reward: -1.0
ep 2042: game finished,

ep 2050: game finished, reward: -1.0
ep 2050: game finished, reward: -1.0
ep 2050: game finished, reward: -1.0
ep 2050: game finished, reward: -1.0
ep 2050: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.70903669938657
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: -1.0
ep 2051: game finished, reward: 

ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: 1.0 !!!!!!!!
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: 1.0 !!!!!!!!
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: 1.0 !!!!!!!!
ep 2059: game finished, reward: -1.0
ep 2059: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.616495554044153
ep 2060: game finished, reward: -1.0
ep 2060: game finished, reward: 1.0 !!!!!!!!
ep 2060: game finished, reward: -1.0
ep 2060: game finished, reward: -1.0
ep 2060: game finished, reward: -1.0
ep 2060: game finished, reward: -1.0
ep 2060: game finished, reward: 1.0 !!!!!!!!
ep 2060: game finished, rewa

ep 2068: game finished, reward: 1.0 !!!!!!!!
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: 1.0 !!!!!!!!
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: 1.0 !!!!!!!!
ep 2068: game finished, reward: -1.0
ep 2068: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.60132262857446
ep 2069: game finished, reward: -1.0
ep 2069: game finished, reward: -1.0
ep 2069: game finished, reward: -1.0
ep 2069: game finished, reward: -1.0
ep 2069: game finished, reward: -1.0
ep 2069:

ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
ep 2077: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.64560823601634
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: 1.0 !!!!!!!!
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: -1.0
ep 2078: game finished, reward: 1.0 !!!!!!!!
ep 2078: game fi

resetting env. episode reward total was -19.0. running mean: -19.576119851183982
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: 1.0 !!!!!!!!
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
ep 2086: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.580358652672142
ep 2087: game fi

ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: 1.0 !!!!!!!!
ep 2095: game finished, reward: -1.0
ep 2095: game finished, reward: 1.0 !!!!!!!!
ep 2095: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.63509652178208
ep 2096: game finished, reward: -1.0
ep 2096: game finished, reward: -1.0
ep 2096: game finished, reward: -1.0
ep 2096: game finished, reward: -1.0
ep 2096: game finished, reward: -1.0
ep 2096: game finished, reward: -1.0
ep 2096: game fi

ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: 1.0 !!!!!!!!
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
ep 2104: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.647529794021946
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished, reward: -1.0
ep 2105: game finished,

resetting env. episode reward total was -19.0. running mean: -19.597508543902816
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: 1.0 !!!!!!!!
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
ep 2113: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.601533458463788
ep 2114: game fi

resetting env. episode reward total was -20.0. running mean: -19.613499134672495
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
ep 2122: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.62736414332577
ep 2123: game finished, reward: -1.0
ep 2123: game finished, r

ep 2130: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.618894797999296
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: 1.0 !!!!!!!!
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
ep 2131: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean:

resetting env. episode reward total was -21.0. running mean: -19.643892756314475
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: 1.0 !!!!!!!!
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: -1.0
ep 2140: game finished, reward: 1.0 !!!!!!!!
ep 2140: game finished, reward: -1.0
resetting env. episode reward total was -19.0. runni

ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: 1.0 !!!!!!!!
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
ep 2149: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.66882043412826
ep 2150: game finished, reward: -1.0
ep 2150: game finished, reward: -1.0
ep 2150: game finished, reward: -1.0
ep 2150: game finished, reward: -1.0
ep 2150: game finished, 

ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: 1.0 !!!!!!!!
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
ep 2158: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.697371459022246
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished, reward: -1.0
ep 2159: game finished,

ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: 1.0 !!!!!!!!
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: 1.0 !!!!!!!!
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
ep 2167: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.723333846337088
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: -1.0
ep 2168: game finished, reward: 1.0 !!!!!!!!
ep 2168

ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
ep 2176: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.68201337645398
ep 2177: game finished, reward: -1.0
ep 2177: game finished, reward: -1.0
ep 2177: game finished, reward: -1.0
ep 2177: game finished, reward: -1.0
ep 2177: game finished, reward: -1.0
ep 2177: game finished, reward: 

ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
ep 2185: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.738252072364908
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: 1.0 !!!!!!!!
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished, reward: -1.0
ep 2186: game finished,

ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: 1.0 !!!!!!!!
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: -1.0
ep 2194: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -19.0. running mean: -19.684390561684403
ep 2195: game finished, reward: -1.0
ep 2195: game finished, reward: -1.0
ep 2195: game finished, reward: -1.0
ep 2195: game finished, reward: 1.0 !!!!!!!!
ep 2195

resetting env. episode reward total was -20.0. running mean: -19.650932541503835
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: 1.0 !!!!!!!!
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
ep 2203: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.654423216088794
ep 2204: game fi

ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
ep 2212: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.713149817790537
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: -1.0
ep 2213: game finished, reward: 1.0 !!!!!!!!
ep 2213: game finished,

ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: 1.0 !!!!!!!!
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
ep 2221: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.671578431770545
ep 2222: game finished, reward: -1.0
ep 2222: game finished, reward: -1.0
ep 2222: game finished, reward: -1.0
ep 2222: game finished, reward: -1.0
ep 2222: game finished,

ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: 1.0 !!!!!!!!
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
ep 2230: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.69988231060657
ep 2231: game finished, reward: -1.0
ep 2231: game finished, reward: -1.0
ep 2231: game finished, reward: -1.0
ep 2231: game finished, reward: -1.0
ep 2231: game finished, reward: -1.0
ep 2231: game finished, reward: -1.0
ep 2231: game finished, 

ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: 1.0 !!!!!!!!
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
ep 2239: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.686737980410435
ep 2240: game finished, reward: -1.0
ep 2240: game finished, reward: -1.0
ep 2240: game finished, reward: -1.0
ep 2240: game finished, reward: -1.0
ep 2240: game finished,

ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: -1.0
ep 2248: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.732663207096454
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished, reward: -1.0
ep 2249: game finished,

ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: 1.0 !!!!!!!!
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
ep 2257: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.68919363547042
ep 2258: game finished, reward: -1.0
ep 2258: game finished, reward: -1.0
ep 2258: game finished, reward: -1.0
ep 2258: game finished, reward: -1.0
ep 2258: game finished, reward: -1.0
ep 2258: game finished, reward: -1.0
ep 2258: game finished, 

ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
ep 2266: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.667441676844213
ep 2267: game finished, reward: -1.0
ep 2267: game finished, reward: -1.0
ep 2267: game finished, reward: -1.0
ep 2267: game finished, reward: -1.0
ep 2267: game finished, reward: -1.0
ep 2267: game finished, reward:

resetting env. episode reward total was -19.0. running mean: -19.625585137637128
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: 1.0 !!!!!!!!
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: 1.0 !!!!!!!!
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
ep 2275: game finished, reward: -1.0
resetting env. episode reward total was -19.0. runni

ep 2283: game finished, reward: -1.0
ep 2283: game finished, reward: -1.0
ep 2283: game finished, reward: -1.0
ep 2283: game finished, reward: -1.0
ep 2283: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.601181793890994
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward: -1.0
ep 2284: game finished, reward:

ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: 1.0 !!!!!!!!
ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: -1.0
ep 2292: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.58802031543187
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: 1.0 !!!!!!!!
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game finished, reward: -1.0
ep 2293: game fi

ep 2301: game finished, reward: -1.0
ep 2301: game finished, reward: -1.0
ep 2301: game finished, reward: -1.0
ep 2301: game finished, reward: -1.0
ep 2301: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.634425750541137
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: 1.0 !!!!!!!!
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished, reward: -1.0
ep 2302: game finished,

ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: 1.0 !!!!!!!!
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
ep 2310: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.59926093378102
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: 1.0 !!!!!!!!
ep 2311: game finished, reward: -1.0
ep 2311: game finished, reward: -1.0
ep 2311: game fi

ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: 1.0 !!!!!!!!
ep 2319: game finished, reward: -1.0
ep 2319: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.623627899696285
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: 1.0 !!!!!!!!
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: -1.0
ep 2320: game finished, reward: 1.0 !!!!!!!!
ep 2320: game finished, reward: -1.0
ep 2320

ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
ep 2328: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.609578374160126
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward: -1.0
ep 2329: game finished, reward:

ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: 1.0 !!!!!!!!
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: 1.0 !!!!!!!!
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
ep 2337: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.60557322017437
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game finished, reward: -1.0
ep 2338: game fi

ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: 1.0 !!!!!!!!
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
ep 2346: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.600755721852998
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished, reward: -1.0
ep 2347: game finished,

ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: -1.0
ep 2355: game finished, reward: 1.0 !!!!!!!!
ep 2355: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -19.633433625481903
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished, reward: -1.0
ep 2356: game finished,

ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: -1.0
ep 2364: game finished, reward: 1.0 !!!!!!!!
ep 2364: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -18.0. running mean: -19.635140157100373
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game finished, reward: -1.0
ep 2365: game f

ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
ep 2373: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.648354756613987
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward: -1.0
ep 2374: game finished, reward:

ep 2382: game finished, reward: 1.0 !!!!!!!!
ep 2382: game finished, reward: 1.0 !!!!!!!!
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: 1.0 !!!!!!!!
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: 1.0 !!!!!!!!
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
ep 2382: game finished, reward: -1.0
resetting env. episode reward total was -16.0. running mean: -19.627808223908588
ep 2383: game finished, reward: -1.0
ep 2383: game finished, reward: 1.0 !!!!!!!!
ep 2383: game finished, reward: -1.0
ep 2383: game finished, reward: -1.0
ep 2383: game finished, reward: -1.0
ep 2383: game finished, reward: -1.0
ep 2383: game finished, reward: 1.0 !!!!!!!!
ep 2383: game finished, reward: -1.0
ep 2383: game finish

resetting env. episode reward total was -18.0. running mean: -19.559014716343807
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: 1.0 !!!!!!!!
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
ep 2391: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.563424569180366
ep 2392: game fi

ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: 1.0 !!!!!!!!
ep 2399: game finished, reward: -1.0
ep 2399: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.510087210427514
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: -1.0
ep 2400: game finished, reward: 1.0 !!!!!!!!
ep 2400: game finished, reward: -1.0
ep 2400: game f

ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
ep 2408: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.495581306921512
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward: -1.0
ep 2409: game finished, reward:

ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: 1.0 !!!!!!!!
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: 1.0 !!!!!!!!
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: 1.0 !!!!!!!!
ep 2417: game finished, reward: -1.0
ep 2417: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.45195814478986
ep 2418: game finished, reward: -1.0
ep 2418: game finished, reward: -1.0
ep 2418: game finished, reward: -1.0
ep 2418:

ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: 1.0 !!!!!!!!
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: -1.0
ep 2426: game finished, reward: 1.0 !!!!!!!!
ep 2426: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.479552293812656
ep 2427: game finished, reward: -1.0
ep 2427: game finished, reward: -1.0
ep 2427: game finished, reward: -1.0
ep 2427: game f

ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: 1.0 !!!!!!!!
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: 1.0 !!!!!!!!
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
ep 2435: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.51377701211337
ep 2436: game finished, reward: -1.0
ep 2436: game finished, reward: -1.0
ep 2436: game finished, reward: -1.0
ep 2436: game finished, reward: -1.0
ep 2436: game finished, reward: -1.0
ep 2436: game fi

ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: 1.0 !!!!!!!!
ep 2444: game finished, reward: -1.0
ep 2444: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.565533766427592
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished, reward: -1.0
ep 2445: game finished,

ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: 1.0 !!!!!!!!
ep 2453: game finished, reward: -1.0
ep 2453: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.602337097055425
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: -1.0
ep 2454: game finished, reward: 1.0 !!!!!!!!
ep 2454: game f

ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
ep 2462: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.628473961023566
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: 1.0 !!!!!!!!
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: 1.0 !!!!!!!!
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game finished, reward: -1.0
ep 2463: game f

ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: 1.0 !!!!!!!!
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
ep 2471: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.64194976973213
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, reward: -1.0
ep 2472: game finished, 

ep 2480: game finished, reward: 1.0 !!!!!!!!
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: 1.0 !!!!!!!!
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: 1.0 !!!!!!!!
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: -1.0
ep 2480: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -16.0. running mean: -19.632335651177826
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: -1.0
ep 2481: game finished, reward: 1.0 

ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
ep 2489: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.597360277706006
ep 2490: game finished, reward: -1.0
ep 2490: game finished, reward: -1.0
ep 2490: game finished, reward: -1.0
ep 2490: game finished, reward: -1.0
ep 2490: game finished, reward: -1.0
ep 2490: game finished, reward:

ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
ep 2498: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.603566622028627
ep 2499: game finished, reward: -1.0
ep 2499: game finished, reward: -1.0
ep 2499: game finished, reward: -1.0
ep 2499: game finished, reward: -1.0
ep 2499: game finished, reward:

ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
ep 2507: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.62796771007529
ep 2508: game finished, reward: -1.0
ep 2508: game finished, reward: -1.0
ep 2508: game finished, reward: -1.0
ep 2508: game finished, reward: -1.0
ep 2508: game finished, reward: -1.0
ep 2508: game finished, reward: 

ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
ep 2516: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.67138334124447
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: 1.0 !!!!!!!!
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, reward: -1.0
ep 2517: game finished, 

ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
ep 2525: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.690586300552166
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: -1.0
ep 2526: game finished, reward: 1.0 !!!!!!!!
ep 2526: game finished,

ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
ep 2534: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.62263446166966
ep 2535: game finished, reward: -1.0
ep 2535: game finished, reward: -1.0
ep 2535: game finished, reward: 1.0 !!!!!!!!
ep 2535: game finished, reward: -1.0
ep 2535: game finished, reward: -1.0
ep 2535: game finished, 

ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
ep 2543: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.647590467328868
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward: -1.0
ep 2544: game finished, reward:

ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: 1.0 !!!!!!!!
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
ep 2552: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.649722496875047
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished, reward: -1.0
ep 2553: game finished,

ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: 1.0 !!!!!!!!
ep 2561: game finished, reward: -1.0
ep 2561: game finished, reward: 1.0 !!!!!!!!
ep 2561: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.62265713529792
ep 2562: game finished, reward: -1.0
ep 2562: game finished, reward: -1.0
ep 2562: game finished, reward: -1.0
ep 2562: game fi

ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
ep 2570: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.63771800177236
ep 2571: game finished, reward: -1.0
ep 2571: game finished, reward: 1.0 !!!!!!!!
ep 2571: game finished, reward: -1.0
ep 2571: game finished, reward: 1.0 !!!!!!!!
ep 2571: game finished, reward: -1.0
ep 2571: game fi

ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: 1.0 !!!!!!!!
ep 2578: game finished, reward: 1.0 !!!!!!!!
ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: -1.0
ep 2578: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -19.528266752883574
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: 1.0 !!!!!!!!
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579: game finished, reward: -1.0
ep 2579

ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
ep 2587: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.48412390673382
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: -1.0
ep 2588: game finished, reward: 

ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: 1.0 !!!!!!!!
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: 1.0 !!!!!!!!
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
ep 2596: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.452055656339443
ep 2597: game finished, reward: -1.0
ep 2597: game finished, reward: -1.0
ep 2597: game finished, reward: -1.0
ep 2597: game f

ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: 1.0 !!!!!!!!
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: 1.0 !!!!!!!!
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: 1.0 !!!!!!!!
ep 2605: game finished, reward: -1.0
ep 2605: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -19.429470729643274
ep 2606: game finished, reward: -1.0

ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: 1.0 !!!!!!!!
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: 1.0 !!!!!!!!
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: 1.0 !!!!!!!!
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: 1.0 !!!!!!!!
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
ep 2614: game finished, reward: -1.0
resetting env. episode reward total was -17.0. running mean: -19.487152969374165
ep 2615: game finished, reward: -1.0
ep 2615: game finished, reward: -1.0
ep 2615: game finished, reward: -1.0
ep 2615: game finished, reward: -1.0
ep 2615: game finished, reward: -1.0

ep 2622: game finished, reward: -1.0
ep 2622: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.458565024730902
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: 1.0 !!!!!!!!
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: 1.0 !!!!!!!!
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game finished, reward: -1.0
ep 2623: game f

ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: 1.0 !!!!!!!!
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: 1.0 !!!!!!!!
ep 2631: game finished, reward: 1.0 !!!!!!!!
ep 2631: game finished, reward: -1.0
ep 2631: game finished, reward: 1.0 !!!!!!!!
ep 2631: game finished, reward: -1.0
resetting env. episode reward total was -16.0. running mean: -19.358242523661513
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0
ep 2632: game finished, reward: -1.0

ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
ep 2640: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.35716945742593
ep 2641: game finished, reward: -1.0
ep 2641: game finished, reward: -1.0
ep 2641: game finished, reward: -1.0
ep 2641: game finished, reward: -1.0
ep 2641: game finished, reward: -1.0
ep 2641: game finished, reward: 

ep 2648: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.33888715146043
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: -1.0
ep 2649: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: 

ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: -1.0
ep 2657: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.317751971221075
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: -1.0
ep 2658: game finished, reward: 1.0 !!!!!!!!
ep 2658: game finished, reward: -1.0
ep 2658: game f

ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: 1.0 !!!!!!!!
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: 1.0 !!!!!!!!
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: 1.0 !!!!!!!!
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
ep 2666: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.290083521790525
ep 2667: game finished, reward: -1.0
ep 2667: game finished, reward: 1.0 !!!!!!!!
ep 2667: game finished, reward: -1.0
ep 2667: game finished, reward: -1.0
ep 2667: game finished, reward: 1.0 !!!!!!!!
ep 2667: game finished, reward: -1.0
ep 2667: game finished, reward: -1.0
ep 2667: game finished, reward: 1.0 !!!!!!!!
ep 2667: game finished, reward: -1.0
ep 2667: game finish

resetting env. episode reward total was -19.0. running mean: -19.2484349494489
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: 1.0 !!!!!!!!
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: 1.0 !!!!!!!!
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: 1.0 !!!!!!!!
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
ep 2675: game finished, reward: -1.0
resetting

ep 2683: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.27479618182011
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: 1.0 !!!!!!!!
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: 1.0 !!!!!!!!
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: 1.0 !!!!!!!!
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684: game finished, reward: -1.0
ep 2684:

resetting env. episode reward total was -21.0. running mean: -19.32916077244718
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: 1.0 !!!!!!!!
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
ep 2693: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.33586916472271
ep 2694: game fini

ep 2701: game finished, reward: -1.0
ep 2701: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.34931083599175
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: 1.0 !!!!!!!!
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
ep 2702: game finished, reward: -1.0
resetting env. episode r

ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: 1.0 !!!!!!!!
ep 2711: game finished, reward: 1.0 !!!!!!!!
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
ep 2711: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.411808847223195
ep 2712: game finished, reward: -1.0
ep 2712: game finished, reward: -1.0
ep 2712: game finished, reward: -1.0
ep 2712: game f

ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: 1.0 !!!!!!!!
ep 2720: game finished, reward: 1.0 !!!!!!!!
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
ep 2720: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.42338183396239
ep 2721: game finished, reward: -1.0
ep 2721: game finished, reward: -1.0
ep 2721: game finished, reward: -1.0
ep 2721: game fi

ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
ep 2728: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.361861204576083
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: 1.0 !!!!!!!!
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: 1.0 !!!!!!!!
ep 2729: game finished, reward: -1.0
ep 2729: game finished, reward: -1.0
ep 2729: game f

resetting env. episode reward total was -20.0. running mean: -19.26703746798973
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: 1.0 !!!!!!!!
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: -1.0
ep 2737: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -19.0. runnin

ep 2745: game finished, reward: -1.0
ep 2745: game finished, reward: -1.0
ep 2745: game finished, reward: -1.0
ep 2745: game finished, reward: -1.0
ep 2745: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.27239152048913
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: 1.0 !!!!!!!!
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: 1.0 !!!!!!!!
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game finished, reward: -1.0
ep 2746: game fi

ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: 1.0 !!!!!!!!
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
ep 2754: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.220486114092832
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished, reward: -1.0
ep 2755: game finished,

ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: -1.0
ep 2763: game finished, reward: 1.0 !!!!!!!!
ep 2763: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.27771919524389
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: -1.0
ep 2764: game finished, reward: 1.0 !!!!!!!!
ep 2764: game fi

ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: 1.0 !!!!!!!!
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: 1.0 !!!!!!!!
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: 1.0 !!!!!!!!
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
ep 2772: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.28164376947579
ep 2773: game finished, reward: -1.0
ep 2773: game finished, reward: -1.0
ep 2773: game finished, reward: -1.0
ep 2773: game finished, reward: -1.0
ep 2773: game finished, reward: 1.0 !!!!!!!!


ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: 1.0 !!!!!!!!
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: 1.0 !!!!!!!!
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: 1.0 !!!!!!!!
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
ep 2781: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.276398341561443
ep 2782: game finished, reward: -1.0
ep 2782: game finished, reward: -1.0
ep 2782

ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: 1.0 !!!!!!!!
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: 1.0 !!!!!!!!
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
ep 2790: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.310316932503582
ep 2791: game finished, reward: -1.0
ep 2791: game finished, reward: -1.0
ep 2791: game finished, reward: -1.0
ep 2791: game f

ep 2798: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.297680539741542
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: 1.0 !!!!!!!!
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
ep 2799: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean:

ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: 1.0 !!!!!!!!
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
ep 2807: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.262529259469297
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: 1.0 !!!!!!!!
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game finished, reward: -1.0
ep 2808: game f

ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
ep 2816: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.260117154509253
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: 1.0 !!!!!!!!
ep 2817: game finished, reward: -1.0
ep 2817: game finished, reward: -1.0
ep 2817: game finished,

ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: 1.0 !!!!!!!!
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
ep 2825: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.306026839784145
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: 1.0 !!!!!!!!
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game finished, reward: -1.0
ep 2826: game f

ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: -1.0
ep 2834: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.357208151807818
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: 1.0 !!!!!!!!
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: -1.0
ep 2835: game finished, reward: 1.0 !!!!!!!!
ep 2835

ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: 1.0 !!!!!!!!
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: -1.0
ep 2843: game finished, reward: 1.0 !!!!!!!!
ep 2843: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -19.335433637141012
ep 2844: game finished, reward: -1.0
ep 2844: game finished, reward: -1.0
ep 2844: game finished, reward: -1.0
ep 2844

ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
ep 2851: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.29252896716536
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: 1.0 !!!!!!!!
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: 1.0 !!!!!!!!
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: 1.0 !!!!!!!!
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852: game finished, reward: -1.0
ep 2852:

ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: 1.0 !!!!!!!!
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
ep 2860: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.249252817056313
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished, reward: -1.0
ep 2861: game finished,

ep 2868: game finished, reward: -1.0
ep 2868: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.209137011468066
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: 1.0 !!!!!!!!
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
ep 2869: game finished, reward: -1.0
resetting env. episode 

ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: 1.0 !!!!!!!!
ep 2877: game finished, reward: -1.0
ep 2877: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.18056614755635
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, reward: -1.0
ep 2878: game finished, 

ep 2886: game finished, reward: 1.0 !!!!!!!!
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
ep 2886: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.21223473797103
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: -1.0
ep 2887: game finished, reward: 1.0 !!!!!!!!
ep 2887: game finished, reward: -1.0
ep 2887: game fi

ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: 1.0 !!!!!!!!
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: 1.0 !!!!!!!!
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: 1.0 !!!!!!!!
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: -1.0
ep 2895: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -17.0. running mean: -19.230475529647137
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0
ep 2896: game finished, reward: -1.0

ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
ep 2904: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.25819674613266
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: -1.0
ep 2905: game finished, reward: 

ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: 1.0 !!!!!!!!
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: 1.0 !!!!!!!!
ep 2913: game finished, reward: -1.0
ep 2913: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.24643973389207
ep 2914: game finished, reward: -1.0
ep 2914: game finished, reward: -1.0
ep 2914: game finished, reward: -1.0
ep 2914: game fi

resetting env. episode reward total was -20.0. running mean: -19.26554246043216
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
ep 2922: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.28288703582784
ep 2923: game finished, reward: -1.0
ep 2923: game finished, re

ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: 1.0 !!!!!!!!
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
ep 2930: game finished, reward: -1.0
resetting env. episode reward total was -16.0. running mean: -19.232281422384425
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: 1.0 !!!!!!!!
ep 2931: game finished, reward: -1.0
ep 2931: game finished, reward: -1.0
ep 2931: game f

ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
ep 2939: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.279949253175353
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: 1.0 !!!!!!!!
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: 1.0 !!!!!!!!
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game finished, reward: -1.0
ep 2940: game f

ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: 1.0 !!!!!!!!
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: 1.0 !!!!!!!!
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
ep 2948: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.27456320288986
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: 1.0 !!!!!!!!
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: 1.0 !!!!!!!!
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0
ep 2949: game finished, reward: -1.0


ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: 1.0 !!!!!!!!
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: 1.0 !!!!!!!!
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
ep 2957: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.279258675250265
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game finished, reward: -1.0
ep 2958: game f

ep 2965: game finished, reward: -1.0
ep 2965: game finished, reward: -1.0
resetting env. episode reward total was -16.0. running mean: -19.2371138278183
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
ep 2966: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.25474

ep 2974: game finished, reward: -1.0
ep 2974: game finished, reward: -1.0
ep 2974: game finished, reward: 1.0 !!!!!!!!
ep 2974: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.25448306968656
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, reward: -1.0
ep 2975: game finished, 

ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: 1.0 !!!!!!!!
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
ep 2983: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.26053160684082
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, reward: -1.0
ep 2984: game finished, 

ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: 1.0 !!!!!!!!
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
ep 2992: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.219082363095517
ep 2993: game finished, reward: -1.0
ep 2993: game finished, reward: -1.0
ep 2993: game finished, reward: -1.0
ep 2993: game finished, reward: -1.0
ep 2993: game finished,

resetting env. episode reward total was -15.0. running mean: -19.229318245746253
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: 1.0 !!!!!!!!
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
ep 3001: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.23702506328879
ep 3002: game fin

ep 3009: game finished, reward: -1.0
ep 3009: game finished, reward: -1.0
ep 3009: game finished, reward: -1.0
ep 3009: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -19.0. running mean: -19.236583519542336
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished, reward: -1.0
ep 3010: game finished,

ep 3018: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.312034275431945
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: 1.0 !!!!!!!!
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: 1.0 !!!!!!!!
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
ep 3019: game finished, reward: -1.0
resetting env. 

ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: 1.0 !!!!!!!!
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: -1.0
ep 3027: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -19.0. running mean: -19.26526811936829
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game finished, reward: -1.0
ep 3028: game fi

ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
ep 3036: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.29980649060817
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: 1.0 !!!!!!!!
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, reward: -1.0
ep 3037: game finished, 

ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: 1.0 !!!!!!!!
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: 1.0 !!!!!!!!
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: 1.0 !!!!!!!!
ep 3045: game finished, reward: 1.0 !!!!!!!!
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
ep 3045: game finished, reward: -1.0
resetting env. episode reward total was -16.0. running mean: -19.283276499280113
ep 3046: game finished, reward: -1.0
ep 3046: game finished, reward: -1.0
ep 3046: game finished, reward: -1.0
ep 3046: game finished, reward: -1.0
ep 3046: game finished, reward: -1.0
ep 3046: game finished, reward: -1.0

ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: 1.0 !!!!!!!!
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: 1.0 !!!!!!!!
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
ep 3054: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.2864516426966
ep 3055: game finished, reward: -1.0
ep 3055: game finished, reward: -1.0
ep 3055: game finished, reward: -1.0
ep 3055: game fin

ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: 1.0 !!!!!!!!
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: -1.0
ep 3063: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -19.0. running mean: -19.309623988679242
ep 3064: game finished, reward: -1.0
ep 3064: game finished, reward: -1.0
ep 3064: game finished, reward: -1.0
ep 3064: game f

ep 3071: game finished, reward: 1.0 !!!!!!!!
ep 3071: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.28725073535928
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: -1.0
ep 3072: game finished, reward: 1.0 !!!!!!!!
ep 3072: game finished, reward: -1.0
resetting env. e

ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: 1.0 !!!!!!!!
ep 3080: game finished, reward: 1.0 !!!!!!!!
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
ep 3080: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.243680741086216
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: 1.0 !!!!!!!!
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081: game finished, reward: -1.0
ep 3081

ep 3089: game finished, reward: 1.0 !!!!!!!!
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
ep 3089: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.28065667959416
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: -1.0
ep 3090: game finished, reward: 1.0 !!!!!!!!
ep 3090: game finished, reward: -1.0
ep 3090: game fi

ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: 1.0 !!!!!!!!
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
ep 3098: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.26656913216663
ep 3099: game finished, reward: -1.0
ep 3099: game finished, reward: -1.0
ep 3099: game finished, reward: -1.0
ep 3099: game finished, reward: -1.0
ep 3099: game finished, 

resetting env. episode reward total was -21.0. running mean: -19.276175085185084
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: 1.0 !!!!!!!!
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: 1.0 !!!!!!!!
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
ep 3107: game finished, reward: -1.0
resetting env. episode reward total was -19.0. runni

ep 3115: game finished, reward: -1.0
ep 3115: game finished, reward: -1.0
ep 3115: game finished, reward: -1.0
ep 3115: game finished, reward: -1.0
ep 3115: game finished, reward: -1.0
ep 3115: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.272301407020915
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: 1.0 !!!!!!!!
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: 1.0 !!!!!!!!
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game finished, reward: -1.0
ep 3116: game f

ep 3124: game finished, reward: 1.0 !!!!!!!!
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: 1.0 !!!!!!!!
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: 1.0 !!!!!!!!
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
ep 3124: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.259506009818256
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: 1.0 !!!!!!!!
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0
ep 3125: game finished, reward: -1.0

ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
ep 3133: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.255221295721977
ep 3134: game finished, reward: -1.0
ep 3134: game finished, reward: -1.0
ep 3134: game finished, reward: -1.0
ep 3134: game finished, reward: -1.0
ep 3134: game finished, reward: -1.0
ep 3134: game finished, reward:

ep 3141: game finished, reward: -1.0
ep 3141: game finished, reward: 1.0 !!!!!!!!
resetting env. episode reward total was -20.0. running mean: -19.23482682822648
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: 1.0 !!!!!!!!
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
ep 3142: game finished, reward: -1.0
resetting env. e

ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: 1.0 !!!!!!!!
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: 1.0 !!!!!!!!
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
ep 3150: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.135338745795053
ep 3151: game finished, reward: -1.0
ep 3151: game finished, reward: -1.0
ep 3151: game finished, reward: -1.0
ep 3151: game finished, reward: -1.0
ep 3151: game finished, reward: -1.0
ep 3151: game finished, reward: -1.0
ep 3151: game f

ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
ep 3159: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.191208894418157
ep 3160: game finished, reward: -1.0
ep 3160: game finished, reward: -1.0
ep 3160: game finished, reward: -1.0
ep 3160: game finished, reward: -1.0
ep 3160: game finished, reward: -1.0
ep 3160: game finished, reward:

ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
ep 3167: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.118021955227892
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward: -1.0
ep 3168: game finished, reward:

ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
ep 3176: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.12618163043611
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: 1.0 !!!!!!!!
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, reward: -1.0
ep 3177: game finished, 

ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: 1.0 !!!!!!!!
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
ep 3185: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.12565931166274
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, reward: -1.0
ep 3186: game finished, 

ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
ep 3193: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean: -19.048037345625318
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: 1.0 !!!!!!!!
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished, reward: -1.0
ep 3194: game finished,

ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: 1.0 !!!!!!!!
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
ep 3202: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.09085878576942
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, reward: -1.0
ep 3203: game finished, 

ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
ep 3211: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.07348947055936
ep 3212: game finished, reward: -1.0
ep 3212: game finished, reward: -1.0
ep 3212: game finished, reward: -1.0
ep 3212: game finished, reward: -1.0
ep 3212: game finished, reward: -1.0
ep 3212: game finished, reward: 

ep 3220: game finished, reward: 1.0 !!!!!!!!
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: 1.0 !!!!!!!!
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: -1.0
ep 3220: game finished, reward: 1.0 !!!!!!!!
ep 3220: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.142840197330045
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: 1.0 !!!!!!!!
ep 3221: game finished, reward: -1.0
ep 3221: game finished, reward: -1.0

ep 3228: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.123177361553278
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: 1.0 !!!!!!!!
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
ep 3229: game finished, reward: -1.0
resetting env. episode reward total was -20.0. running mean:

ep 3237: game finished, reward: -1.0
ep 3237: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.169406318380872
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: 1.0 !!!!!!!!
ep 3238: game finished, reward: 1.0 !!!!!!!!
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: 1.0 !!!!!!!!
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238: game finished, reward: -1.0
ep 3238

ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
ep 3246: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.144749908914413
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: 1.0 !!!!!!!!
ep 3247: game finished, reward: -1.0
ep 3247: game finished, reward: 1.0 !!!!!!!!
ep 3247: game f

ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: 1.0 !!!!!!!!
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: 1.0 !!!!!!!!
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: 1.0 !!!!!!!!
ep 3255: game finished, reward: -1.0
ep 3255: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.102615923405786
ep 3256: game finished, reward: -1.0
ep 3256: game finished, reward: -1.0
ep 3256

ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
ep 3264: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.170900750083668
ep 3265: game finished, reward: -1.0
ep 3265: game finished, reward: -1.0
ep 3265: game finished, reward: -1.0
ep 3265: game finished, reward: -1.0
ep 3265: game finished, reward: -1.0
ep 3265: game finished, reward:

ep 3272: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.157408533293527
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: 1.0 !!!!!!!!
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: 1.0 !!!!!!!!
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: 1.0 !!!!!!!!
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: 1.0 !!!!!!!!
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: -1.0
ep 3273: game finished, reward: 1.0 !!!!!!!!
ep 3273: game finished, rewa

ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: 1.0 !!!!!!!!
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
ep 3281: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.107167959399607
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: 1.0 !!!!!!!!
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game finished, reward: -1.0
ep 3282: game f

ep 3290: game finished, reward: 1.0 !!!!!!!!
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: 1.0 !!!!!!!!
ep 3290: game finished, reward: -1.0
ep 3290: game finished, reward: -1.0
resetting env. episode reward total was -19.0. running mean: -19.079149011544352
ep 3291: game finished, reward: -1.0
ep 3291: game finished, reward: -1.0
ep 3291: game finished, reward: -1.0
ep 3291: game f

ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
ep 3298: game finished, reward: -1.0
resetting env. episode reward total was -21.0. running mean: -19.034805736002646
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward: -1.0
ep 3299: game finished, reward:

ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: 1.0 !!!!!!!!
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: 1.0 !!!!!!!!
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: 1.0 !!!!!!!!
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
ep 3307: game finished, reward: -1.0
resetting env. episode reward total was -18.0. running mean: -19.001324009265257
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308: game finished, reward: -1.0
ep 3308