In [None]:
# this is a Deep Q Learning (DQN) agent including replay memory and a target network 
# you can write a brief 8-10 line abstract detailing your submission and experiments here
# the code is based on https://github.com/seungeunrho/minimalRL/blob/master/dqn.py, which is released under the MIT license
# make sure you reference any code you have studied as above, with one comment line per reference
# Code makes use of parts of: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
# Code makes use of parts of: https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
# Code makes use of parts of: https://github.com/higgsfield/RL-Adventure/blob/master/7.rainbow%20dqn.ipynb
# Code makes use of parts of: https://github.com/higgsfield/RL-Adventure/blob/master/common/replay_buffer.py
# Code makes use of parts of: https://github.com/higgsfield/RL-Adventure/blob/master/common/layers.py
# Code makes use of parts of: https://github.com/higgsfield/RL-Adventure/blob/master/common/wrappers.py
'''
Use of dueling to reduce overestimation in the action value function. The expected action value across all actions from a given state should be zero. If action values are overestimated, the expected value will be > 0.
This will naturally temper overestimation when formulated as in dueling. Replacement of epsilon-greedy with noisy linear layers, such that the extent of
exploration can be learnt. I implemented this from scratch by reading the paper; however, due to bugs I had to reference the
rl-adventure implementation, hence some similarities. Prioritised replay buffer to ensure that the network is trained most on those experiences that have proven to be
most beneficial. Limited memory on Colab has meant it must be much smaller than specified in the paper, and so considerably less effective. Use of distributional RL, whereby we seek to approximate the
distribution of returns for each action with a multinomial categorical distribution, increases the stability of training.

Please also note I have run my model over several separate sessions, i.e. one after the other, due to timeouts. I increased the number of videos recorded and scores printed in the most recent run.
'''

import math, random
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F
import operator
import collections
import matplotlib.pyplot as plt
import cv2
cv2.ocl.setUseOpenCL(False)

from IPython.display import clear_output
from torch.autograd import Variable
from collections import deque
from gym import spaces
%matplotlib inline

class SegmentTree(object):
    """Array-backed binary tree answering range reductions over its leaves.

    Leaf ``i`` lives at ``_value[capacity + i]``; internal node ``k`` holds
    ``operation(_value[2k], _value[2k + 1])`` and the root is node 1.

    Args:
        capacity: number of leaves; must be a positive power of two.
        operation: binary callable used to combine two child values.
        neutral_element: initial value of every node.
    """

    def __init__(self, capacity, operation, neutral_element):
        is_power_of_two = capacity > 0 and capacity & (capacity - 1) == 0
        assert is_power_of_two, "capacity must be positive and a power of 2."
        self._capacity = capacity
        self._value = [neutral_element] * (2 * capacity)
        self._operation = operation

    def _reduce_helper(self, start, end, node, node_start, node_end):
        """Reduce the inclusive leaf range [start, end] inside ``node``.

        ``node`` covers the inclusive leaf range [node_start, node_end].
        """
        if (start, end) == (node_start, node_end):
            return self._value[node]

        mid = (node_start + node_end) // 2
        left_child, right_child = 2 * node, 2 * node + 1

        # Query lies entirely in the left half.
        if end <= mid:
            return self._reduce_helper(start, end, left_child, node_start, mid)
        # Query lies entirely in the right half.
        if start >= mid + 1:
            return self._reduce_helper(start, end, right_child, mid + 1, node_end)

        # Query straddles the midpoint: combine both halves, left first.
        lower = self._reduce_helper(start, mid, left_child, node_start, mid)
        upper = self._reduce_helper(mid + 1, end, right_child, mid + 1, node_end)
        return self._operation(lower, upper)

    def reduce(self, start=0, end=None):
        """Apply the operation over leaves in the half-open range [start, end).

        ``end=None`` means "up to capacity"; a negative ``end`` counts back
        from capacity.
        """
        if end is None:
            end = self._capacity
        if end < 0:
            end += self._capacity
        # The recursive helper works with an inclusive upper bound.
        last = end - 1
        return self._reduce_helper(start, last, 1, 0, self._capacity - 1)

    def __setitem__(self, idx, val):
        """Store ``val`` in leaf ``idx`` and refresh every ancestor node."""
        node = idx + self._capacity
        self._value[node] = val
        node //= 2
        while node >= 1:
            left, right = self._value[2 * node], self._value[2 * node + 1]
            self._value[node] = self._operation(left, right)
            node //= 2

    def __getitem__(self, idx):
        """Return the value stored in leaf ``idx``."""
        assert 0 <= idx < self._capacity
        return self._value[self._capacity + idx]


class SumSegmentTree(SegmentTree):
    """Segment tree combining children with addition (neutral element 0.0)."""

    def __init__(self, capacity):
        super().__init__(capacity=capacity, operation=operator.add, neutral_element=0.0)

    def sum(self, start=0, end=None):
        """Return the sum of the leaves in the half-open range [start, end)."""
        return super().reduce(start, end)

    def find_prefixsum_idx(self, prefixsum):
        """Locate the leaf at which the running sum of leaves passes ``prefixsum``.

        Descends from the root: goes left when the left subtree's total
        exceeds the remaining mass, otherwise subtracts that total and goes
        right. Returns the resulting leaf index.
        """
        assert 0 <= prefixsum <= self.sum() + 1e-5
        node = 1
        while node < self._capacity:  # stop once a leaf is reached
            left = 2 * node
            left_total = self._value[left]
            if left_total > prefixsum:
                node = left
            else:
                prefixsum -= left_total
                node = left + 1
        return node - self._capacity


class MinSegmentTree(SegmentTree):
    """Segment tree combining children with ``min`` (neutral element +inf)."""

    def __init__(self, capacity):
        super().__init__(capacity, min, float('inf'))

    def min(self, start=0, end=None):
        """Return the smallest leaf in the half-open range [start, end)."""
        return super().reduce(start, end)


class ReplayBuffer(object):
    """Fixed-size circular buffer of (state, action, reward, next_state, done).

    Args:
        size: maximum number of transitions kept; once full, the oldest
            slot is overwritten.
    """

    def __init__(self, size):
        self._storage = []
        self._maxsize = size
        self._next_idx = 0  # slot the next push() writes to

    def __len__(self):
        return len(self._storage)

    def push(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest one when full."""
        data = (state, action, reward, next_state, done)

        if self._next_idx >= len(self._storage):
            self._storage.append(data)
        else:
            self._storage[self._next_idx] = data
        self._next_idx = (self._next_idx + 1) % self._maxsize

    def _encode_sample(self, idxes):
        """Stack the transitions at ``idxes`` into five numpy arrays.

        Returns:
            (states, actions, rewards, next_states, dones), each with one
            entry per index in ``idxes``.
        """
        obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
        for i in idxes:
            obs_t, action, reward, obs_tp1, done = self._storage[i]
            # np.asarray avoids a copy when possible; np.array(..., copy=False)
            # raises ValueError under NumPy 2 whenever a copy is required.
            obses_t.append(np.asarray(obs_t))
            actions.append(np.asarray(action))
            rewards.append(reward)
            obses_tp1.append(np.asarray(obs_tp1))
            dones.append(done)
        return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly at random, with replacement."""
        idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
        return self._encode_sample(idxes)


class PrioritizedReplayBuffer(ReplayBuffer):
    """Replay buffer that samples transitions in proportion to priority**alpha.

    Priorities are kept in a sum tree (for proportional sampling) and a min
    tree (for the largest importance weight), both indexed by storage slot.

    Args:
        size: maximum number of stored transitions.
        alpha: exponent applied to priorities; must be > 0.
    """

    def __init__(self, size, alpha):
        super(PrioritizedReplayBuffer, self).__init__(size)
        assert alpha > 0
        self._alpha = alpha

        # Segment trees need a power-of-two capacity >= size.
        it_capacity = 1
        while it_capacity < size:
            it_capacity *= 2

        self._it_sum = SumSegmentTree(it_capacity)
        self._it_min = MinSegmentTree(it_capacity)
        self._max_priority = 1.0

    def push(self, *args, **kwargs):
        """Store a transition and give it the largest priority seen so far."""
        idx = self._next_idx
        super(PrioritizedReplayBuffer, self).push(*args, **kwargs)
        scaled = self._max_priority ** self._alpha
        self._it_sum[idx] = scaled
        self._it_min[idx] = scaled

    def _sample_proportional(self, batch_size):
        """Return ``batch_size`` storage indices drawn proportionally to priority."""
        # Sum over the whole tree: unused leaves hold 0.0, so this is the
        # total over every stored transition. The previous
        # ``sum(0, len - 1)`` has an exclusive end and therefore left out the
        # last stored slot, which could then never be sampled.
        total = self._it_sum.sum()
        last = len(self._storage) - 1
        res = []
        for _ in range(batch_size):
            mass = random.random() * total
            idx = self._it_sum.find_prefixsum_idx(mass)
            # Guard against floating-point round-off pushing the walk past
            # the last populated leaf.
            res.append(min(idx, last))
        return res

    def sample(self, batch_size, beta):
        """Sample a prioritised batch.

        Args:
            batch_size: number of transitions to draw.
            beta: importance-sampling exponent; must be > 0.

        Returns:
            The five arrays of ``_encode_sample`` followed by the normalised
            importance weights (numpy array) and the sampled indices (list).

        Raises:
            ValueError: if the buffer is empty.
        """
        assert beta > 0
        if not self._storage:
            raise ValueError("cannot sample from an empty replay buffer")

        idxes = self._sample_proportional(batch_size)

        # Loop-invariant quantities, computed once per batch.
        n_stored = len(self._storage)
        total = self._it_sum.sum()
        p_min = self._it_min.min() / total
        max_weight = (p_min * n_stored) ** (-beta)

        weights = []
        for idx in idxes:
            p_sample = self._it_sum[idx] / total
            weight = (p_sample * n_stored) ** (-beta)
            weights.append(weight / max_weight)
        weights = np.array(weights)
        encoded_sample = self._encode_sample(idxes)
        return tuple(list(encoded_sample) + [weights, idxes])

    def update_priorities(self, idxes, priorities):
        """Set new priorities for the transitions at ``idxes``.

        Args:
            idxes: storage indices, as returned by ``sample``.
            priorities: numpy array of strictly positive priorities, one
                per index.
        """
        assert len(idxes) == priorities.size
        for idx, priority in zip(idxes, priorities):
            assert priority > 0
            assert 0 <= idx < len(self._storage)
            scaled = priority ** self._alpha
            self._it_sum[idx] = scaled
            self._it_min[idx] = scaled

            self._max_priority = max(self._max_priority, priority)

def projection_distribution(next_state, rewards, dones, gamma=0.99):
    """Project the target network's return distribution onto the fixed support.

    Uses the module-level ``target_model``, ``Vmin``, ``Vmax``, ``num_atoms``
    and ``device``.

    Args:
        next_state: batch of next states fed to ``target_model``.
        rewards: 1-D tensor of rewards, one per batch element.
        dones: 1-D float tensor, 1.0 where the transition ended an episode.
        gamma: discount factor applied to the support.

    Returns:
        Tensor of shape (batch, num_atoms) holding the projected target
        probabilities; it carries no gradient.
    """
    batch_size = next_state.size(0)

    delta_z = float(Vmax - Vmin) / (num_atoms - 1)
    support = torch.linspace(Vmin, Vmax, num_atoms).to(device)

    with torch.no_grad():
        next_probs = target_model(next_state).to(device)

        # Greedy action by expected return. The *probabilities* of that
        # action are what gets projected; the earlier version gathered
        # probability * atom value, which is not a distribution.
        next_action = (next_probs * support).sum(2).max(1)[1]
        next_action = next_action.unsqueeze(1).unsqueeze(1).expand(batch_size, 1, num_atoms)
        next_dist = next_probs.gather(1, next_action).squeeze(1)

        rewards = rewards.unsqueeze(1).expand_as(next_dist).to(device)
        dones = dones.unsqueeze(1).expand_as(next_dist).to(device)
        support = support.unsqueeze(0).expand_as(next_dist)

        Tz = rewards + (1 - dones) * gamma * support
        Tz = Tz.clamp(min=Vmin, max=Vmax)
        b = (Tz - Vmin) / delta_z
        l = b.floor().long()
        u = b.ceil().long()
        # When b falls exactly on an atom, l == u and both interpolation
        # weights below are zero, which would silently drop that mass.
        # Spread the pair apart so the full weight lands on the exact atom.
        l[(u > 0) & (l == u)] -= 1
        u[(l < (num_atoms - 1)) & (l == u)] += 1

        # Row offsets into the flattened (batch * num_atoms) output.
        offset = (torch.arange(batch_size, device=device) * num_atoms)\
            .unsqueeze(1).expand(batch_size, num_atoms)

        proj_dist = torch.zeros(next_dist.size()).to(device)
        proj_dist.view(-1).index_add_(0, (l + offset).view(-1), (next_dist * (u.float() - b)).view(-1))
        proj_dist.view(-1).index_add_(0, (u + offset).view(-1), (next_dist * (b - l.float())).view(-1))

    return proj_dist

def compute_td_loss(batch_size, beta=0.4):
    """Run one optimisation step on a prioritised batch and return the loss.

    Uses the module-level ``replay_buffer``, ``current_model``,
    ``target_model``, ``optimizer``, ``num_atoms`` and ``device``.

    Args:
        batch_size: number of transitions to sample.
        beta: importance-sampling exponent passed to the replay buffer.

    Returns:
        The scalar (importance-weighted, batch-mean) loss tensor.
    """
    state, action, reward, next_state, done, weights, indices = replay_buffer.sample(batch_size, beta)

    # Plain tensors on the selected device; no autograd.Variable wrapper is
    # needed and nothing here should force CUDA.
    state = torch.as_tensor(np.float32(state), device=device)
    next_state = torch.as_tensor(np.float32(next_state), device=device)
    action = torch.as_tensor(action, dtype=torch.long, device=device)
    reward = torch.as_tensor(np.float32(reward), device=device)
    done = torch.as_tensor(np.float32(done), device=device)
    weights = torch.as_tensor(np.float32(weights), device=device)

    proj_dist = projection_distribution(next_state, reward, done).detach()

    dist = current_model(state)
    action = action.unsqueeze(1).unsqueeze(1).expand(batch_size, 1, num_atoms)
    dist = dist.gather(1, action).squeeze(1)
    # Keeps log() finite; done on .data so the clamp itself is not
    # differentiated (kept as in the original implementation).
    dist.data.clamp_(0.01, 0.99)

    # Per-sample cross-entropy between projected target and prediction.
    elementwise_loss = -(proj_dist * dist.log()).sum(1)
    loss = (elementwise_loss * weights).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Priorities come from the raw per-sample loss; folding the importance
    # weights in (as before) would feed the correction back into sampling.
    priorities = 1e-5 + elementwise_loss.detach().abs()
    replay_buffer.update_priorities(indices, priorities.cpu().numpy())
    current_model.reset_noise()
    target_model.reset_noise()

    return loss


class FireResetEnv(gym.Wrapper):
    """Wrapper that issues actions 1 and 2 right after every reset.

    Requires the wrapped env to report 'FIRE' as action 1 and to have at
    least three actions.
    """

    def __init__(self, env):
        super().__init__(env)
        meanings = env.unwrapped.get_action_meanings
        assert meanings()[1] == 'FIRE'
        assert len(meanings()) >= 3

    def reset(self, **kwargs):
        """Reset, then step with action 1 and action 2; return the last observation.

        If either step ends the episode, the env is reset again before
        continuing.
        """
        self.env.reset(**kwargs)
        for forced_action in (1, 2):
            obs, _, done, _ = self.env.step(forced_action)
            if done:
                self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        """Forward the action to the wrapped env unchanged."""
        return self.env.step(ac)


class ImageToPyTorch(gym.ObservationWrapper):
    """Move the channel axis of image observations to the front."""

    def __init__(self, env):
        super(ImageToPyTorch, self).__init__(env)
        old_shape = self.observation_space.shape
        # Bounds must match the uint8 dtype: the network divides inputs by
        # 255 in its forward pass, so frames are taken to be raw 0-255
        # pixels. The previous high=1.0 described a space that real frames
        # do not belong to.
        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(old_shape[-1], old_shape[0], old_shape[1]),
            dtype=np.uint8,
        )

    def observation(self, observation):
        # NOTE(review): swapaxes(2, 0) yields (C, W, H), i.e. a transposed
        # image, whereas the declared shape above is (C, H, W). The two agree
        # only for square frames. Left unchanged because existing checkpoints
        # were presumably trained on this layout - confirm before switching
        # to a (2, 0, 1) transpose.
        return np.swapaxes(observation, 2, 0)


def setup_env():
    """Create the wrapped, seeded Gravitar environment and seed the RNGs.

    Wrapping order: AtariPreprocessing (colour frames), FireResetEnv when
    the game has a FIRE action, ImageToPyTorch, then Monitor for video.

    Returns:
        The fully wrapped environment.
    """
    seed = 742
    #env_id = "PongNoFrameskip-v4"
    env_id = "GravitarNoFrameskip-v4"
    env = gym.make(env_id)
    env = gym.wrappers.AtariPreprocessing(env, grayscale_obs=False)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env    = ImageToPyTorch(env)
    # video_every is a module-level name looked up each time the callable
    # runs, so it only has to exist by the time episodes are recorded.
    # force=True is passed so the "./video" directory can be reused.
    env = gym.wrappers.Monitor(env, "./video", video_callable=lambda episode_id: (episode_id%video_every)==0,force=True)

    env.seed(seed)
    env.action_space.seed(seed)

    # reproducible environment and action spaces, do not change lines 6-11 here (tools > settings > editor > show line numbers)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    return env

class Linear_Noisy(nn.Module):
    """Linear layer with learnable noise scales on weights and bias.

    Computes ``F.linear(x, w_mu + w_sigma * w_noise, b_mu + b_sigma * b_noise)``.
    The ``w_noise`` / ``b_noise`` buffers are not resampled here; the owner
    of the layer assigns fresh noise to them.

    Args:
        in_size: number of input features.
        out_size: number of output features.
    """

    def __init__(self, in_size, out_size):
        super(Linear_Noisy, self).__init__()
        self.in_size = in_size
        self.out_size = out_size
        mu_bound = 1 / math.sqrt(in_size)

        self.w_mu = nn.Parameter(torch.empty(out_size, in_size))
        nn.init.uniform_(self.w_mu, -mu_bound, mu_bound)

        self.w_sigma = nn.Parameter(torch.empty(out_size, in_size))
        nn.init.constant_(self.w_sigma, 0.4 / math.sqrt(in_size))

        # Zero noise until it is assigned from outside. The previous
        # uninitialised allocation could hold arbitrary values (including
        # NaN/inf) if the layer was used before its noise was set.
        self.register_buffer('w_noise', torch.zeros(out_size, in_size))

        self.b_mu = nn.Parameter(torch.empty(out_size))
        nn.init.uniform_(self.b_mu, -mu_bound, mu_bound)

        self.b_sigma = nn.Parameter(torch.empty(out_size))
        nn.init.constant_(self.b_sigma, 0.4 / math.sqrt(out_size))

        self.register_buffer('b_noise', torch.zeros(out_size))

    def forward(self, x):
        """Apply the noisy affine map to ``x``."""
        # Noise may have been assigned as a CPU tensor; align it with the
        # parameters instead of relying on a module-level device.
        w = self.w_mu + self.w_sigma * self.w_noise.to(self.w_mu.device)
        b = self.b_mu + self.b_sigma * self.b_noise.to(self.b_mu.device)
        return F.linear(x, w, b)
   
class RainbowCnnDQN(nn.Module):
    """Dueling, noisy, distributional convolutional Q-network.

    For every action the network outputs a categorical distribution over
    ``num_atoms`` return values evenly spaced on [Vmin, Vmax].

    Args:
        input_shape: observation shape, channels first.
        num_actions: number of discrete actions.
        num_atoms: number of atoms in the return distribution.
        Vmin: smallest return value of the support.
        Vmax: largest return value of the support.
    """

    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        # Derived from the conv stack instead of a hard-coded 64*49, so the
        # heads match whatever input resolution is used (64*49 for 84x84).
        feature_dim = self.feature_size()

        self.noisy_value1 = Linear_Noisy(feature_dim, 512).to(device)
        self.noisy_value2 = Linear_Noisy(512, self.num_atoms).to(device)

        self.noisy_advantage1 = Linear_Noisy(feature_dim, 512).to(device)
        self.noisy_advantage2 = Linear_Noisy(512, self.num_atoms * self.num_actions).to(device)
        self.reset_noise()

    def forward(self, x):
        """Return per-action atom probabilities, shape (batch, actions, atoms).

        Inputs are divided by 255 before the convolutional stack.
        """
        batch_size = x.size(0)

        x = x.to(device) / 255.
        x = self.features(x)
        x = x.view(batch_size, -1)

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        val = value.view(batch_size, 1, self.num_atoms)
        adv = advantage.view(batch_size, self.num_actions, self.num_atoms)

        # Dueling combination: centre the advantages over the action axis.
        q_vals = val + adv - torch.mean(adv, 1, True)
        # Explicit dim=1 is what the implicit choice resolved to for this 2-D
        # view; stating it removes the per-call deprecation warning.
        q_vals = F.softmax(q_vals.view(-1, self.num_atoms), dim=1)
        return q_vals.view(-1, self.num_actions, self.num_atoms)

    def reset_noise(self):
        """Draw fresh noise for all four noisy layers (bias first, then weight)."""
        noisy_layers = (
            self.noisy_value1,
            self.noisy_value2,
            self.noisy_advantage1,
            self.noisy_advantage2,
        )
        for layer in noisy_layers:
            # Sampled on the CPU (same random stream as before), then placed
            # on the buffer's device once here rather than on every forward.
            layer.b_noise = self.f(torch.randn(layer.b_noise.shape)).to(layer.b_noise.device)
            layer.w_noise = self.f(torch.randn(layer.w_noise.shape)).to(layer.w_noise.device)

    def f(self, g_rand):
        """Return sign(x) * sqrt(|x|) element-wise."""
        g_sgn = g_rand.sign()
        return g_sgn * (g_sgn * g_rand).sqrt()

    def feature_size(self):
        """Return the flattened size of the conv stack's output for one input."""
        probe_device = next(self.features.parameters()).device
        with torch.no_grad():
            probe = torch.zeros(1, *self.input_shape, device=probe_device)
            return self.features(probe).view(1, -1).size(1)

    def act(self, state):
        """Return the action with the highest expected return for one state."""
        state = torch.as_tensor(np.asarray(state, dtype=np.float32), device=device).unsqueeze(0)
        with torch.no_grad():
            dist = self.forward(state).cpu()
        # Expected return per action = sum over atoms of prob * atom value.
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action

def get_state(old_lives):
    """Step the global ``env`` with the global ``action``.

    A lost life (fewer lives than ``old_lives`` but still more than zero)
    is reported as ``done``.

    Returns:
        (next_state, reward, done, info, lives)
    """
    observation, reward, done, info = env.step(action)
    lives = env.unwrapped.ale.lives()
    life_lost = 0 < lives < old_lives
    if life_lost:
        done = True
    return observation, reward, done, info, lives
    

# hyperparameters
env = setup_env()
gamma         = 0.99
num_frames = 1000000
batch_size    = 32
video_every   = 2
print_every   = 2

# support of the categorical return distribution
num_atoms = 51
Vmin = -10
Vmax = 10

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
current_model = RainbowCnnDQN(env.observation_space.shape, env.action_space.n, num_atoms, Vmin, Vmax).to(device)
target_model  = RainbowCnnDQN(env.observation_space.shape, env.action_space.n, num_atoms, Vmin, Vmax).to(device)


def Variable(*args, **kwargs):
    """Build an autograd.Variable on the selected device.

    The previous version called .cuda() unconditionally, which fails on
    machines without a GPU even though ``device`` falls back to the CPU.
    """
    return autograd.Variable(*args, **kwargs).to(device)


optimizer = optim.Adam(current_model.parameters(), lr=0.0001)

# training starts once the buffer holds more than replay_initial transitions
replay_initial = 10000
replay_buffer  = PrioritizedReplayBuffer(20000, 0.8)

score    = 0.0
frame_idx = 0
marking  = []
losses = []
episode = 0

In [None]:
from google.colab import drive
drive.mount('/content/drive')
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only
# runtime. The file is unpickled, so only load checkpoints you wrote yourself.
params = torch.load('drive/My Drive/save-prioritised1.chkpt', map_location=device)

current_model.load_state_dict(params['R'])
optimizer.load_state_dict(params['optimiser'])
target_model.load_state_dict(current_model.state_dict())
frame_idx = params['frame_idx']

# Rebuild the buffer by pushing the saved transitions. Assigning _storage
# directly leaves the priority trees empty, so every restored transition
# would have zero probability of being sampled.
replay_buffer = PrioritizedReplayBuffer(replay_buffer._maxsize, replay_buffer._alpha)
for transition in params['replay_buffer'][-replay_buffer._maxsize:]:
    replay_buffer.push(*transition)

episode = params['episode'] + 1

Mounted at /content/drive


In [None]:
# Main training loop: one iteration per game, resuming from `episode`.
for n_episode in range(episode, int(1e32)):
    state = env.reset()
    lives = env.unwrapped.ale.lives()
    done = False
    score = 0.0

    # Play until no lives remain.
    while lives:
        frame_idx += 1
        action = current_model.act(state)

        # get_state() steps the env with the module-level `action` set above
        # and also reports done=True when a life was lost.
        next_state, reward, done, _, lives = get_state(lives)
        score += reward
        # The stored reward is scaled by 1/100 and penalised by 0.5 on done;
        # `score` keeps the raw game reward.
        reward = reward / 100.0
        reward -= 0.5 if done else 0.0       

        replay_buffer.push(state, action, reward, next_state, done)
        state = next_state

        # Train only once the buffer holds more than replay_initial transitions.
        if len(replay_buffer) > replay_initial:
            loss = compute_td_loss(batch_size)
            losses.append(loss.data.item())    

        # Every 1000 frames: write a checkpoint (including the raw replay
        # storage) and sync the target network.
        if frame_idx % 1000 == 0:
            torch.save({'R': current_model.state_dict(), 'optimiser':optimizer.state_dict(), 'frame_idx': frame_idx, 'replay_buffer': replay_buffer._storage, 'episode': n_episode}, 'drive/My Drive/save-prioritised1.chkpt')
            target_model.load_state_dict(current_model.state_dict())

    # do not change lines 44-48 here, they are for marking the submission log
    marking.append(score)
    if n_episode%100 == 0:
        print("marking, episode: {}, score: {:.1f}, mean_score: {:.2f}, std_score: {:.2f}".format(
            n_episode, score, np.array(marking).mean(), np.array(marking).std()))
        marking = []


    # you can change this part, and print any data you like (so long as it doesn't start with "marking")
    # NOTE: besides logging, this branch also syncs the target network every
    # print_every episodes; the printed epsilon is the constant 0.
    if n_episode%print_every==0 and n_episode!=0:
        target_model.load_state_dict(current_model.state_dict())
        print("episode: {}, score: {:.1f}, epsilon: {:.2f}".format(n_episode, score, 0))



episode: 830, score: 0.0, epsilon: 0.00




episode: 832, score: 600.0, epsilon: 0.00




episode: 834, score: 0.0, epsilon: 0.00




episode: 836, score: 100.0, epsilon: 0.00




episode: 838, score: 500.0, epsilon: 0.00




episode: 840, score: 500.0, epsilon: 0.00




episode: 842, score: 500.0, epsilon: 0.00




episode: 844, score: 200.0, epsilon: 0.00




episode: 846, score: 0.0, epsilon: 0.00




episode: 848, score: 100.0, epsilon: 0.00




episode: 850, score: 250.0, epsilon: 0.00




episode: 852, score: 0.0, epsilon: 0.00




episode: 854, score: 100.0, epsilon: 0.00




episode: 856, score: 0.0, epsilon: 0.00




episode: 858, score: 100.0, epsilon: 0.00




episode: 860, score: 250.0, epsilon: 0.00




episode: 862, score: 250.0, epsilon: 0.00




episode: 864, score: 0.0, epsilon: 0.00




episode: 866, score: 0.0, epsilon: 0.00




episode: 868, score: 350.0, epsilon: 0.00




episode: 870, score: 0.0, epsilon: 0.00




episode: 872, score: 100.0, epsilon: 0.00




episode: 874, score: 250.0, epsilon: 0.00




episode: 876, score: 0.0, epsilon: 0.00




episode: 878, score: 250.0, epsilon: 0.00




episode: 880, score: 250.0, epsilon: 0.00




episode: 882, score: 0.0, epsilon: 0.00




episode: 884, score: 350.0, epsilon: 0.00




episode: 886, score: 600.0, epsilon: 0.00




episode: 888, score: 100.0, epsilon: 0.00




episode: 890, score: 0.0, epsilon: 0.00




episode: 892, score: 350.0, epsilon: 0.00




episode: 894, score: 250.0, epsilon: 0.00




episode: 896, score: 0.0, epsilon: 0.00




episode: 898, score: 100.0, epsilon: 0.00




marking, episode: 900, score: 250.0, mean_score: 195.77, std_score: 212.92
episode: 900, score: 250.0, epsilon: 0.00




episode: 902, score: 500.0, epsilon: 0.00




episode: 904, score: 0.0, epsilon: 0.00




episode: 906, score: 750.0, epsilon: 0.00




episode: 908, score: 0.0, epsilon: 0.00




episode: 910, score: 0.0, epsilon: 0.00




episode: 912, score: 0.0, epsilon: 0.00




episode: 914, score: 100.0, epsilon: 0.00




episode: 916, score: 250.0, epsilon: 0.00




episode: 918, score: 0.0, epsilon: 0.00




episode: 920, score: 0.0, epsilon: 0.00




episode: 922, score: 0.0, epsilon: 0.00




episode: 924, score: 0.0, epsilon: 0.00




episode: 926, score: 100.0, epsilon: 0.00




episode: 928, score: 100.0, epsilon: 0.00




episode: 930, score: 100.0, epsilon: 0.00




episode: 932, score: 100.0, epsilon: 0.00




episode: 934, score: 0.0, epsilon: 0.00




episode: 936, score: 0.0, epsilon: 0.00




episode: 938, score: 0.0, epsilon: 0.00




episode: 940, score: 0.0, epsilon: 0.00




episode: 942, score: 0.0, epsilon: 0.00




episode: 944, score: 0.0, epsilon: 0.00




episode: 946, score: 700.0, epsilon: 0.00




episode: 948, score: 0.0, epsilon: 0.00




episode: 950, score: 0.0, epsilon: 0.00




episode: 952, score: 250.0, epsilon: 0.00




episode: 954, score: 500.0, epsilon: 0.00




episode: 956, score: 350.0, epsilon: 0.00




episode: 958, score: 100.0, epsilon: 0.00




episode: 960, score: 0.0, epsilon: 0.00




episode: 962, score: 0.0, epsilon: 0.00




episode: 964, score: 100.0, epsilon: 0.00




episode: 966, score: 100.0, epsilon: 0.00




episode: 968, score: 0.0, epsilon: 0.00




episode: 970, score: 0.0, epsilon: 0.00




episode: 972, score: 0.0, epsilon: 0.00




episode: 974, score: 0.0, epsilon: 0.00




episode: 976, score: 0.0, epsilon: 0.00




episode: 978, score: 100.0, epsilon: 0.00




episode: 980, score: 0.0, epsilon: 0.00




episode: 982, score: 0.0, epsilon: 0.00




episode: 984, score: 0.0, epsilon: 0.00




episode: 986, score: 0.0, epsilon: 0.00




episode: 988, score: 250.0, epsilon: 0.00




episode: 990, score: 100.0, epsilon: 0.00




episode: 992, score: 850.0, epsilon: 0.00




episode: 994, score: 0.0, epsilon: 0.00




episode: 996, score: 0.0, epsilon: 0.00




episode: 998, score: 500.0, epsilon: 0.00




marking, episode: 1000, score: 0.0, mean_score: 122.00, std_score: 189.52
episode: 1000, score: 0.0, epsilon: 0.00




episode: 1002, score: 0.0, epsilon: 0.00




episode: 1004, score: 500.0, epsilon: 0.00




episode: 1006, score: 500.0, epsilon: 0.00




episode: 1008, score: 0.0, epsilon: 0.00




episode: 1010, score: 250.0, epsilon: 0.00




episode: 1012, score: 200.0, epsilon: 0.00




episode: 1014, score: 0.0, epsilon: 0.00




episode: 1016, score: 250.0, epsilon: 0.00




episode: 1018, score: 350.0, epsilon: 0.00




episode: 1020, score: 250.0, epsilon: 0.00




episode: 1022, score: 100.0, epsilon: 0.00




episode: 1024, score: 250.0, epsilon: 0.00




episode: 1026, score: 250.0, epsilon: 0.00




episode: 1028, score: 700.0, epsilon: 0.00




episode: 1030, score: 250.0, epsilon: 0.00




episode: 1032, score: 0.0, epsilon: 0.00




episode: 1034, score: 250.0, epsilon: 0.00




episode: 1036, score: 450.0, epsilon: 0.00




episode: 1038, score: 100.0, epsilon: 0.00




episode: 1040, score: 0.0, epsilon: 0.00




episode: 1042, score: 0.0, epsilon: 0.00




episode: 1044, score: 600.0, epsilon: 0.00




episode: 1046, score: 600.0, epsilon: 0.00




episode: 1048, score: 0.0, epsilon: 0.00




episode: 1050, score: 0.0, epsilon: 0.00




episode: 1052, score: 250.0, epsilon: 0.00




episode: 1054, score: 0.0, epsilon: 0.00


KeyboardInterrupt: ignored

In [None]:
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 830, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 832, score: 600.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 834, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 836, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 838, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 840, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 842, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 844, score: 200.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 846, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 848, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 850, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 852, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 854, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 856, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 858, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 860, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 862, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 864, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 866, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 868, score: 350.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 870, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 872, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 874, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 876, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 878, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 880, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 882, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 884, score: 350.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 886, score: 600.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 888, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 890, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 892, score: 350.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 894, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 896, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 898, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
marking, episode: 900, score: 250.0, mean_score: 195.77, std_score: 212.92
episode: 900, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 902, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 904, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 906, score: 750.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 908, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 910, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 912, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 914, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 916, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 918, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 920, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 922, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 924, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 926, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 928, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 930, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 932, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 934, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 936, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 938, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 940, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 942, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 944, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 946, score: 700.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 948, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 950, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 952, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 954, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 956, score: 350.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 958, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 960, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 962, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 964, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 966, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 968, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 970, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 972, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 974, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 976, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 978, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 980, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 982, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 984, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 986, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 988, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 990, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 992, score: 850.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 994, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 996, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 998, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
marking, episode: 1000, score: 0.0, mean_score: 122.00, std_score: 189.52
episode: 1000, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1002, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1004, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1006, score: 500.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1008, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1010, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1012, score: 200.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1014, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1016, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1018, score: 350.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1020, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1022, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1024, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1026, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1028, score: 700.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1030, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1032, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1034, score: 250.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1036, score: 450.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1038, score: 100.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1040, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1042, score: 0.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
episode: 1044, score: 600.0, epsilon: 0.00
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:393: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.