In [1]:
pip install tqdm



In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import gym
import numpy as np
from tqdm import tqdm

# Hyperparameters
gamma = 0.99
epsilon = 0.1
c1 = 0.5
c2 = 0.01
clip_range = 0.2
max_episodes = 1000
max_steps = 500
batch_size = 32
actor_lr = 0.001
critic_lr = 0.001

# Environment
env = gym.make('CartPole-v1')

# Actor and Critic Networks
class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class Critic(nn.Module):
    def __init__(self, state_dim):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize Actor and Critic
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# Optimizers
actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)

# Experience Buffer
class ExperienceBuffer:
    def __init__(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []

    def add_experience(self, state, action, reward, next_state, done):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        self.dones.append(done)

    def sample_experience(self, batch_size):
        indices = np.random.choice(len(self.states), batch_size)
        states = np.array(self.states)[indices]
        actions = np.array(self.actions)[indices]
        rewards = np.array(self.rewards)[indices]
        next_states = np.array(self.next_states)[indices]
        dones = np.array(self.dones)[indices]
        return states, actions, rewards, next_states, dones

# Training Loop
experience_buffer = ExperienceBuffer()
for episode in tqdm(range(max_episodes)):
    state = env.reset()
    episode_reward = 0
    for step in range(max_steps):
        # Select action
        state_tensor = torch.tensor(state, dtype=torch.float32)
        action_logits = actor(state_tensor)
        action_distribution = Categorical(logits=action_logits)
        action = action_distribution.sample().item()

        # Take action
        next_state, reward, done, _ = env.step(action)
        episode_reward += reward

        # Add experience to buffer
        experience_buffer.add_experience(state, action, reward, next_state, done)

        # Update state
        state = next_state

        # Check if episode is done
        if done:
            break

    # Sample experience from buffer
    states, actions, rewards, next_states, dones = experience_buffer.sample_experience(batch_size)

    # Compute advantages
    advantages = []
    for i in range(batch_size):
        state_tensor = torch.tensor(states[i], dtype=torch.float32)
        next_state_tensor = torch.tensor(next_states[i], dtype=torch.float32)
        reward = rewards[i]
        done = dones[i]
        value = critic(state_tensor).item()
        next_value = critic(next_state_tensor).item()
        advantage = reward + gamma * next_value * (1 - done) - value
        advantages.append(advantage)
    advantages = np.array(advantages)

    # Compute clipped surrogate objective
    old_action_logits = actor(torch.tensor(states, dtype=torch.float32)).detach()
    old_action_distribution = Categorical(logits=old_action_logits)
    old_action_log_probs = old_action_distribution.log_prob(torch.tensor(actions))
    new_action_logits = actor(torch.tensor(states, dtype=torch.float32))
    new_action_distribution = Categorical(logits=new_action_logits)
    new_action_log_probs = new_action_distribution.log_prob(torch.tensor(actions))
    ratio = torch.exp(new_action_log_probs - old_action_log_probs)
    clipped_ratio = torch.clamp(ratio, 1 - clip_range, 1 + clip_range)
    clipped_surrogate_objective = torch.mean(torch.minimum(ratio * torch.tensor(advantages), clipped_ratio * torch.tensor(advantages)))

    # Compute value loss
    value_loss = torch.mean((critic(torch.tensor(states, dtype=torch.float32)) - torch.tensor(rewards + gamma * critic(torch.tensor(next_states, dtype=torch.float32)).detach() * (1 - torch.tensor(dones)))) ** 2)

    # Compute entropy loss
    entropy_loss = torch.mean(new_action_distribution.entropy())

    # Compute total loss
    total_loss = -clipped_surrogate_objective + c1 * value_loss - c2 * entropy_loss

    # Backpropagate and update
    actor_optimizer.zero_grad()
    critic_optimizer.zero_grad()
    total_loss.backward()
    actor_optimizer.step()
    critic_optimizer.step()

    # Print episode reward
    print(f'Episode {episode+1}, Reward: {episode_reward}')

# Test the trained agent
state = env.reset()
episode_reward = 0
while True:
    state_tensor = torch.tensor(state, dtype=torch.float32)
    action_logits = actor(state_tensor)
    action_distribution = Categorical(logits=action_logits)
    action = action_distribution.sample().item()
    next_state, reward, done, _ = env.step(action)
    episode_reward += reward
    state = next_state
    if done:
        break
print(f'Test Episode Reward: {episode_reward}')

  deprecation(
  deprecation(
  0%|          | 0/1000 [00:00<?, ?it/s]


RuntimeError: Subtraction, the `-` operator, with a bool tensor is not supported. If you are trying to invert a mask, use the `~` or `logical_not()` operator instead.

In [5]:
!pip install gymnasium

  and should_run_async(code)


Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import gymnasium as gym
import numpy as np
from tqdm import tqdm

# Hyperparameters
gamma = 0.99
epsilon = 0.1
c1 = 0.5
c2 = 0.01
clip_range = 0.2
max_episodes = 1000
max_steps = 500
batch_size = 32
actor_lr = 0.001
critic_lr = 0.001

# Environment
env = gym.make('CartPole-v1', render_mode='human')

# Actor and Critic Networks
class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class Critic(nn.Module):
    def __init__(self, state_dim):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize Actor and Critic
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# Optimizers
actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)

# Experience Buffer
class ExperienceBuffer:
    def __init__(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []

    def add_experience(self, state, action, reward, next_state, done):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        self.dones.append(done)

    def sample_experience(self, batch_size):
        indices = np.random.choice(len(self.states), batch_size)
        states = np.array(self.states)[indices]
        actions = np.array(self.actions)[indices]
        rewards = np.array(self.rewards)[indices]
        next_states = np.array(self.next_states)[indices]
        dones = np.array(self.dones)[indices]
        return states, actions, rewards, next_states, dones

# Training Loop
experience_buffer = ExperienceBuffer()
for episode in tqdm(range(max_episodes)):
    state, _info = env.reset()
    episode_reward = 0
    for step in range(max_steps):
        # Select action
        state_tensor = torch.tensor(state, dtype=torch.float32)
        action_logits = actor(state_tensor)
        action_distribution = Categorical(logits=action_logits)
        action = action_distribution.sample().item()

        # Take action
        next_state, reward, terminated, truncated, _info = env.step(action)
        done = terminated or truncated
        episode_reward += reward

        # Add experience to buffer
        experience_buffer.add_experience(state, action, reward, next_state, done)

        # Update state
        state = next_state

        # Check if episode is done
        if done:
            break

    # Sample experience from buffer
    states, actions, rewards, next_states, dones = experience_buffer.sample_experience(batch_size)

    # Compute advantages
    advantages = []
    for i in range(batch_size):
        state_tensor = torch.tensor(states[i], dtype=torch.float32)
        next_state_tensor = torch.tensor(next_states[i], dtype=torch.float32)
        reward = rewards[i]
        done = dones[i]
        value = critic(state_tensor).item()
        next_value = critic(next_state_tensor).item()
        advantage = reward + gamma * next_value * (1 - done) - value
        advantages.append(advantage)
    advantages = np.array(advantages)

    # Compute clipped surrogate objective
    old_action_logits = actor(torch.tensor(states, dtype=torch.float32)).detach()
    old_action_distribution = Categorical(logits=old_action_logits)
    old_action_log_probs = old_action_distribution.log_prob(torch.tensor(actions))
    new_action_logits = actor(torch.tensor(states, dtype=torch.float32))
    new_action_distribution = Categorical(logits=new_action_logits)
    new_action_log_probs = new_action_distribution.log_prob(torch.tensor(actions))
    ratio = torch.exp(new_action_log_probs - old_action_log_probs)
    clipped_ratio = torch.clamp(ratio, 1 - clip_range, 1 + clip_range)
    clipped_surrogate_objective = torch.mean(torch.minimum(ratio * torch.tensor(advantages), clipped_ratio * torch.tensor(advantages)))

        # Compute value loss
    rewards_tensor = torch.tensor(rewards, dtype=torch.float32)
    dones_tensor = torch.tensor(dones, dtype=torch.float32)
    next_values = critic(torch.tensor(next_states, dtype=torch.float32)).detach()
    target_values = rewards_tensor + gamma * next_values * (1 - dones_tensor)
    current_values = critic(torch.tensor(states, dtype=torch.float32))
    value_loss = torch.mean((current_values - target_values) ** 2)

    # Compute entropy loss
    entropy_loss = torch.mean(new_action_distribution.entropy())

    # Compute total loss
    total_loss = -clipped_surrogate_objective + c1 * value_loss - c2 * entropy_loss

    # Backpropagate and update
    actor_optimizer.zero_grad()
    critic_optimizer.zero_grad()
    total_loss.backward()
    actor_optimizer.step()
    critic_optimizer.step()

    # Print episode reward
    print(f'Episode {episode+1}, Reward: {episode_reward}')

# Test the trained agent
state, _info = env.reset()
episode_reward = 0
while True:
    state_tensor = torch.tensor(state, dtype=torch.float32)
    action_logits = actor(state_tensor)
    action_distribution = Categorical(logits=action_logits)
    action = action_distribution.sample().item()
    next_state, reward, terminated, truncated, _info = env.step(action)
    done = terminated or truncated
    episode_reward += reward
    state = next_state
    if done:
        break
print(f'Test Episode Reward: {episode_reward}')

  0%|          | 1/1000 [00:00<11:45,  1.42it/s]

Episode 1, Reward: 23.0


  0%|          | 2/1000 [00:01<12:28,  1.33it/s]

Episode 2, Reward: 36.0


  0%|          | 3/1000 [00:01<10:15,  1.62it/s]

Episode 3, Reward: 21.0


  0%|          | 4/1000 [00:02<09:59,  1.66it/s]

Episode 4, Reward: 26.0


  0%|          | 5/1000 [00:02<08:05,  2.05it/s]

Episode 5, Reward: 12.0


  1%|          | 6/1000 [00:03<07:52,  2.10it/s]

Episode 6, Reward: 21.0


  1%|          | 7/1000 [00:03<06:56,  2.38it/s]

Episode 7, Reward: 12.0


  1%|          | 8/1000 [00:03<06:20,  2.61it/s]

Episode 8, Reward: 13.0


  1%|          | 9/1000 [00:04<06:08,  2.69it/s]

Episode 9, Reward: 14.0


  1%|          | 10/1000 [00:04<05:42,  2.89it/s]

Episode 10, Reward: 12.0


  1%|          | 11/1000 [00:04<05:30,  2.99it/s]

Episode 11, Reward: 13.0


  1%|          | 12/1000 [00:05<06:14,  2.64it/s]

Episode 12, Reward: 21.0


  1%|▏         | 13/1000 [00:05<05:46,  2.85it/s]

Episode 13, Reward: 11.0


  1%|▏         | 14/1000 [00:06<06:19,  2.60it/s]

Episode 14, Reward: 20.0


  2%|▏         | 15/1000 [00:06<06:30,  2.52it/s]

Episode 15, Reward: 18.0


  2%|▏         | 16/1000 [00:06<05:58,  2.74it/s]

Episode 16, Reward: 11.0


  2%|▏         | 17/1000 [00:07<05:35,  2.93it/s]

Episode 17, Reward: 12.0


  2%|▏         | 18/1000 [00:07<05:17,  3.09it/s]

Episode 18, Reward: 12.0


  2%|▏         | 19/1000 [00:07<05:12,  3.14it/s]

Episode 19, Reward: 12.0


  2%|▏         | 20/1000 [00:08<09:11,  1.78it/s]

Episode 20, Reward: 51.0


  2%|▏         | 21/1000 [00:09<08:43,  1.87it/s]

Episode 21, Reward: 22.0


  2%|▏         | 22/1000 [00:09<08:58,  1.82it/s]

Episode 22, Reward: 28.0


  2%|▏         | 23/1000 [00:10<07:27,  2.18it/s]

Episode 23, Reward: 11.0


  2%|▏         | 24/1000 [00:10<06:46,  2.40it/s]

Episode 24, Reward: 14.0


  2%|▎         | 25/1000 [00:11<07:55,  2.05it/s]

Episode 25, Reward: 31.0


  3%|▎         | 26/1000 [00:11<07:01,  2.31it/s]

Episode 26, Reward: 14.0


  3%|▎         | 27/1000 [00:11<06:10,  2.63it/s]

Episode 27, Reward: 12.0


  3%|▎         | 28/1000 [00:12<06:47,  2.39it/s]

Episode 28, Reward: 24.0


  3%|▎         | 29/1000 [00:12<06:24,  2.53it/s]

Episode 29, Reward: 16.0


  3%|▎         | 30/1000 [00:12<05:45,  2.81it/s]

Episode 30, Reward: 12.0


  3%|▎         | 31/1000 [00:13<06:33,  2.46it/s]

Episode 31, Reward: 25.0


  3%|▎         | 32/1000 [00:13<06:05,  2.65it/s]

Episode 32, Reward: 14.0


  3%|▎         | 33/1000 [00:13<06:18,  2.56it/s]

Episode 33, Reward: 20.0


  3%|▎         | 34/1000 [00:14<06:09,  2.61it/s]

Episode 34, Reward: 17.0


  4%|▎         | 35/1000 [00:14<06:21,  2.53it/s]

Episode 35, Reward: 20.0


  4%|▎         | 36/1000 [00:15<05:53,  2.73it/s]

Episode 36, Reward: 14.0


  4%|▎         | 37/1000 [00:15<07:01,  2.29it/s]

Episode 37, Reward: 29.0


  4%|▍         | 38/1000 [00:15<06:21,  2.52it/s]

Episode 38, Reward: 14.0


  4%|▍         | 39/1000 [00:16<05:33,  2.88it/s]

Episode 39, Reward: 10.0


  4%|▍         | 40/1000 [00:17<07:45,  2.06it/s]

Episode 40, Reward: 34.0


  4%|▍         | 41/1000 [00:17<08:42,  1.83it/s]

Episode 41, Reward: 31.0


  4%|▍         | 42/1000 [00:17<07:22,  2.16it/s]

Episode 42, Reward: 11.0


  4%|▍         | 43/1000 [00:18<07:09,  2.23it/s]

Episode 43, Reward: 16.0


  4%|▍         | 44/1000 [00:18<06:04,  2.63it/s]

Episode 44, Reward: 9.0


  4%|▍         | 45/1000 [00:19<06:11,  2.57it/s]

Episode 45, Reward: 17.0


  5%|▍         | 46/1000 [00:19<06:38,  2.39it/s]

Episode 46, Reward: 21.0


  5%|▍         | 47/1000 [00:19<06:00,  2.65it/s]

Episode 47, Reward: 12.0


  5%|▍         | 48/1000 [00:20<05:57,  2.67it/s]

Episode 48, Reward: 14.0


  5%|▍         | 49/1000 [00:20<07:28,  2.12it/s]

Episode 49, Reward: 31.0


  5%|▌         | 50/1000 [00:21<06:42,  2.36it/s]

Episode 50, Reward: 13.0


  5%|▌         | 51/1000 [00:22<10:49,  1.46it/s]

Episode 51, Reward: 59.0


  5%|▌         | 52/1000 [00:23<10:32,  1.50it/s]

Episode 52, Reward: 28.0


  5%|▌         | 53/1000 [00:23<08:52,  1.78it/s]

Episode 53, Reward: 13.0


  5%|▌         | 54/1000 [00:23<08:15,  1.91it/s]

Episode 54, Reward: 19.0


  6%|▌         | 55/1000 [00:24<07:53,  2.00it/s]

Episode 55, Reward: 17.0


  6%|▌         | 56/1000 [00:24<08:02,  1.96it/s]

Episode 56, Reward: 23.0


  6%|▌         | 57/1000 [00:25<07:54,  1.99it/s]

Episode 57, Reward: 21.0


  6%|▌         | 58/1000 [00:25<07:17,  2.15it/s]

Episode 58, Reward: 17.0


  6%|▌         | 59/1000 [00:26<07:22,  2.13it/s]

Episode 59, Reward: 23.0


  6%|▌         | 60/1000 [00:26<07:36,  2.06it/s]

Episode 60, Reward: 25.0


  6%|▌         | 61/1000 [00:27<07:13,  2.17it/s]

Episode 61, Reward: 19.0


  6%|▌         | 62/1000 [00:27<06:39,  2.35it/s]

Episode 62, Reward: 16.0


  6%|▋         | 63/1000 [00:27<06:14,  2.50it/s]

Episode 63, Reward: 16.0


  6%|▋         | 64/1000 [00:28<05:41,  2.74it/s]

Episode 64, Reward: 13.0


  6%|▋         | 65/1000 [00:28<05:40,  2.74it/s]

Episode 65, Reward: 17.0


  7%|▋         | 66/1000 [00:28<05:17,  2.95it/s]

Episode 66, Reward: 13.0


  7%|▋         | 67/1000 [00:29<05:40,  2.74it/s]

Episode 67, Reward: 20.0


  7%|▋         | 68/1000 [00:29<05:11,  2.99it/s]

Episode 68, Reward: 12.0


  7%|▋         | 69/1000 [00:29<05:54,  2.63it/s]

Episode 69, Reward: 23.0


  7%|▋         | 70/1000 [00:30<05:48,  2.67it/s]

Episode 70, Reward: 17.0


  7%|▋         | 71/1000 [00:30<05:05,  3.04it/s]

Episode 71, Reward: 10.0


  7%|▋         | 72/1000 [00:30<05:50,  2.65it/s]

Episode 72, Reward: 23.0


  7%|▋         | 73/1000 [00:31<07:16,  2.12it/s]

Episode 73, Reward: 33.0


  7%|▋         | 74/1000 [00:31<06:23,  2.41it/s]

Episode 74, Reward: 13.0


  8%|▊         | 75/1000 [00:32<06:34,  2.35it/s]

Episode 75, Reward: 21.0


  8%|▊         | 76/1000 [00:32<06:05,  2.53it/s]

Episode 76, Reward: 15.0


  8%|▊         | 77/1000 [00:33<06:01,  2.55it/s]

Episode 77, Reward: 18.0


  8%|▊         | 78/1000 [00:33<06:33,  2.34it/s]

Episode 78, Reward: 24.0


  8%|▊         | 79/1000 [00:33<05:36,  2.74it/s]

Episode 79, Reward: 10.0


  8%|▊         | 80/1000 [00:34<05:07,  2.99it/s]

Episode 80, Reward: 12.0


  8%|▊         | 82/1000 [00:34<04:35,  3.33it/s]

Episode 81, Reward: 17.0
Episode 82, Reward: 9.0


  8%|▊         | 83/1000 [00:34<04:16,  3.57it/s]

Episode 83, Reward: 10.0


  8%|▊         | 84/1000 [00:35<04:03,  3.77it/s]

Episode 84, Reward: 10.0


  8%|▊         | 85/1000 [00:35<03:58,  3.84it/s]

Episode 85, Reward: 11.0


  9%|▊         | 86/1000 [00:35<03:53,  3.91it/s]

Episode 86, Reward: 11.0


  9%|▊         | 87/1000 [00:35<04:14,  3.59it/s]

Episode 87, Reward: 15.0


  9%|▉         | 88/1000 [00:36<04:17,  3.54it/s]

Episode 88, Reward: 13.0


  9%|▉         | 89/1000 [00:36<04:02,  3.75it/s]

Episode 89, Reward: 10.0


  9%|▉         | 91/1000 [00:36<03:32,  4.28it/s]

Episode 90, Reward: 9.0
Episode 91, Reward: 8.0


  9%|▉         | 92/1000 [00:37<04:13,  3.59it/s]

Episode 92, Reward: 18.0


  9%|▉         | 93/1000 [00:37<03:57,  3.82it/s]

Episode 93, Reward: 10.0


  9%|▉         | 94/1000 [00:37<03:41,  4.10it/s]

Episode 94, Reward: 9.0


 10%|▉         | 95/1000 [00:38<04:33,  3.30it/s]

Episode 95, Reward: 21.0


 10%|▉         | 96/1000 [00:38<04:38,  3.24it/s]

Episode 96, Reward: 15.0


 10%|▉         | 97/1000 [00:38<05:14,  2.87it/s]

Episode 97, Reward: 21.0


 10%|▉         | 98/1000 [00:39<04:40,  3.22it/s]

Episode 98, Reward: 10.0


 10%|▉         | 99/1000 [00:39<04:15,  3.53it/s]

Episode 99, Reward: 10.0


 10%|█         | 100/1000 [00:39<04:05,  3.66it/s]

Episode 100, Reward: 11.0


 10%|█         | 101/1000 [00:39<03:47,  3.96it/s]

Episode 101, Reward: 9.0


 10%|█         | 102/1000 [00:40<03:54,  3.83it/s]

Episode 102, Reward: 13.0


 10%|█         | 103/1000 [00:40<03:43,  4.02it/s]

Episode 103, Reward: 10.0


 10%|█         | 104/1000 [00:40<03:48,  3.92it/s]

Episode 104, Reward: 12.0


 10%|█         | 105/1000 [00:40<03:50,  3.88it/s]

Episode 105, Reward: 12.0


 11%|█         | 107/1000 [00:41<03:28,  4.29it/s]

Episode 106, Reward: 10.0
Episode 107, Reward: 9.0


 11%|█         | 108/1000 [00:41<03:31,  4.22it/s]

Episode 108, Reward: 11.0


 11%|█         | 109/1000 [00:41<03:58,  3.73it/s]

Episode 109, Reward: 16.0


 11%|█         | 110/1000 [00:42<04:40,  3.17it/s]

Episode 110, Reward: 20.0


 11%|█         | 111/1000 [00:42<05:13,  2.84it/s]

Episode 111, Reward: 21.0


 11%|█         | 112/1000 [00:42<04:43,  3.13it/s]

Episode 112, Reward: 11.0


 11%|█▏        | 113/1000 [00:43<04:18,  3.44it/s]

Episode 113, Reward: 10.0


 11%|█▏        | 114/1000 [00:43<03:59,  3.70it/s]

Episode 114, Reward: 10.0


 12%|█▏        | 115/1000 [00:43<03:41,  3.99it/s]

Episode 115, Reward: 9.0


 12%|█▏        | 116/1000 [00:43<03:44,  3.94it/s]

Episode 116, Reward: 12.0


 12%|█▏        | 117/1000 [00:44<03:41,  3.99it/s]

Episode 117, Reward: 11.0


 12%|█▏        | 118/1000 [00:44<03:54,  3.76it/s]

Episode 118, Reward: 14.0


 12%|█▏        | 119/1000 [00:44<03:48,  3.86it/s]

Episode 119, Reward: 11.0


 12%|█▏        | 120/1000 [00:44<03:34,  4.10it/s]

Episode 120, Reward: 9.0


 12%|█▏        | 121/1000 [00:45<03:38,  4.02it/s]

Episode 121, Reward: 12.0


 12%|█▏        | 122/1000 [00:45<03:37,  4.04it/s]

Episode 122, Reward: 11.0


 12%|█▏        | 123/1000 [00:45<03:25,  4.26it/s]

Episode 123, Reward: 9.0


 12%|█▏        | 124/1000 [00:45<03:22,  4.33it/s]

Episode 124, Reward: 10.0


 13%|█▎        | 126/1000 [00:46<03:40,  3.96it/s]

Episode 125, Reward: 19.0
Episode 126, Reward: 8.0


 13%|█▎        | 127/1000 [00:46<03:27,  4.20it/s]

Episode 127, Reward: 9.0


 13%|█▎        | 128/1000 [00:46<03:29,  4.17it/s]

Episode 128, Reward: 11.0


 13%|█▎        | 129/1000 [00:46<03:20,  4.35it/s]

Episode 129, Reward: 9.0


 13%|█▎        | 130/1000 [00:47<03:16,  4.42it/s]

Episode 130, Reward: 9.0


 13%|█▎        | 131/1000 [00:47<03:23,  4.26it/s]

Episode 131, Reward: 11.0


 13%|█▎        | 132/1000 [00:47<03:23,  4.27it/s]

Episode 132, Reward: 10.0


 13%|█▎        | 133/1000 [00:47<03:20,  4.32it/s]

Episode 133, Reward: 9.0


 13%|█▎        | 134/1000 [00:48<03:17,  4.39it/s]

Episode 134, Reward: 9.0


 14%|█▎        | 135/1000 [00:48<03:20,  4.31it/s]

Episode 135, Reward: 10.0


 14%|█▎        | 137/1000 [00:48<03:19,  4.33it/s]

Episode 136, Reward: 12.0
Episode 137, Reward: 8.0


 14%|█▍        | 138/1000 [00:49<03:47,  3.79it/s]

Episode 138, Reward: 15.0


 14%|█▍        | 140/1000 [00:49<03:37,  3.96it/s]

Episode 139, Reward: 14.0
Episode 140, Reward: 9.0


 14%|█▍        | 142/1000 [00:50<03:13,  4.44it/s]

Episode 141, Reward: 10.0
Episode 142, Reward: 8.0


 14%|█▍        | 143/1000 [00:50<03:17,  4.34it/s]

Episode 143, Reward: 11.0


 14%|█▍        | 145/1000 [00:50<03:03,  4.67it/s]

Episode 144, Reward: 10.0
Episode 145, Reward: 8.0


 15%|█▍        | 146/1000 [00:51<03:15,  4.38it/s]

Episode 146, Reward: 12.0


 15%|█▍        | 147/1000 [00:51<03:23,  4.19it/s]

Episode 147, Reward: 12.0


 15%|█▍        | 148/1000 [00:51<03:15,  4.35it/s]

Episode 148, Reward: 9.0


 15%|█▌        | 150/1000 [00:51<03:22,  4.19it/s]

Episode 149, Reward: 16.0
Episode 150, Reward: 8.0


 15%|█▌        | 151/1000 [00:52<03:19,  4.26it/s]

Episode 151, Reward: 10.0


 15%|█▌        | 152/1000 [00:52<03:15,  4.34it/s]

Episode 152, Reward: 10.0


 15%|█▌        | 153/1000 [00:52<03:13,  4.37it/s]

Episode 153, Reward: 10.0


 15%|█▌        | 154/1000 [00:52<03:12,  4.39it/s]

Episode 154, Reward: 10.0


 16%|█▌        | 156/1000 [00:53<03:07,  4.50it/s]

Episode 155, Reward: 11.0
Episode 156, Reward: 9.0


 16%|█▌        | 157/1000 [00:53<03:02,  4.63it/s]

Episode 157, Reward: 9.0


 16%|█▌        | 158/1000 [00:53<03:08,  4.48it/s]

Episode 158, Reward: 11.0


 16%|█▌        | 160/1000 [00:54<03:04,  4.55it/s]

Episode 159, Reward: 11.0
Episode 160, Reward: 9.0


 16%|█▌        | 161/1000 [00:54<03:09,  4.42it/s]

Episode 161, Reward: 11.0


 16%|█▌        | 162/1000 [00:54<03:09,  4.42it/s]

Episode 162, Reward: 10.0


 16%|█▋        | 163/1000 [00:55<03:34,  3.90it/s]

Episode 163, Reward: 15.0


 16%|█▋        | 164/1000 [00:55<03:25,  4.06it/s]

Episode 164, Reward: 10.0


 16%|█▋        | 165/1000 [00:55<03:25,  4.07it/s]

Episode 165, Reward: 11.0


 17%|█▋        | 166/1000 [00:55<03:18,  4.20it/s]

Episode 166, Reward: 10.0


 17%|█▋        | 167/1000 [00:56<03:45,  3.69it/s]

Episode 167, Reward: 16.0


 17%|█▋        | 168/1000 [00:56<03:29,  3.98it/s]

Episode 168, Reward: 9.0


 17%|█▋        | 169/1000 [00:56<03:21,  4.13it/s]

Episode 169, Reward: 10.0


 17%|█▋        | 170/1000 [00:56<03:25,  4.03it/s]

Episode 170, Reward: 12.0


 17%|█▋        | 172/1000 [00:57<03:01,  4.55it/s]

Episode 171, Reward: 9.0
Episode 172, Reward: 8.0


 17%|█▋        | 174/1000 [00:57<03:00,  4.57it/s]

Episode 173, Reward: 11.0
Episode 174, Reward: 9.0


 18%|█▊        | 175/1000 [00:57<02:56,  4.68it/s]

Episode 175, Reward: 9.0


 18%|█▊        | 177/1000 [00:58<02:55,  4.68it/s]

Episode 176, Reward: 10.0
Episode 177, Reward: 9.0


 18%|█▊        | 178/1000 [00:58<02:53,  4.75it/s]

Episode 178, Reward: 9.0


 18%|█▊        | 179/1000 [00:58<02:55,  4.67it/s]

Episode 179, Reward: 10.0


 18%|█▊        | 181/1000 [00:59<02:47,  4.88it/s]

Episode 180, Reward: 9.0
Episode 181, Reward: 8.0


 18%|█▊        | 182/1000 [00:59<02:57,  4.60it/s]

Episode 182, Reward: 11.0


 18%|█▊        | 183/1000 [00:59<02:57,  4.61it/s]

Episode 183, Reward: 9.0


 18%|█▊        | 184/1000 [00:59<03:00,  4.52it/s]

Episode 184, Reward: 10.0


 18%|█▊        | 185/1000 [00:59<03:03,  4.45it/s]

Episode 185, Reward: 10.0


 19%|█▊        | 186/1000 [01:00<03:00,  4.52it/s]

Episode 186, Reward: 9.0


 19%|█▊        | 187/1000 [01:00<03:05,  4.38it/s]

Episode 187, Reward: 10.0


 19%|█▉        | 188/1000 [01:00<03:02,  4.45it/s]

Episode 188, Reward: 9.0


 19%|█▉        | 189/1000 [01:00<03:15,  4.16it/s]

Episode 189, Reward: 12.0


 19%|█▉        | 190/1000 [01:01<03:11,  4.23it/s]

Episode 190, Reward: 9.0


 19%|█▉        | 191/1000 [01:01<03:05,  4.36it/s]

Episode 191, Reward: 9.0


 19%|█▉        | 192/1000 [01:01<03:03,  4.41it/s]

Episode 192, Reward: 10.0


 19%|█▉        | 194/1000 [01:01<02:55,  4.60it/s]

Episode 193, Reward: 10.0
Episode 194, Reward: 9.0


 20%|█▉        | 196/1000 [01:02<03:05,  4.34it/s]

Episode 195, Reward: 14.0
Episode 196, Reward: 9.0


 20%|█▉        | 197/1000 [01:02<02:58,  4.49it/s]

Episode 197, Reward: 9.0


 20%|█▉        | 199/1000 [01:03<02:52,  4.64it/s]

Episode 198, Reward: 11.0
Episode 199, Reward: 8.0


 20%|██        | 200/1000 [01:03<02:58,  4.48it/s]

Episode 200, Reward: 11.0


 20%|██        | 201/1000 [01:03<02:53,  4.60it/s]

Episode 201, Reward: 9.0


 20%|██        | 202/1000 [01:03<02:51,  4.66it/s]

Episode 202, Reward: 9.0


 20%|██        | 203/1000 [01:03<02:52,  4.61it/s]

Episode 203, Reward: 10.0


 20%|██        | 205/1000 [01:04<02:46,  4.77it/s]

Episode 204, Reward: 10.0
Episode 205, Reward: 8.0


 21%|██        | 206/1000 [01:04<02:39,  4.98it/s]

Episode 206, Reward: 8.0


 21%|██        | 207/1000 [01:04<02:40,  4.95it/s]

Episode 207, Reward: 9.0


 21%|██        | 208/1000 [01:05<02:45,  4.78it/s]

Episode 208, Reward: 10.0


 21%|██        | 209/1000 [01:05<02:51,  4.62it/s]

Episode 209, Reward: 10.0


 21%|██        | 210/1000 [01:05<02:54,  4.54it/s]

Episode 210, Reward: 10.0


 21%|██        | 211/1000 [01:05<03:00,  4.37it/s]

Episode 211, Reward: 8.0


 21%|██        | 212/1000 [01:05<03:01,  4.33it/s]

Episode 212, Reward: 10.0


 21%|██▏       | 213/1000 [01:06<02:59,  4.38it/s]

Episode 213, Reward: 10.0


 21%|██▏       | 214/1000 [01:06<02:53,  4.53it/s]

Episode 214, Reward: 9.0


 22%|██▏       | 215/1000 [01:06<03:05,  4.23it/s]

Episode 215, Reward: 9.0


 22%|██▏       | 216/1000 [01:06<03:09,  4.13it/s]

Episode 216, Reward: 11.0


 22%|██▏       | 218/1000 [01:07<02:52,  4.54it/s]

Episode 217, Reward: 10.0
Episode 218, Reward: 8.0


 22%|██▏       | 219/1000 [01:07<02:54,  4.46it/s]

Episode 219, Reward: 10.0


 22%|██▏       | 220/1000 [01:07<02:55,  4.46it/s]

Episode 220, Reward: 10.0


 22%|██▏       | 221/1000 [01:07<02:51,  4.55it/s]

Episode 221, Reward: 9.0


 22%|██▏       | 222/1000 [01:08<02:51,  4.53it/s]

Episode 222, Reward: 10.0


 22%|██▏       | 223/1000 [01:08<02:48,  4.60it/s]

Episode 223, Reward: 9.0


 22%|██▎       | 225/1000 [01:08<02:40,  4.82it/s]

Episode 224, Reward: 10.0
Episode 225, Reward: 8.0


 23%|██▎       | 226/1000 [01:09<02:38,  4.88it/s]

Episode 226, Reward: 9.0


 23%|██▎       | 227/1000 [01:09<02:43,  4.73it/s]

Episode 227, Reward: 10.0


 23%|██▎       | 228/1000 [01:09<02:43,  4.73it/s]

Episode 228, Reward: 9.0


 23%|██▎       | 229/1000 [01:09<02:43,  4.73it/s]

Episode 229, Reward: 9.0


 23%|██▎       | 230/1000 [01:09<02:45,  4.65it/s]

Episode 230, Reward: 10.0


 23%|██▎       | 231/1000 [01:10<02:47,  4.60it/s]

Episode 231, Reward: 10.0


 23%|██▎       | 232/1000 [01:10<02:49,  4.53it/s]

Episode 232, Reward: 10.0


 23%|██▎       | 234/1000 [01:10<02:44,  4.64it/s]

Episode 233, Reward: 10.0
Episode 234, Reward: 9.0


 24%|██▎       | 235/1000 [01:11<02:47,  4.58it/s]

Episode 235, Reward: 10.0


 24%|██▎       | 236/1000 [01:11<02:47,  4.55it/s]

Episode 236, Reward: 10.0


 24%|██▍       | 238/1000 [01:11<02:41,  4.72it/s]

Episode 237, Reward: 10.0
Episode 238, Reward: 8.0


 24%|██▍       | 239/1000 [01:11<02:41,  4.70it/s]

Episode 239, Reward: 9.0


 24%|██▍       | 240/1000 [01:12<02:43,  4.66it/s]

Episode 240, Reward: 9.0


 24%|██▍       | 241/1000 [01:12<02:43,  4.63it/s]

Episode 241, Reward: 9.0


 24%|██▍       | 243/1000 [01:12<02:43,  4.64it/s]

Episode 242, Reward: 10.0
Episode 243, Reward: 8.0


 24%|██▍       | 244/1000 [01:12<02:42,  4.67it/s]

Episode 244, Reward: 8.0


 24%|██▍       | 245/1000 [01:13<02:46,  4.52it/s]

Episode 245, Reward: 10.0


 25%|██▍       | 246/1000 [01:13<02:47,  4.49it/s]

Episode 246, Reward: 9.0


 25%|██▍       | 248/1000 [01:13<02:37,  4.76it/s]

Episode 247, Reward: 9.0
Episode 248, Reward: 8.0


 25%|██▌       | 250/1000 [01:14<02:35,  4.83it/s]

Episode 249, Reward: 9.0
Episode 250, Reward: 9.0


 25%|██▌       | 251/1000 [01:14<02:39,  4.69it/s]

Episode 251, Reward: 10.0


 25%|██▌       | 253/1000 [01:14<02:31,  4.92it/s]

Episode 252, Reward: 9.0
Episode 253, Reward: 8.0


 25%|██▌       | 254/1000 [01:15<02:36,  4.77it/s]

Episode 254, Reward: 10.0


 26%|██▌       | 255/1000 [01:15<02:39,  4.66it/s]

Episode 255, Reward: 10.0


 26%|██▌       | 256/1000 [01:15<02:38,  4.68it/s]

Episode 256, Reward: 9.0


 26%|██▌       | 257/1000 [01:15<02:38,  4.68it/s]

Episode 257, Reward: 9.0


 26%|██▌       | 258/1000 [01:15<02:38,  4.69it/s]

Episode 258, Reward: 9.0


 26%|██▌       | 259/1000 [01:16<02:39,  4.63it/s]

Episode 259, Reward: 10.0


 26%|██▌       | 260/1000 [01:16<02:41,  4.59it/s]

Episode 260, Reward: 10.0


 26%|██▌       | 262/1000 [01:16<02:34,  4.78it/s]

Episode 261, Reward: 10.0
Episode 262, Reward: 8.0


 26%|██▋       | 263/1000 [01:17<02:36,  4.70it/s]

Episode 263, Reward: 10.0


 26%|██▋       | 264/1000 [01:17<02:38,  4.63it/s]

Episode 264, Reward: 10.0


 26%|██▋       | 265/1000 [01:17<02:41,  4.56it/s]

Episode 265, Reward: 10.0


 27%|██▋       | 266/1000 [01:17<02:38,  4.64it/s]

Episode 266, Reward: 9.0


 27%|██▋       | 267/1000 [01:17<02:36,  4.69it/s]

Episode 267, Reward: 9.0


 27%|██▋       | 268/1000 [01:18<02:35,  4.70it/s]

Episode 268, Reward: 9.0


 27%|██▋       | 269/1000 [01:18<02:38,  4.62it/s]

Episode 269, Reward: 10.0


 27%|██▋       | 270/1000 [01:18<02:39,  4.57it/s]

Episode 270, Reward: 10.0


 27%|██▋       | 271/1000 [01:18<02:40,  4.54it/s]

Episode 271, Reward: 10.0


 27%|██▋       | 273/1000 [01:19<02:32,  4.77it/s]

Episode 272, Reward: 10.0
Episode 273, Reward: 8.0


 28%|██▊       | 275/1000 [01:19<02:27,  4.91it/s]

Episode 274, Reward: 9.0
Episode 275, Reward: 8.0


 28%|██▊       | 276/1000 [01:19<02:31,  4.76it/s]

Episode 276, Reward: 10.0


 28%|██▊       | 277/1000 [01:19<02:31,  4.77it/s]

Episode 277, Reward: 9.0


 28%|██▊       | 278/1000 [01:20<02:34,  4.67it/s]

Episode 278, Reward: 10.0


 28%|██▊       | 279/1000 [01:20<02:40,  4.48it/s]

Episode 279, Reward: 11.0


 28%|██▊       | 280/1000 [01:20<02:38,  4.55it/s]

Episode 280, Reward: 9.0


 28%|██▊       | 281/1000 [01:20<02:34,  4.65it/s]

Episode 281, Reward: 9.0


 28%|██▊       | 282/1000 [01:21<02:36,  4.60it/s]

Episode 282, Reward: 10.0


 28%|██▊       | 283/1000 [01:21<02:33,  4.68it/s]

Episode 283, Reward: 9.0


 28%|██▊       | 284/1000 [01:21<02:34,  4.62it/s]

Episode 284, Reward: 10.0


 28%|██▊       | 285/1000 [01:21<02:36,  4.57it/s]

Episode 285, Reward: 10.0


 29%|██▊       | 287/1000 [01:22<02:28,  4.79it/s]

Episode 286, Reward: 10.0
Episode 287, Reward: 8.0


 29%|██▉       | 288/1000 [01:22<02:23,  4.96it/s]

Episode 288, Reward: 8.0


 29%|██▉       | 289/1000 [01:22<02:25,  4.89it/s]

Episode 289, Reward: 9.0


 29%|██▉       | 290/1000 [01:22<02:30,  4.72it/s]

Episode 290, Reward: 10.0


 29%|██▉       | 291/1000 [01:22<02:29,  4.74it/s]

Episode 291, Reward: 9.0


 29%|██▉       | 293/1000 [01:23<02:22,  4.96it/s]

Episode 292, Reward: 9.0
Episode 293, Reward: 8.0


 29%|██▉       | 294/1000 [01:23<02:23,  4.92it/s]

Episode 294, Reward: 9.0


 30%|██▉       | 295/1000 [01:23<02:34,  4.57it/s]

Episode 295, Reward: 11.0


 30%|██▉       | 296/1000 [01:24<02:34,  4.54it/s]

Episode 296, Reward: 9.0


 30%|██▉       | 297/1000 [01:24<02:37,  4.47it/s]

Episode 297, Reward: 10.0


 30%|██▉       | 298/1000 [01:24<02:39,  4.39it/s]

Episode 298, Reward: 10.0
Episode 299, Reward: 8.0


 30%|███       | 300/1000 [01:24<02:32,  4.60it/s]

Episode 300, Reward: 8.0


 30%|███       | 302/1000 [01:25<02:30,  4.65it/s]

Episode 301, Reward: 10.0
Episode 302, Reward: 8.0


 30%|███       | 303/1000 [01:25<02:30,  4.63it/s]

Episode 303, Reward: 9.0


 30%|███       | 305/1000 [01:26<02:26,  4.74it/s]

Episode 304, Reward: 10.0
Episode 305, Reward: 8.0


 31%|███       | 306/1000 [01:26<02:28,  4.67it/s]

Episode 306, Reward: 10.0


 31%|███       | 308/1000 [01:26<02:22,  4.85it/s]

Episode 307, Reward: 10.0
Episode 308, Reward: 8.0


 31%|███       | 309/1000 [01:26<02:21,  4.88it/s]

Episode 309, Reward: 9.0


 31%|███       | 310/1000 [01:27<02:25,  4.75it/s]

Episode 310, Reward: 10.0


 31%|███       | 312/1000 [01:27<02:18,  4.96it/s]

Episode 311, Reward: 9.0
Episode 312, Reward: 8.0


 31%|███▏      | 314/1000 [01:27<02:17,  5.00it/s]

Episode 313, Reward: 10.0
Episode 314, Reward: 8.0


 32%|███▏      | 315/1000 [01:28<02:17,  4.99it/s]

Episode 315, Reward: 9.0


 32%|███▏      | 316/1000 [01:28<02:22,  4.82it/s]

Episode 316, Reward: 10.0


 32%|███▏      | 317/1000 [01:28<02:25,  4.69it/s]

Episode 317, Reward: 10.0


 32%|███▏      | 318/1000 [01:28<02:27,  4.62it/s]

Episode 318, Reward: 10.0


 32%|███▏      | 319/1000 [01:28<02:28,  4.57it/s]

Episode 319, Reward: 10.0


 32%|███▏      | 320/1000 [01:29<02:30,  4.53it/s]

Episode 320, Reward: 10.0


 32%|███▏      | 321/1000 [01:29<02:27,  4.61it/s]

Episode 321, Reward: 9.0


 32%|███▏      | 323/1000 [01:29<02:19,  4.86it/s]

Episode 322, Reward: 9.0
Episode 323, Reward: 8.0


 32%|███▏      | 324/1000 [01:30<02:18,  4.87it/s]

Episode 324, Reward: 9.0


 32%|███▎      | 325/1000 [01:30<02:18,  4.87it/s]

Episode 325, Reward: 9.0


 33%|███▎      | 326/1000 [01:30<02:22,  4.74it/s]

Episode 326, Reward: 10.0


 33%|███▎      | 327/1000 [01:30<02:21,  4.74it/s]

Episode 327, Reward: 9.0


 33%|███▎      | 328/1000 [01:30<02:20,  4.77it/s]

Episode 328, Reward: 9.0


 33%|███▎      | 329/1000 [01:31<02:19,  4.82it/s]

Episode 329, Reward: 9.0


 33%|███▎      | 330/1000 [01:31<02:22,  4.70it/s]

Episode 330, Reward: 10.0


 33%|███▎      | 331/1000 [01:31<02:24,  4.63it/s]

Episode 331, Reward: 10.0


 33%|███▎      | 332/1000 [01:31<02:25,  4.58it/s]

Episode 332, Reward: 10.0


 33%|███▎      | 333/1000 [01:31<02:28,  4.49it/s]

Episode 333, Reward: 10.0


 33%|███▎      | 334/1000 [01:32<02:28,  4.48it/s]

Episode 334, Reward: 10.0


 34%|███▎      | 335/1000 [01:32<02:24,  4.59it/s]

Episode 335, Reward: 9.0


 34%|███▎      | 337/1000 [01:32<02:18,  4.79it/s]

Episode 336, Reward: 10.0
Episode 337, Reward: 8.0


 34%|███▍      | 338/1000 [01:32<02:13,  4.96it/s]

Episode 338, Reward: 8.0


 34%|███▍      | 339/1000 [01:33<02:17,  4.80it/s]

Episode 339, Reward: 10.0


 34%|███▍      | 341/1000 [01:33<02:17,  4.78it/s]

Episode 340, Reward: 10.0
Episode 341, Reward: 9.0


 34%|███▍      | 342/1000 [01:33<02:20,  4.67it/s]

Episode 342, Reward: 10.0


 34%|███▍      | 343/1000 [01:34<02:23,  4.59it/s]

Episode 343, Reward: 10.0


 34%|███▍      | 344/1000 [01:34<02:24,  4.54it/s]

Episode 344, Reward: 10.0


 35%|███▍      | 346/1000 [01:34<02:17,  4.76it/s]

Episode 345, Reward: 10.0
Episode 346, Reward: 8.0


 35%|███▍      | 348/1000 [01:35<02:08,  5.07it/s]

Episode 347, Reward: 8.0
Episode 348, Reward: 8.0


 35%|███▍      | 349/1000 [01:35<02:05,  5.18it/s]

Episode 349, Reward: 8.0


 35%|███▌      | 350/1000 [01:35<02:11,  4.94it/s]

Episode 350, Reward: 10.0


 35%|███▌      | 351/1000 [01:35<02:15,  4.79it/s]

Episode 351, Reward: 10.0


 35%|███▌      | 352/1000 [01:35<02:19,  4.64it/s]

Episode 352, Reward: 10.0


 35%|███▌      | 353/1000 [01:36<02:20,  4.60it/s]

Episode 353, Reward: 9.0


 35%|███▌      | 354/1000 [01:36<02:22,  4.53it/s]

Episode 354, Reward: 9.0


 36%|███▌      | 356/1000 [01:36<02:18,  4.64it/s]

Episode 355, Reward: 10.0
Episode 356, Reward: 8.0


 36%|███▌      | 357/1000 [01:37<02:23,  4.49it/s]

Episode 357, Reward: 10.0


 36%|███▌      | 358/1000 [01:37<02:21,  4.53it/s]

Episode 358, Reward: 9.0


 36%|███▌      | 359/1000 [01:37<02:29,  4.29it/s]

Episode 359, Reward: 11.0


 36%|███▌      | 360/1000 [01:37<02:33,  4.16it/s]

Episode 360, Reward: 11.0


 36%|███▌      | 362/1000 [01:38<02:22,  4.48it/s]

Episode 361, Reward: 9.0
Episode 362, Reward: 8.0


 36%|███▋      | 363/1000 [01:38<02:18,  4.60it/s]

Episode 363, Reward: 9.0


 36%|███▋      | 364/1000 [01:38<02:23,  4.45it/s]

Episode 364, Reward: 11.0


 36%|███▋      | 365/1000 [01:38<02:21,  4.50it/s]

Episode 365, Reward: 9.0


 37%|███▋      | 366/1000 [01:39<02:23,  4.42it/s]

Episode 366, Reward: 10.0


 37%|███▋      | 368/1000 [01:39<02:15,  4.67it/s]

Episode 367, Reward: 10.0
Episode 368, Reward: 8.0


 37%|███▋      | 369/1000 [01:39<02:17,  4.60it/s]

Episode 369, Reward: 10.0


 37%|███▋      | 371/1000 [01:40<02:10,  4.84it/s]

Episode 370, Reward: 9.0
Episode 371, Reward: 8.0


 37%|███▋      | 373/1000 [01:40<02:08,  4.87it/s]

Episode 372, Reward: 10.0
Episode 373, Reward: 8.0


 38%|███▊      | 375/1000 [01:40<02:06,  4.93it/s]

Episode 374, Reward: 9.0
Episode 375, Reward: 9.0


 38%|███▊      | 377/1000 [01:41<02:04,  5.01it/s]

Episode 376, Reward: 9.0
Episode 377, Reward: 8.0


 38%|███▊      | 378/1000 [01:41<02:09,  4.82it/s]

Episode 378, Reward: 10.0


 38%|███▊      | 379/1000 [01:41<02:12,  4.70it/s]

Episode 379, Reward: 10.0


 38%|███▊      | 380/1000 [01:42<02:14,  4.62it/s]

Episode 380, Reward: 10.0


 38%|███▊      | 381/1000 [01:42<02:12,  4.66it/s]

Episode 381, Reward: 9.0


 38%|███▊      | 382/1000 [01:42<02:16,  4.53it/s]

Episode 382, Reward: 10.0


 38%|███▊      | 384/1000 [01:42<02:07,  4.82it/s]

Episode 383, Reward: 9.0
Episode 384, Reward: 8.0


 38%|███▊      | 385/1000 [01:43<02:10,  4.71it/s]

Episode 385, Reward: 10.0


 39%|███▊      | 386/1000 [01:43<02:12,  4.62it/s]

Episode 386, Reward: 10.0


 39%|███▊      | 387/1000 [01:43<02:14,  4.56it/s]

Episode 387, Reward: 10.0


 39%|███▉      | 388/1000 [01:43<02:11,  4.64it/s]

Episode 388, Reward: 9.0


 39%|███▉      | 389/1000 [01:43<02:13,  4.59it/s]

Episode 389, Reward: 10.0


 39%|███▉      | 390/1000 [01:44<02:14,  4.52it/s]

Episode 390, Reward: 9.0


 39%|███▉      | 391/1000 [01:44<02:12,  4.59it/s]

Episode 391, Reward: 9.0


 39%|███▉      | 392/1000 [01:44<02:14,  4.53it/s]

Episode 392, Reward: 10.0


 39%|███▉      | 393/1000 [01:44<02:14,  4.50it/s]

Episode 393, Reward: 10.0


 39%|███▉      | 394/1000 [01:45<02:15,  4.48it/s]

Episode 394, Reward: 10.0


 40%|███▉      | 395/1000 [01:45<02:19,  4.34it/s]

Episode 395, Reward: 11.0


 40%|███▉      | 397/1000 [01:45<02:09,  4.64it/s]

Episode 396, Reward: 10.0
Episode 397, Reward: 8.0


 40%|███▉      | 398/1000 [01:45<02:10,  4.60it/s]

Episode 398, Reward: 10.0


 40%|███▉      | 399/1000 [01:46<02:12,  4.54it/s]

Episode 399, Reward: 10.0


 40%|████      | 401/1000 [01:46<02:03,  4.84it/s]

Episode 400, Reward: 9.0
Episode 401, Reward: 8.0


 40%|████      | 402/1000 [01:46<02:06,  4.73it/s]

Episode 402, Reward: 10.0


 40%|████      | 404/1000 [01:47<02:01,  4.92it/s]

Episode 403, Reward: 9.0
Episode 404, Reward: 8.0


 40%|████      | 405/1000 [01:47<02:05,  4.76it/s]

Episode 405, Reward: 10.0


 41%|████      | 406/1000 [01:47<02:07,  4.65it/s]

Episode 406, Reward: 10.0


 41%|████      | 407/1000 [01:47<02:10,  4.56it/s]

Episode 407, Reward: 10.0


 41%|████      | 408/1000 [01:48<02:10,  4.52it/s]

Episode 408, Reward: 10.0


 41%|████      | 409/1000 [01:48<02:11,  4.51it/s]

Episode 409, Reward: 9.0


 41%|████      | 410/1000 [01:48<02:14,  4.40it/s]

Episode 410, Reward: 10.0


 41%|████      | 411/1000 [01:48<02:13,  4.41it/s]

Episode 411, Reward: 9.0


 41%|████      | 412/1000 [01:49<02:14,  4.38it/s]

Episode 412, Reward: 9.0


 41%|████▏     | 413/1000 [01:49<02:13,  4.41it/s]

Episode 413, Reward: 9.0


 42%|████▏     | 415/1000 [01:49<02:09,  4.52it/s]

Episode 414, Reward: 10.0
Episode 415, Reward: 8.0


 42%|████▏     | 417/1000 [01:50<02:03,  4.73it/s]

Episode 416, Reward: 8.0
Episode 417, Reward: 8.0


 42%|████▏     | 418/1000 [01:50<02:02,  4.76it/s]

Episode 418, Reward: 8.0


 42%|████▏     | 419/1000 [01:50<02:04,  4.66it/s]

Episode 419, Reward: 10.0


 42%|████▏     | 420/1000 [01:50<02:03,  4.69it/s]

Episode 420, Reward: 9.0


 42%|████▏     | 421/1000 [01:50<02:05,  4.62it/s]

Episode 421, Reward: 10.0


 42%|████▏     | 422/1000 [01:51<02:06,  4.56it/s]

Episode 422, Reward: 10.0


 42%|████▏     | 423/1000 [01:51<02:07,  4.51it/s]

Episode 423, Reward: 10.0


 42%|████▏     | 424/1000 [01:51<02:04,  4.61it/s]

Episode 424, Reward: 9.0


 42%|████▎     | 425/1000 [01:51<02:06,  4.55it/s]

Episode 425, Reward: 10.0


 43%|████▎     | 426/1000 [01:52<02:07,  4.50it/s]

Episode 426, Reward: 10.0


 43%|████▎     | 427/1000 [01:52<02:04,  4.59it/s]

Episode 427, Reward: 9.0


 43%|████▎     | 429/1000 [01:52<01:58,  4.84it/s]

Episode 428, Reward: 9.0
Episode 429, Reward: 8.0


 43%|████▎     | 430/1000 [01:52<01:54,  4.96it/s]

Episode 430, Reward: 8.0


 43%|████▎     | 431/1000 [01:53<01:59,  4.78it/s]

Episode 431, Reward: 10.0


 43%|████▎     | 432/1000 [01:53<02:01,  4.67it/s]

Episode 432, Reward: 10.0


 43%|████▎     | 433/1000 [01:53<02:03,  4.58it/s]

Episode 433, Reward: 10.0


 43%|████▎     | 434/1000 [01:53<02:03,  4.58it/s]

Episode 434, Reward: 9.0


 44%|████▎     | 435/1000 [01:54<02:05,  4.51it/s]

Episode 435, Reward: 10.0


 44%|████▎     | 436/1000 [01:54<02:09,  4.36it/s]

Episode 436, Reward: 11.0


 44%|████▎     | 437/1000 [01:54<02:06,  4.45it/s]

Episode 437, Reward: 9.0


 44%|████▍     | 438/1000 [01:54<02:06,  4.44it/s]

Episode 438, Reward: 10.0


 44%|████▍     | 439/1000 [01:54<02:08,  4.37it/s]

Episode 439, Reward: 10.0


 44%|████▍     | 441/1000 [01:55<02:00,  4.65it/s]

Episode 440, Reward: 10.0
Episode 441, Reward: 8.0


 44%|████▍     | 443/1000 [01:55<01:55,  4.82it/s]

Episode 442, Reward: 9.0
Episode 443, Reward: 8.0


 44%|████▍     | 445/1000 [01:56<01:54,  4.86it/s]

Episode 444, Reward: 10.0
Episode 445, Reward: 8.0


 45%|████▍     | 446/1000 [01:56<01:57,  4.70it/s]

Episode 446, Reward: 10.0


 45%|████▍     | 447/1000 [01:56<02:00,  4.60it/s]

Episode 447, Reward: 10.0


 45%|████▍     | 449/1000 [01:57<01:55,  4.79it/s]

Episode 448, Reward: 10.0
Episode 449, Reward: 8.0


 45%|████▌     | 450/1000 [01:57<01:54,  4.80it/s]

Episode 450, Reward: 9.0


 45%|████▌     | 451/1000 [01:57<01:53,  4.82it/s]

Episode 451, Reward: 9.0


 45%|████▌     | 452/1000 [01:57<01:54,  4.80it/s]

Episode 452, Reward: 9.0


 45%|████▌     | 453/1000 [01:57<01:53,  4.82it/s]

Episode 453, Reward: 9.0


 45%|████▌     | 454/1000 [01:58<01:53,  4.79it/s]

Episode 454, Reward: 9.0


 46%|████▌     | 455/1000 [01:58<01:56,  4.68it/s]

Episode 455, Reward: 10.0


 46%|████▌     | 456/1000 [01:58<01:58,  4.58it/s]

Episode 456, Reward: 10.0


 46%|████▌     | 457/1000 [01:58<01:59,  4.53it/s]

Episode 457, Reward: 10.0


 46%|████▌     | 458/1000 [01:58<01:57,  4.60it/s]

Episode 458, Reward: 9.0


 46%|████▌     | 459/1000 [01:59<01:56,  4.66it/s]

Episode 459, Reward: 9.0


 46%|████▌     | 460/1000 [01:59<01:55,  4.67it/s]

Episode 460, Reward: 9.0


 46%|████▌     | 461/1000 [01:59<01:55,  4.68it/s]

Episode 461, Reward: 9.0


 46%|████▌     | 462/1000 [01:59<01:57,  4.59it/s]

Episode 462, Reward: 10.0


 46%|████▋     | 464/1000 [02:00<01:50,  4.84it/s]

Episode 463, Reward: 9.0
Episode 464, Reward: 8.0


 46%|████▋     | 465/1000 [02:00<01:53,  4.72it/s]

Episode 465, Reward: 10.0


 47%|████▋     | 466/1000 [02:00<01:55,  4.62it/s]

Episode 466, Reward: 9.0


 47%|████▋     | 467/1000 [02:00<01:57,  4.55it/s]

Episode 467, Reward: 9.0


 47%|████▋     | 468/1000 [02:01<02:00,  4.41it/s]

Episode 468, Reward: 10.0


 47%|████▋     | 469/1000 [02:01<02:01,  4.36it/s]

Episode 469, Reward: 10.0


 47%|████▋     | 470/1000 [02:01<02:01,  4.37it/s]

Episode 470, Reward: 9.0


 47%|████▋     | 471/1000 [02:01<02:00,  4.37it/s]

Episode 471, Reward: 9.0


 47%|████▋     | 472/1000 [02:02<02:01,  4.34it/s]

Episode 472, Reward: 9.0


 47%|████▋     | 473/1000 [02:02<02:01,  4.35it/s]

Episode 473, Reward: 9.0


 47%|████▋     | 474/1000 [02:02<02:02,  4.29it/s]

Episode 474, Reward: 10.0


 48%|████▊     | 475/1000 [02:02<02:26,  3.57it/s]

Episode 475, Reward: 10.0


 48%|████▊     | 476/1000 [02:03<02:22,  3.68it/s]

Episode 476, Reward: 10.0


 48%|████▊     | 477/1000 [02:03<02:13,  3.93it/s]

Episode 477, Reward: 9.0


 48%|████▊     | 478/1000 [02:03<02:09,  4.03it/s]

Episode 478, Reward: 10.0


 48%|████▊     | 479/1000 [02:03<02:03,  4.21it/s]

Episode 479, Reward: 9.0


 48%|████▊     | 480/1000 [02:04<02:01,  4.26it/s]

Episode 480, Reward: 10.0


 48%|████▊     | 481/1000 [02:04<02:00,  4.30it/s]

Episode 481, Reward: 10.0


 48%|████▊     | 483/1000 [02:04<01:50,  4.69it/s]

Episode 482, Reward: 9.0
Episode 483, Reward: 8.0


 48%|████▊     | 484/1000 [02:04<01:49,  4.72it/s]

Episode 484, Reward: 9.0


 48%|████▊     | 485/1000 [02:05<01:48,  4.74it/s]

Episode 485, Reward: 9.0


 49%|████▊     | 486/1000 [02:05<01:48,  4.75it/s]

Episode 486, Reward: 9.0


 49%|████▊     | 487/1000 [02:05<01:47,  4.76it/s]

Episode 487, Reward: 9.0


 49%|████▉     | 488/1000 [02:05<01:49,  4.66it/s]

Episode 488, Reward: 10.0


 49%|████▉     | 489/1000 [02:05<01:51,  4.59it/s]

Episode 489, Reward: 10.0


 49%|████▉     | 490/1000 [02:06<01:52,  4.54it/s]

Episode 490, Reward: 10.0


 49%|████▉     | 491/1000 [02:06<01:52,  4.51it/s]

Episode 491, Reward: 10.0


 49%|████▉     | 492/1000 [02:06<01:56,  4.37it/s]

Episode 492, Reward: 11.0


 49%|████▉     | 493/1000 [02:06<01:54,  4.42it/s]

Episode 493, Reward: 9.0


 49%|████▉     | 494/1000 [02:07<01:55,  4.39it/s]

Episode 494, Reward: 10.0


 50%|████▉     | 495/1000 [02:07<01:54,  4.39it/s]

Episode 495, Reward: 10.0


 50%|████▉     | 497/1000 [02:07<01:52,  4.46it/s]

Episode 496, Reward: 11.0
Episode 497, Reward: 9.0


 50%|████▉     | 498/1000 [02:08<01:52,  4.46it/s]

Episode 498, Reward: 10.0


 50%|████▉     | 499/1000 [02:08<01:49,  4.57it/s]

Episode 499, Reward: 9.0


 50%|█████     | 500/1000 [02:08<01:48,  4.61it/s]

Episode 500, Reward: 9.0


 50%|█████     | 501/1000 [02:08<01:50,  4.51it/s]

Episode 501, Reward: 10.0


 50%|█████     | 502/1000 [02:08<01:53,  4.38it/s]

Episode 502, Reward: 10.0


 50%|█████     | 503/1000 [02:09<01:54,  4.36it/s]

Episode 503, Reward: 10.0


 50%|█████     | 504/1000 [02:09<01:53,  4.38it/s]

Episode 504, Reward: 10.0


 50%|█████     | 505/1000 [02:09<01:53,  4.37it/s]

Episode 505, Reward: 10.0


 51%|█████     | 506/1000 [02:09<01:52,  4.38it/s]

Episode 506, Reward: 10.0


 51%|█████     | 508/1000 [02:10<01:45,  4.65it/s]

Episode 507, Reward: 10.0
Episode 508, Reward: 8.0


 51%|█████     | 509/1000 [02:10<01:44,  4.72it/s]

Episode 509, Reward: 9.0


 51%|█████     | 510/1000 [02:10<01:46,  4.62it/s]

Episode 510, Reward: 10.0


 51%|█████     | 511/1000 [02:10<01:45,  4.62it/s]

Episode 511, Reward: 9.0


 51%|█████     | 512/1000 [02:11<01:50,  4.42it/s]

Episode 512, Reward: 11.0


 51%|█████▏    | 513/1000 [02:11<01:50,  4.40it/s]

Episode 513, Reward: 10.0


 51%|█████▏    | 514/1000 [02:11<01:50,  4.41it/s]

Episode 514, Reward: 10.0


 52%|█████▏    | 515/1000 [02:11<01:55,  4.21it/s]

Episode 515, Reward: 8.0


 52%|█████▏    | 517/1000 [02:12<01:51,  4.32it/s]

Episode 516, Reward: 11.0
Episode 517, Reward: 8.0


 52%|█████▏    | 518/1000 [02:12<01:44,  4.62it/s]

Episode 518, Reward: 8.0


 52%|█████▏    | 519/1000 [02:12<01:45,  4.55it/s]

Episode 519, Reward: 10.0


 52%|█████▏    | 520/1000 [02:12<01:49,  4.38it/s]

Episode 520, Reward: 11.0


 52%|█████▏    | 521/1000 [02:13<01:48,  4.41it/s]

Episode 521, Reward: 9.0


 52%|█████▏    | 522/1000 [02:13<01:50,  4.31it/s]

Episode 522, Reward: 10.0


 52%|█████▏    | 523/1000 [02:13<01:50,  4.31it/s]

Episode 523, Reward: 9.0


 52%|█████▏    | 524/1000 [02:13<01:53,  4.20it/s]

Episode 524, Reward: 10.0


 52%|█████▎    | 525/1000 [02:14<01:54,  4.15it/s]

Episode 525, Reward: 10.0


 53%|█████▎    | 526/1000 [02:14<01:55,  4.11it/s]

Episode 526, Reward: 10.0


 53%|█████▎    | 527/1000 [02:14<01:55,  4.11it/s]

Episode 527, Reward: 10.0


 53%|█████▎    | 528/1000 [02:14<01:56,  4.05it/s]

Episode 528, Reward: 10.0


 53%|█████▎    | 529/1000 [02:15<01:55,  4.06it/s]

Episode 529, Reward: 10.0


 53%|█████▎    | 530/1000 [02:15<01:55,  4.07it/s]

Episode 530, Reward: 11.0


 53%|█████▎    | 531/1000 [02:15<01:55,  4.07it/s]

Episode 531, Reward: 11.0


 53%|█████▎    | 533/1000 [02:16<01:45,  4.43it/s]

Episode 532, Reward: 10.0
Episode 533, Reward: 8.0


 53%|█████▎    | 534/1000 [02:16<01:45,  4.43it/s]

Episode 534, Reward: 10.0


 54%|█████▎    | 535/1000 [02:16<01:45,  4.42it/s]

Episode 535, Reward: 10.0


 54%|█████▎    | 536/1000 [02:16<01:42,  4.54it/s]

Episode 536, Reward: 9.0


 54%|█████▎    | 537/1000 [02:17<01:45,  4.41it/s]

Episode 537, Reward: 10.0


 54%|█████▍    | 538/1000 [02:17<01:44,  4.41it/s]

Episode 538, Reward: 10.0


 54%|█████▍    | 539/1000 [02:17<01:42,  4.48it/s]

Episode 539, Reward: 9.0


 54%|█████▍    | 540/1000 [02:17<01:43,  4.45it/s]

Episode 540, Reward: 10.0


 54%|█████▍    | 541/1000 [02:17<01:41,  4.52it/s]

Episode 541, Reward: 9.0


 54%|█████▍    | 543/1000 [02:18<01:35,  4.77it/s]

Episode 542, Reward: 9.0
Episode 543, Reward: 8.0


 55%|█████▍    | 545/1000 [02:18<01:31,  4.95it/s]

Episode 544, Reward: 9.0
Episode 545, Reward: 8.0


 55%|█████▍    | 546/1000 [02:18<01:29,  5.07it/s]

Episode 546, Reward: 8.0


 55%|█████▍    | 547/1000 [02:19<01:32,  4.87it/s]

Episode 547, Reward: 9.0


 55%|█████▍    | 549/1000 [02:19<01:32,  4.89it/s]

Episode 548, Reward: 10.0
Episode 549, Reward: 8.0


 55%|█████▌    | 550/1000 [02:19<01:34,  4.74it/s]

Episode 550, Reward: 10.0


 55%|█████▌    | 551/1000 [02:19<01:38,  4.58it/s]

Episode 551, Reward: 10.0


 55%|█████▌    | 553/1000 [02:20<01:33,  4.80it/s]

Episode 552, Reward: 9.0
Episode 553, Reward: 8.0


 55%|█████▌    | 554/1000 [02:20<01:32,  4.82it/s]

Episode 554, Reward: 9.0


 56%|█████▌    | 555/1000 [02:20<01:32,  4.81it/s]

Episode 555, Reward: 9.0


 56%|█████▌    | 557/1000 [02:21<01:32,  4.77it/s]

Episode 556, Reward: 11.0
Episode 557, Reward: 8.0


 56%|█████▌    | 558/1000 [02:21<01:34,  4.67it/s]

Episode 558, Reward: 10.0


 56%|█████▌    | 559/1000 [02:21<01:34,  4.66it/s]

Episode 559, Reward: 9.0


 56%|█████▌    | 560/1000 [02:21<01:34,  4.67it/s]

Episode 560, Reward: 9.0


 56%|█████▌    | 561/1000 [02:22<01:36,  4.57it/s]

Episode 561, Reward: 10.0


 56%|█████▌    | 562/1000 [02:22<01:35,  4.59it/s]

Episode 562, Reward: 9.0


 56%|█████▋    | 563/1000 [02:22<01:36,  4.52it/s]

Episode 563, Reward: 10.0


 56%|█████▋    | 564/1000 [02:22<01:37,  4.46it/s]

Episode 564, Reward: 10.0


 56%|█████▋    | 565/1000 [02:23<01:38,  4.40it/s]

Episode 565, Reward: 10.0


 57%|█████▋    | 567/1000 [02:23<01:33,  4.63it/s]

Episode 566, Reward: 10.0
Episode 567, Reward: 8.0


 57%|█████▋    | 569/1000 [02:23<01:28,  4.89it/s]

Episode 568, Reward: 9.0
Episode 569, Reward: 8.0


 57%|█████▋    | 571/1000 [02:24<01:26,  4.96it/s]

Episode 570, Reward: 9.0
Episode 571, Reward: 8.0


 57%|█████▋    | 572/1000 [02:24<01:32,  4.64it/s]

Episode 572, Reward: 11.0


 57%|█████▋    | 573/1000 [02:24<01:33,  4.56it/s]

Episode 573, Reward: 10.0


 57%|█████▊    | 575/1000 [02:25<01:28,  4.82it/s]

Episode 574, Reward: 9.0
Episode 575, Reward: 8.0


 58%|█████▊    | 576/1000 [02:25<01:27,  4.82it/s]

Episode 576, Reward: 9.0


 58%|█████▊    | 577/1000 [02:25<01:30,  4.66it/s]

Episode 577, Reward: 9.0


 58%|█████▊    | 578/1000 [02:25<01:33,  4.54it/s]

Episode 578, Reward: 9.0


 58%|█████▊    | 579/1000 [02:26<01:37,  4.31it/s]

Episode 579, Reward: 11.0


 58%|█████▊    | 580/1000 [02:26<01:37,  4.33it/s]

Episode 580, Reward: 9.0


 58%|█████▊    | 581/1000 [02:26<01:40,  4.15it/s]

Episode 581, Reward: 11.0


 58%|█████▊    | 582/1000 [02:26<01:41,  4.12it/s]

Episode 582, Reward: 10.0


 58%|█████▊    | 583/1000 [02:26<01:40,  4.16it/s]

Episode 583, Reward: 9.0


 58%|█████▊    | 585/1000 [02:27<01:35,  4.33it/s]

Episode 584, Reward: 11.0
Episode 585, Reward: 8.0


 59%|█████▊    | 586/1000 [02:27<01:32,  4.47it/s]

Episode 586, Reward: 9.0


 59%|█████▉    | 588/1000 [02:28<01:26,  4.76it/s]

Episode 587, Reward: 9.0
Episode 588, Reward: 8.0


 59%|█████▉    | 590/1000 [02:28<01:26,  4.72it/s]

Episode 589, Reward: 11.0
Episode 590, Reward: 8.0


 59%|█████▉    | 591/1000 [02:28<01:26,  4.75it/s]

Episode 591, Reward: 9.0


 59%|█████▉    | 592/1000 [02:28<01:31,  4.47it/s]

Episode 592, Reward: 11.0


 59%|█████▉    | 593/1000 [02:29<01:32,  4.42it/s]

Episode 593, Reward: 10.0


 59%|█████▉    | 594/1000 [02:29<01:29,  4.53it/s]

Episode 594, Reward: 9.0


 60%|█████▉    | 595/1000 [02:29<01:28,  4.56it/s]

Episode 595, Reward: 9.0


 60%|█████▉    | 596/1000 [02:29<01:28,  4.59it/s]

Episode 596, Reward: 9.0


 60%|█████▉    | 597/1000 [02:30<01:29,  4.52it/s]

Episode 597, Reward: 10.0


 60%|█████▉    | 598/1000 [02:30<01:31,  4.37it/s]

Episode 598, Reward: 11.0


 60%|█████▉    | 599/1000 [02:30<01:31,  4.37it/s]

Episode 599, Reward: 10.0


 60%|██████    | 600/1000 [02:30<01:31,  4.36it/s]

Episode 600, Reward: 10.0


 60%|██████    | 601/1000 [02:31<01:32,  4.32it/s]

Episode 601, Reward: 10.0


 60%|██████    | 602/1000 [02:31<01:32,  4.30it/s]

Episode 602, Reward: 10.0


 60%|██████    | 603/1000 [02:31<01:32,  4.30it/s]

Episode 603, Reward: 10.0


 60%|██████    | 604/1000 [02:31<01:30,  4.40it/s]

Episode 604, Reward: 9.0


 60%|██████    | 605/1000 [02:31<01:30,  4.36it/s]

Episode 605, Reward: 10.0


 61%|██████    | 606/1000 [02:32<01:28,  4.45it/s]

Episode 606, Reward: 9.0


 61%|██████    | 607/1000 [02:32<01:28,  4.42it/s]

Episode 607, Reward: 10.0


 61%|██████    | 608/1000 [02:32<01:29,  4.38it/s]

Episode 608, Reward: 10.0


 61%|██████    | 609/1000 [02:32<01:27,  4.49it/s]

Episode 609, Reward: 9.0


 61%|██████    | 611/1000 [02:33<01:23,  4.68it/s]

Episode 610, Reward: 10.0
Episode 611, Reward: 8.0


 61%|██████    | 612/1000 [02:33<01:21,  4.74it/s]

Episode 612, Reward: 9.0


 61%|██████▏   | 613/1000 [02:33<01:22,  4.72it/s]

Episode 613, Reward: 9.0


 61%|██████▏   | 614/1000 [02:33<01:22,  4.69it/s]

Episode 614, Reward: 9.0


 62%|██████▏   | 615/1000 [02:34<01:21,  4.72it/s]

Episode 615, Reward: 9.0


 62%|██████▏   | 616/1000 [02:34<01:23,  4.58it/s]

Episode 616, Reward: 10.0


 62%|██████▏   | 617/1000 [02:34<01:26,  4.41it/s]

Episode 617, Reward: 11.0


 62%|██████▏   | 618/1000 [02:34<01:27,  4.38it/s]

Episode 618, Reward: 10.0


 62%|██████▏   | 619/1000 [02:35<01:28,  4.31it/s]

Episode 619, Reward: 10.0


 62%|██████▏   | 620/1000 [02:35<01:26,  4.41it/s]

Episode 620, Reward: 9.0


 62%|██████▏   | 621/1000 [02:35<01:24,  4.47it/s]

Episode 621, Reward: 9.0


 62%|██████▏   | 622/1000 [02:35<01:26,  4.37it/s]

Episode 622, Reward: 10.0


 62%|██████▏   | 623/1000 [02:35<01:25,  4.43it/s]

Episode 623, Reward: 9.0


 62%|██████▏   | 624/1000 [02:36<01:25,  4.41it/s]

Episode 624, Reward: 10.0


 62%|██████▎   | 625/1000 [02:36<01:25,  4.37it/s]

Episode 625, Reward: 10.0


 63%|██████▎   | 626/1000 [02:36<01:25,  4.37it/s]

Episode 626, Reward: 10.0


 63%|██████▎   | 627/1000 [02:36<01:23,  4.49it/s]

Episode 627, Reward: 9.0


 63%|██████▎   | 628/1000 [02:37<01:22,  4.51it/s]

Episode 628, Reward: 9.0


 63%|██████▎   | 629/1000 [02:37<01:21,  4.57it/s]

Episode 629, Reward: 9.0


 63%|██████▎   | 630/1000 [02:37<01:21,  4.55it/s]

Episode 630, Reward: 9.0


 63%|██████▎   | 631/1000 [02:37<01:23,  4.44it/s]

Episode 631, Reward: 9.0


 63%|██████▎   | 632/1000 [02:37<01:26,  4.27it/s]

Episode 632, Reward: 10.0


 63%|██████▎   | 633/1000 [02:38<01:25,  4.28it/s]

Episode 633, Reward: 9.0


 63%|██████▎   | 634/1000 [02:38<01:26,  4.25it/s]

Episode 634, Reward: 9.0


 64%|██████▎   | 635/1000 [02:38<01:26,  4.24it/s]

Episode 635, Reward: 9.0
Episode 636, Reward: 8.0

 64%|██████▎   | 636/1000 [02:38<01:22,  4.44it/s]




 64%|██████▍   | 638/1000 [02:39<01:20,  4.51it/s]

Episode 637, Reward: 10.0
Episode 638, Reward: 8.0


 64%|██████▍   | 640/1000 [02:39<01:17,  4.66it/s]

Episode 639, Reward: 9.0
Episode 640, Reward: 8.0


 64%|██████▍   | 641/1000 [02:39<01:17,  4.61it/s]

Episode 641, Reward: 9.0


 64%|██████▍   | 642/1000 [02:40<01:17,  4.63it/s]

Episode 642, Reward: 9.0


 64%|██████▍   | 643/1000 [02:40<01:16,  4.64it/s]

Episode 643, Reward: 9.0


 64%|██████▍   | 644/1000 [02:40<01:18,  4.56it/s]

Episode 644, Reward: 10.0


 64%|██████▍   | 645/1000 [02:40<01:18,  4.50it/s]

Episode 645, Reward: 10.0


 65%|██████▍   | 646/1000 [02:41<01:18,  4.51it/s]

Episode 646, Reward: 9.0


 65%|██████▍   | 647/1000 [02:41<01:19,  4.45it/s]

Episode 647, Reward: 10.0


 65%|██████▍   | 648/1000 [02:41<01:18,  4.51it/s]

Episode 648, Reward: 9.0


 65%|██████▍   | 649/1000 [02:41<01:19,  4.43it/s]

Episode 649, Reward: 10.0


 65%|██████▌   | 651/1000 [02:42<01:14,  4.69it/s]

Episode 650, Reward: 9.0
Episode 651, Reward: 8.0


 65%|██████▌   | 652/1000 [02:42<01:15,  4.58it/s]

Episode 652, Reward: 10.0


 65%|██████▌   | 653/1000 [02:42<01:16,  4.56it/s]

Episode 653, Reward: 9.0


 65%|██████▌   | 654/1000 [02:42<01:16,  4.53it/s]

Episode 654, Reward: 9.0


 66%|██████▌   | 655/1000 [02:43<01:15,  4.55it/s]

Episode 655, Reward: 9.0


 66%|██████▌   | 656/1000 [02:43<01:18,  4.39it/s]

Episode 656, Reward: 11.0


 66%|██████▌   | 658/1000 [02:43<01:12,  4.70it/s]

Episode 657, Reward: 9.0
Episode 658, Reward: 8.0


 66%|██████▌   | 659/1000 [02:43<01:15,  4.54it/s]

Episode 659, Reward: 10.0


 66%|██████▌   | 661/1000 [02:44<01:12,  4.67it/s]

Episode 660, Reward: 10.0
Episode 661, Reward: 8.0


 66%|██████▌   | 662/1000 [02:44<01:13,  4.60it/s]

Episode 662, Reward: 10.0


 66%|██████▋   | 663/1000 [02:44<01:14,  4.52it/s]

Episode 663, Reward: 10.0


 66%|██████▋   | 664/1000 [02:45<01:15,  4.44it/s]

Episode 664, Reward: 10.0


 66%|██████▋   | 665/1000 [02:45<01:16,  4.40it/s]

Episode 665, Reward: 10.0


 67%|██████▋   | 666/1000 [02:45<01:15,  4.45it/s]

Episode 666, Reward: 9.0


 67%|██████▋   | 667/1000 [02:45<01:16,  4.37it/s]

Episode 667, Reward: 10.0


 67%|██████▋   | 668/1000 [02:45<01:17,  4.31it/s]

Episode 668, Reward: 10.0


 67%|██████▋   | 669/1000 [02:46<01:17,  4.28it/s]

Episode 669, Reward: 10.0


 67%|██████▋   | 670/1000 [02:46<01:17,  4.27it/s]

Episode 670, Reward: 10.0


 67%|██████▋   | 671/1000 [02:46<01:15,  4.35it/s]

Episode 671, Reward: 9.0


 67%|██████▋   | 672/1000 [02:46<01:17,  4.23it/s]

Episode 672, Reward: 10.0


 67%|██████▋   | 673/1000 [02:47<01:16,  4.25it/s]

Episode 673, Reward: 10.0


 67%|██████▋   | 674/1000 [02:47<01:16,  4.26it/s]

Episode 674, Reward: 10.0


 68%|██████▊   | 675/1000 [02:47<01:14,  4.37it/s]

Episode 675, Reward: 9.0


 68%|██████▊   | 677/1000 [02:48<01:10,  4.60it/s]

Episode 676, Reward: 10.0
Episode 677, Reward: 8.0


 68%|██████▊   | 678/1000 [02:48<01:11,  4.52it/s]

Episode 678, Reward: 10.0


 68%|██████▊   | 679/1000 [02:48<01:11,  4.46it/s]

Episode 679, Reward: 10.0


 68%|██████▊   | 681/1000 [02:48<01:07,  4.73it/s]

Episode 680, Reward: 9.0
Episode 681, Reward: 8.0


 68%|██████▊   | 682/1000 [02:49<01:07,  4.72it/s]

Episode 682, Reward: 9.0


 68%|██████▊   | 683/1000 [02:49<01:08,  4.60it/s]

Episode 683, Reward: 10.0


 68%|██████▊   | 684/1000 [02:49<01:10,  4.51it/s]

Episode 684, Reward: 10.0


 68%|██████▊   | 685/1000 [02:49<01:10,  4.44it/s]

Episode 685, Reward: 9.0


 69%|██████▊   | 686/1000 [02:50<01:13,  4.26it/s]

Episode 686, Reward: 10.0


 69%|██████▊   | 687/1000 [02:50<01:15,  4.14it/s]

Episode 687, Reward: 10.0


 69%|██████▉   | 688/1000 [02:50<01:15,  4.14it/s]

Episode 688, Reward: 10.0


 69%|██████▉   | 689/1000 [02:50<01:15,  4.10it/s]

Episode 689, Reward: 10.0


 69%|██████▉   | 690/1000 [02:51<01:15,  4.08it/s]

Episode 690, Reward: 9.0


 69%|██████▉   | 691/1000 [02:51<01:15,  4.08it/s]

Episode 691, Reward: 10.0


 69%|██████▉   | 692/1000 [02:51<01:14,  4.11it/s]

Episode 692, Reward: 9.0


 69%|██████▉   | 694/1000 [02:51<01:09,  4.41it/s]

Episode 693, Reward: 9.0
Episode 694, Reward: 8.0


 70%|██████▉   | 695/1000 [02:52<01:11,  4.28it/s]

Episode 695, Reward: 11.0


 70%|██████▉   | 696/1000 [02:52<01:09,  4.38it/s]

Episode 696, Reward: 9.0


 70%|██████▉   | 697/1000 [02:52<01:09,  4.36it/s]

Episode 697, Reward: 10.0


 70%|██████▉   | 698/1000 [02:52<01:09,  4.34it/s]

Episode 698, Reward: 10.0


 70%|██████▉   | 699/1000 [02:53<01:10,  4.26it/s]

Episode 699, Reward: 10.0


 70%|███████   | 701/1000 [02:53<01:04,  4.60it/s]

Episode 700, Reward: 9.0
Episode 701, Reward: 8.0


 70%|███████   | 702/1000 [02:53<01:03,  4.66it/s]

Episode 702, Reward: 9.0


 70%|███████   | 703/1000 [02:53<01:05,  4.51it/s]

Episode 703, Reward: 10.0


 70%|███████   | 704/1000 [02:54<01:06,  4.44it/s]

Episode 704, Reward: 10.0


 70%|███████   | 705/1000 [02:54<01:06,  4.47it/s]

Episode 705, Reward: 9.0


 71%|███████   | 706/1000 [02:54<01:05,  4.52it/s]

Episode 706, Reward: 9.0


 71%|███████   | 707/1000 [02:54<01:07,  4.33it/s]

Episode 707, Reward: 11.0


 71%|███████   | 708/1000 [02:55<01:06,  4.42it/s]

Episode 708, Reward: 9.0


 71%|███████   | 709/1000 [02:55<01:05,  4.47it/s]

Episode 709, Reward: 9.0


 71%|███████   | 710/1000 [02:55<01:03,  4.53it/s]

Episode 710, Reward: 9.0


 71%|███████   | 711/1000 [02:55<01:04,  4.46it/s]

Episode 711, Reward: 10.0


 71%|███████   | 712/1000 [02:56<01:03,  4.50it/s]

Episode 712, Reward: 9.0


 71%|███████▏  | 713/1000 [02:56<01:04,  4.48it/s]

Episode 713, Reward: 9.0


 71%|███████▏  | 714/1000 [02:56<01:03,  4.54it/s]

Episode 714, Reward: 9.0


 72%|███████▏  | 715/1000 [02:56<01:02,  4.59it/s]

Episode 715, Reward: 9.0


 72%|███████▏  | 716/1000 [02:56<01:03,  4.48it/s]

Episode 716, Reward: 10.0


 72%|███████▏  | 718/1000 [02:57<01:00,  4.63it/s]

Episode 717, Reward: 10.0
Episode 718, Reward: 8.0


 72%|███████▏  | 719/1000 [02:57<01:00,  4.67it/s]

Episode 719, Reward: 9.0


 72%|███████▏  | 720/1000 [02:57<01:02,  4.46it/s]

Episode 720, Reward: 11.0


 72%|███████▏  | 721/1000 [02:58<01:03,  4.38it/s]

Episode 721, Reward: 10.0


 72%|███████▏  | 722/1000 [02:58<01:04,  4.28it/s]

Episode 722, Reward: 10.0


 72%|███████▏  | 723/1000 [02:58<01:02,  4.41it/s]

Episode 723, Reward: 9.0


 72%|███████▏  | 724/1000 [02:58<01:02,  4.45it/s]

Episode 724, Reward: 9.0


 72%|███████▎  | 725/1000 [02:58<01:01,  4.47it/s]

Episode 725, Reward: 9.0


 73%|███████▎  | 726/1000 [02:59<01:00,  4.52it/s]

Episode 726, Reward: 9.0


 73%|███████▎  | 727/1000 [02:59<01:01,  4.44it/s]

Episode 727, Reward: 10.0


 73%|███████▎  | 728/1000 [02:59<01:02,  4.37it/s]

Episode 728, Reward: 10.0


 73%|███████▎  | 730/1000 [03:00<00:57,  4.68it/s]

Episode 729, Reward: 9.0
Episode 730, Reward: 8.0


 73%|███████▎  | 731/1000 [03:00<00:58,  4.59it/s]

Episode 731, Reward: 10.0


 73%|███████▎  | 732/1000 [03:00<00:59,  4.49it/s]

Episode 732, Reward: 10.0


 73%|███████▎  | 733/1000 [03:00<01:00,  4.41it/s]

Episode 733, Reward: 10.0


 73%|███████▎  | 734/1000 [03:00<01:00,  4.43it/s]

Episode 734, Reward: 9.0


 74%|███████▎  | 735/1000 [03:01<01:00,  4.39it/s]

Episode 735, Reward: 10.0


 74%|███████▎  | 736/1000 [03:01<00:59,  4.45it/s]

Episode 736, Reward: 9.0


 74%|███████▎  | 737/1000 [03:01<00:59,  4.40it/s]

Episode 737, Reward: 10.0


 74%|███████▍  | 738/1000 [03:01<00:58,  4.47it/s]

Episode 738, Reward: 9.0


 74%|███████▍  | 739/1000 [03:02<00:59,  4.35it/s]

Episode 739, Reward: 9.0


 74%|███████▍  | 740/1000 [03:02<00:59,  4.35it/s]

Episode 740, Reward: 9.0


 74%|███████▍  | 741/1000 [03:02<01:01,  4.22it/s]

Episode 741, Reward: 10.0


 74%|███████▍  | 742/1000 [03:02<01:02,  4.11it/s]

Episode 742, Reward: 10.0


 74%|███████▍  | 743/1000 [03:03<01:03,  4.04it/s]

Episode 743, Reward: 10.0


 74%|███████▍  | 745/1000 [03:03<00:59,  4.32it/s]

Episode 744, Reward: 9.0
Episode 745, Reward: 8.0


 75%|███████▍  | 746/1000 [03:03<00:59,  4.24it/s]

Episode 746, Reward: 10.0


 75%|███████▍  | 747/1000 [03:04<01:00,  4.19it/s]

Episode 747, Reward: 9.0


 75%|███████▍  | 748/1000 [03:04<00:59,  4.21it/s]

Episode 748, Reward: 10.0


 75%|███████▍  | 749/1000 [03:04<00:58,  4.32it/s]

Episode 749, Reward: 9.0


 75%|███████▌  | 750/1000 [03:04<00:56,  4.41it/s]

Episode 750, Reward: 9.0


 75%|███████▌  | 751/1000 [03:04<00:55,  4.45it/s]

Episode 751, Reward: 9.0


 75%|███████▌  | 752/1000 [03:05<00:55,  4.50it/s]

Episode 752, Reward: 9.0


 75%|███████▌  | 753/1000 [03:05<00:54,  4.54it/s]

Episode 753, Reward: 9.0


 75%|███████▌  | 754/1000 [03:05<00:55,  4.47it/s]

Episode 754, Reward: 10.0


 76%|███████▌  | 755/1000 [03:05<00:56,  4.31it/s]

Episode 755, Reward: 11.0


 76%|███████▌  | 756/1000 [03:06<00:57,  4.27it/s]

Episode 756, Reward: 10.0


 76%|███████▌  | 757/1000 [03:06<00:56,  4.27it/s]

Episode 757, Reward: 10.0


 76%|███████▌  | 758/1000 [03:06<00:55,  4.34it/s]

Episode 758, Reward: 9.0


 76%|███████▌  | 760/1000 [03:06<00:51,  4.63it/s]

Episode 759, Reward: 9.0
Episode 760, Reward: 8.0


 76%|███████▌  | 761/1000 [03:07<00:51,  4.68it/s]

Episode 761, Reward: 9.0


 76%|███████▌  | 762/1000 [03:07<00:51,  4.65it/s]

Episode 762, Reward: 9.0


 76%|███████▋  | 763/1000 [03:07<00:52,  4.53it/s]

Episode 763, Reward: 10.0


 76%|███████▋  | 764/1000 [03:07<00:52,  4.46it/s]

Episode 764, Reward: 10.0


 76%|███████▋  | 765/1000 [03:08<00:52,  4.48it/s]

Episode 765, Reward: 9.0


 77%|███████▋  | 766/1000 [03:08<00:51,  4.52it/s]

Episode 766, Reward: 9.0


 77%|███████▋  | 767/1000 [03:08<00:52,  4.46it/s]

Episode 767, Reward: 10.0


 77%|███████▋  | 768/1000 [03:08<00:52,  4.39it/s]

Episode 768, Reward: 10.0


 77%|███████▋  | 769/1000 [03:08<00:52,  4.44it/s]

Episode 769, Reward: 9.0


 77%|███████▋  | 771/1000 [03:09<00:49,  4.61it/s]

Episode 770, Reward: 10.0
Episode 771, Reward: 8.0


 77%|███████▋  | 772/1000 [03:09<00:51,  4.41it/s]

Episode 772, Reward: 11.0


 77%|███████▋  | 773/1000 [03:09<00:51,  4.37it/s]

Episode 773, Reward: 10.0


 77%|███████▋  | 774/1000 [03:10<00:52,  4.32it/s]

Episode 774, Reward: 10.0


 78%|███████▊  | 775/1000 [03:10<00:52,  4.27it/s]

Episode 775, Reward: 9.0


 78%|███████▊  | 776/1000 [03:10<00:53,  4.18it/s]

Episode 776, Reward: 10.0


 78%|███████▊  | 777/1000 [03:10<00:53,  4.18it/s]

Episode 777, Reward: 9.0


 78%|███████▊  | 778/1000 [03:11<00:53,  4.15it/s]

Episode 778, Reward: 9.0


 78%|███████▊  | 779/1000 [03:11<00:53,  4.10it/s]

Episode 779, Reward: 10.0


 78%|███████▊  | 780/1000 [03:11<00:54,  4.07it/s]

Episode 780, Reward: 10.0


 78%|███████▊  | 781/1000 [03:11<00:53,  4.12it/s]

Episode 781, Reward: 9.0


 78%|███████▊  | 782/1000 [03:11<00:50,  4.28it/s]

Episode 782, Reward: 8.0


 78%|███████▊  | 783/1000 [03:12<00:51,  4.19it/s]

Episode 783, Reward: 10.0


 78%|███████▊  | 784/1000 [03:12<00:50,  4.28it/s]

Episode 784, Reward: 9.0


 78%|███████▊  | 785/1000 [03:12<00:49,  4.37it/s]

Episode 785, Reward: 9.0


 79%|███████▊  | 786/1000 [03:12<00:48,  4.40it/s]

Episode 786, Reward: 9.0


 79%|███████▊  | 787/1000 [03:13<00:48,  4.37it/s]

Episode 787, Reward: 10.0


 79%|███████▉  | 788/1000 [03:13<00:47,  4.43it/s]

Episode 788, Reward: 9.0


 79%|███████▉  | 789/1000 [03:13<00:47,  4.47it/s]

Episode 789, Reward: 9.0


 79%|███████▉  | 790/1000 [03:13<00:46,  4.50it/s]

Episode 790, Reward: 9.0


 79%|███████▉  | 791/1000 [03:14<00:47,  4.39it/s]

Episode 791, Reward: 10.0


 79%|███████▉  | 793/1000 [03:14<00:46,  4.45it/s]

Episode 792, Reward: 10.0
Episode 793, Reward: 8.0


 79%|███████▉  | 794/1000 [03:14<00:47,  4.37it/s]

Episode 794, Reward: 9.0


 80%|███████▉  | 795/1000 [03:14<00:47,  4.28it/s]

Episode 795, Reward: 9.0


 80%|███████▉  | 796/1000 [03:15<00:47,  4.31it/s]

Episode 796, Reward: 9.0


 80%|███████▉  | 797/1000 [03:15<00:47,  4.27it/s]

Episode 797, Reward: 9.0


 80%|███████▉  | 798/1000 [03:15<00:47,  4.21it/s]

Episode 798, Reward: 9.0


 80%|███████▉  | 799/1000 [03:15<00:46,  4.31it/s]

Episode 799, Reward: 8.0


 80%|████████  | 800/1000 [03:16<00:45,  4.40it/s]

Episode 800, Reward: 8.0


 80%|████████  | 802/1000 [03:16<00:43,  4.56it/s]

Episode 801, Reward: 9.0
Episode 802, Reward: 8.0


 80%|████████  | 803/1000 [03:16<00:44,  4.45it/s]

Episode 803, Reward: 10.0


 80%|████████  | 804/1000 [03:17<00:44,  4.44it/s]

Episode 804, Reward: 9.0


 80%|████████  | 805/1000 [03:17<00:44,  4.35it/s]

Episode 805, Reward: 10.0


 81%|████████  | 806/1000 [03:17<00:44,  4.40it/s]

Episode 806, Reward: 9.0


 81%|████████  | 807/1000 [03:17<00:43,  4.40it/s]

Episode 807, Reward: 9.0


 81%|████████  | 808/1000 [03:17<00:44,  4.29it/s]

Episode 808, Reward: 10.0


 81%|████████  | 810/1000 [03:18<00:41,  4.53it/s]

Episode 809, Reward: 10.0
Episode 810, Reward: 8.0


 81%|████████  | 811/1000 [03:18<00:42,  4.46it/s]

Episode 811, Reward: 10.0


 81%|████████  | 812/1000 [03:18<00:41,  4.52it/s]

Episode 812, Reward: 9.0


 81%|████████▏ | 813/1000 [03:19<00:41,  4.49it/s]

Episode 813, Reward: 9.0


 81%|████████▏ | 814/1000 [03:19<00:42,  4.37it/s]

Episode 814, Reward: 10.0


 82%|████████▏ | 815/1000 [03:19<00:42,  4.33it/s]

Episode 815, Reward: 10.0


 82%|████████▏ | 816/1000 [03:19<00:43,  4.28it/s]

Episode 816, Reward: 10.0


 82%|████████▏ | 818/1000 [03:20<00:39,  4.56it/s]

Episode 817, Reward: 9.0
Episode 818, Reward: 8.0


 82%|████████▏ | 819/1000 [03:20<00:39,  4.62it/s]

Episode 819, Reward: 9.0


 82%|████████▏ | 820/1000 [03:20<00:39,  4.51it/s]

Episode 820, Reward: 10.0


 82%|████████▏ | 821/1000 [03:20<00:39,  4.52it/s]

Episode 821, Reward: 9.0


 82%|████████▏ | 822/1000 [03:21<00:39,  4.53it/s]

Episode 822, Reward: 9.0


 82%|████████▏ | 824/1000 [03:21<00:37,  4.66it/s]

Episode 823, Reward: 10.0
Episode 824, Reward: 8.0


 82%|████████▎ | 825/1000 [03:21<00:38,  4.57it/s]

Episode 825, Reward: 10.0


 83%|████████▎ | 826/1000 [03:21<00:38,  4.47it/s]

Episode 826, Reward: 9.0


 83%|████████▎ | 827/1000 [03:22<00:38,  4.51it/s]

Episode 827, Reward: 9.0


 83%|████████▎ | 829/1000 [03:22<00:36,  4.63it/s]

Episode 828, Reward: 9.0
Episode 829, Reward: 8.0


 83%|████████▎ | 831/1000 [03:23<00:35,  4.70it/s]

Episode 830, Reward: 10.0
Episode 831, Reward: 8.0


 83%|████████▎ | 832/1000 [03:23<00:36,  4.58it/s]

Episode 832, Reward: 10.0


 83%|████████▎ | 833/1000 [03:23<00:36,  4.57it/s]

Episode 833, Reward: 9.0


 84%|████████▎ | 835/1000 [03:23<00:35,  4.66it/s]

Episode 834, Reward: 10.0
Episode 835, Reward: 8.0


 84%|████████▎ | 836/1000 [03:24<00:35,  4.67it/s]

Episode 836, Reward: 9.0


 84%|████████▍ | 838/1000 [03:24<00:34,  4.75it/s]

Episode 837, Reward: 10.0
Episode 838, Reward: 8.0


 84%|████████▍ | 839/1000 [03:24<00:33,  4.74it/s]

Episode 839, Reward: 9.0


 84%|████████▍ | 840/1000 [03:25<00:35,  4.51it/s]

Episode 840, Reward: 10.0


 84%|████████▍ | 841/1000 [03:25<00:35,  4.42it/s]

Episode 841, Reward: 10.0


 84%|████████▍ | 842/1000 [03:25<00:35,  4.47it/s]

Episode 842, Reward: 9.0


 84%|████████▍ | 843/1000 [03:25<00:35,  4.38it/s]

Episode 843, Reward: 10.0


 84%|████████▍ | 844/1000 [03:25<00:35,  4.42it/s]

Episode 844, Reward: 9.0


 84%|████████▍ | 845/1000 [03:26<00:35,  4.38it/s]

Episode 845, Reward: 10.0


 85%|████████▍ | 846/1000 [03:26<00:35,  4.33it/s]

Episode 846, Reward: 10.0


 85%|████████▍ | 847/1000 [03:26<00:37,  4.12it/s]

Episode 847, Reward: 11.0


 85%|████████▍ | 848/1000 [03:26<00:37,  4.04it/s]

Episode 848, Reward: 10.0


 85%|████████▍ | 849/1000 [03:27<00:37,  3.98it/s]

Episode 849, Reward: 10.0


 85%|████████▌ | 850/1000 [03:27<00:37,  3.97it/s]

Episode 850, Reward: 10.0


 85%|████████▌ | 851/1000 [03:27<00:37,  3.96it/s]

Episode 851, Reward: 9.0


 85%|████████▌ | 852/1000 [03:27<00:37,  3.96it/s]

Episode 852, Reward: 9.0


 85%|████████▌ | 853/1000 [03:28<00:35,  4.14it/s]

Episode 853, Reward: 8.0


 85%|████████▌ | 854/1000 [03:28<00:34,  4.17it/s]

Episode 854, Reward: 9.0


 86%|████████▌ | 855/1000 [03:28<00:33,  4.32it/s]

Episode 855, Reward: 8.0


 86%|████████▌ | 856/1000 [03:28<00:33,  4.30it/s]

Episode 856, Reward: 10.0


 86%|████████▌ | 857/1000 [03:29<00:32,  4.34it/s]

Episode 857, Reward: 9.0


 86%|████████▌ | 858/1000 [03:29<00:32,  4.40it/s]

Episode 858, Reward: 9.0


 86%|████████▌ | 859/1000 [03:29<00:32,  4.34it/s]

Episode 859, Reward: 10.0


 86%|████████▌ | 860/1000 [03:29<00:31,  4.42it/s]

Episode 860, Reward: 9.0


 86%|████████▌ | 861/1000 [03:29<00:32,  4.30it/s]

Episode 861, Reward: 10.0


 86%|████████▋ | 863/1000 [03:30<00:29,  4.57it/s]

Episode 862, Reward: 9.0
Episode 863, Reward: 8.0


 86%|████████▋ | 864/1000 [03:30<00:30,  4.48it/s]

Episode 864, Reward: 10.0


 86%|████████▋ | 865/1000 [03:30<00:29,  4.52it/s]

Episode 865, Reward: 9.0


 87%|████████▋ | 866/1000 [03:31<00:30,  4.40it/s]

Episode 866, Reward: 10.0


 87%|████████▋ | 867/1000 [03:31<00:30,  4.33it/s]

Episode 867, Reward: 10.0


 87%|████████▋ | 868/1000 [03:31<00:30,  4.28it/s]

Episode 868, Reward: 10.0


 87%|████████▋ | 869/1000 [03:31<00:30,  4.35it/s]

Episode 869, Reward: 9.0


 87%|████████▋ | 870/1000 [03:32<00:29,  4.33it/s]

Episode 870, Reward: 9.0


 87%|████████▋ | 872/1000 [03:32<00:27,  4.58it/s]

Episode 871, Reward: 9.0
Episode 872, Reward: 8.0


 87%|████████▋ | 873/1000 [03:32<00:26,  4.79it/s]

Episode 873, Reward: 8.0


 87%|████████▋ | 874/1000 [03:32<00:27,  4.60it/s]

Episode 874, Reward: 10.0


 88%|████████▊ | 875/1000 [03:33<00:27,  4.60it/s]

Episode 875, Reward: 9.0


 88%|████████▊ | 876/1000 [03:33<00:27,  4.54it/s]

Episode 876, Reward: 9.0


 88%|████████▊ | 877/1000 [03:33<00:27,  4.44it/s]

Episode 877, Reward: 10.0


 88%|████████▊ | 878/1000 [03:33<00:27,  4.37it/s]

Episode 878, Reward: 10.0


 88%|████████▊ | 879/1000 [03:34<00:27,  4.34it/s]

Episode 879, Reward: 9.0


 88%|████████▊ | 881/1000 [03:34<00:26,  4.53it/s]

Episode 880, Reward: 10.0
Episode 881, Reward: 8.0


 88%|████████▊ | 882/1000 [03:34<00:26,  4.49it/s]

Episode 882, Reward: 10.0


 88%|████████▊ | 883/1000 [03:34<00:26,  4.36it/s]

Episode 883, Reward: 10.0


 88%|████████▊ | 884/1000 [03:35<00:26,  4.33it/s]

Episode 884, Reward: 10.0


 88%|████████▊ | 885/1000 [03:35<00:26,  4.41it/s]

Episode 885, Reward: 9.0


 89%|████████▊ | 886/1000 [03:35<00:26,  4.36it/s]

Episode 886, Reward: 10.0


 89%|████████▊ | 887/1000 [03:35<00:26,  4.32it/s]

Episode 887, Reward: 10.0


 89%|████████▉ | 888/1000 [03:36<00:26,  4.30it/s]

Episode 888, Reward: 10.0


 89%|████████▉ | 889/1000 [03:36<00:26,  4.26it/s]

Episode 889, Reward: 10.0


 89%|████████▉ | 890/1000 [03:36<00:26,  4.23it/s]

Episode 890, Reward: 10.0


 89%|████████▉ | 892/1000 [03:37<00:24,  4.41it/s]

Episode 891, Reward: 10.0
Episode 892, Reward: 8.0


 89%|████████▉ | 893/1000 [03:37<00:24,  4.38it/s]

Episode 893, Reward: 10.0


 89%|████████▉ | 894/1000 [03:37<00:24,  4.26it/s]

Episode 894, Reward: 11.0


 90%|████████▉ | 895/1000 [03:37<00:24,  4.33it/s]

Episode 895, Reward: 9.0


 90%|████████▉ | 896/1000 [03:37<00:24,  4.22it/s]

Episode 896, Reward: 10.0


 90%|████████▉ | 898/1000 [03:38<00:22,  4.46it/s]

Episode 897, Reward: 10.0
Episode 898, Reward: 8.0


 90%|████████▉ | 899/1000 [03:38<00:22,  4.55it/s]

Episode 899, Reward: 9.0


 90%|█████████ | 900/1000 [03:38<00:22,  4.45it/s]

Episode 900, Reward: 9.0


 90%|█████████ | 901/1000 [03:39<00:22,  4.42it/s]

Episode 901, Reward: 8.0


 90%|█████████ | 902/1000 [03:39<00:22,  4.30it/s]

Episode 902, Reward: 9.0


 90%|█████████ | 903/1000 [03:39<00:23,  4.16it/s]

Episode 903, Reward: 10.0


 90%|█████████ | 904/1000 [03:39<00:22,  4.22it/s]

Episode 904, Reward: 9.0


 90%|█████████ | 905/1000 [03:40<00:22,  4.20it/s]

Episode 905, Reward: 9.0


 91%|█████████ | 906/1000 [03:40<00:21,  4.37it/s]

Episode 906, Reward: 8.0


 91%|█████████ | 907/1000 [03:40<00:21,  4.40it/s]

Episode 907, Reward: 8.0


 91%|█████████ | 908/1000 [03:40<00:21,  4.25it/s]

Episode 908, Reward: 10.0


 91%|█████████ | 909/1000 [03:40<00:21,  4.28it/s]

Episode 909, Reward: 8.0


 91%|█████████ | 910/1000 [03:41<00:20,  4.42it/s]

Episode 910, Reward: 8.0


 91%|█████████ | 911/1000 [03:41<00:20,  4.38it/s]

Episode 911, Reward: 10.0


 91%|█████████ | 912/1000 [03:41<00:20,  4.33it/s]

Episode 912, Reward: 10.0


 91%|█████████▏| 914/1000 [03:42<00:19,  4.52it/s]

Episode 913, Reward: 10.0
Episode 914, Reward: 8.0


 92%|█████████▏| 915/1000 [03:42<00:19,  4.35it/s]

Episode 915, Reward: 11.0


 92%|█████████▏| 916/1000 [03:42<00:19,  4.23it/s]

Episode 916, Reward: 10.0


 92%|█████████▏| 917/1000 [03:42<00:19,  4.19it/s]

Episode 917, Reward: 10.0


 92%|█████████▏| 918/1000 [03:43<00:19,  4.23it/s]

Episode 918, Reward: 9.0


 92%|█████████▏| 919/1000 [03:43<00:18,  4.31it/s]

Episode 919, Reward: 9.0


 92%|█████████▏| 921/1000 [03:43<00:17,  4.50it/s]

Episode 920, Reward: 10.0
Episode 921, Reward: 8.0


 92%|█████████▏| 922/1000 [03:43<00:17,  4.35it/s]

Episode 922, Reward: 10.0


 92%|█████████▏| 923/1000 [03:44<00:18,  4.28it/s]

Episode 923, Reward: 10.0


 92%|█████████▏| 924/1000 [03:44<00:17,  4.25it/s]

Episode 924, Reward: 10.0


 92%|█████████▎| 925/1000 [03:44<00:18,  4.13it/s]

Episode 925, Reward: 10.0


 93%|█████████▎| 926/1000 [03:44<00:17,  4.11it/s]

Episode 926, Reward: 10.0


 93%|█████████▎| 927/1000 [03:45<00:17,  4.22it/s]

Episode 927, Reward: 9.0


 93%|█████████▎| 929/1000 [03:45<00:15,  4.59it/s]

Episode 928, Reward: 9.0
Episode 929, Reward: 8.0


 93%|█████████▎| 930/1000 [03:45<00:15,  4.51it/s]

Episode 930, Reward: 10.0


 93%|█████████▎| 931/1000 [03:46<00:15,  4.37it/s]

Episode 931, Reward: 10.0


 93%|█████████▎| 932/1000 [03:46<00:15,  4.42it/s]

Episode 932, Reward: 9.0


 93%|█████████▎| 933/1000 [03:46<00:15,  4.37it/s]

Episode 933, Reward: 10.0


 93%|█████████▎| 934/1000 [03:46<00:15,  4.31it/s]

Episode 934, Reward: 10.0


 94%|█████████▎| 935/1000 [03:46<00:14,  4.42it/s]

Episode 935, Reward: 8.0


 94%|█████████▎| 936/1000 [03:47<00:14,  4.37it/s]

Episode 936, Reward: 10.0


 94%|█████████▎| 937/1000 [03:47<00:14,  4.37it/s]

Episode 937, Reward: 9.0


 94%|█████████▍| 938/1000 [03:47<00:14,  4.38it/s]

Episode 938, Reward: 9.0


 94%|█████████▍| 939/1000 [03:47<00:13,  4.41it/s]

Episode 939, Reward: 9.0


 94%|█████████▍| 941/1000 [03:48<00:12,  4.56it/s]

Episode 940, Reward: 10.0
Episode 941, Reward: 8.0


 94%|█████████▍| 942/1000 [03:48<00:12,  4.49it/s]

Episode 942, Reward: 10.0


 94%|█████████▍| 943/1000 [03:48<00:12,  4.42it/s]

Episode 943, Reward: 10.0


 94%|█████████▍| 944/1000 [03:49<00:13,  4.30it/s]

Episode 944, Reward: 10.0


 94%|█████████▍| 945/1000 [03:49<00:12,  4.27it/s]

Episode 945, Reward: 10.0


 95%|█████████▍| 946/1000 [03:49<00:12,  4.25it/s]

Episode 946, Reward: 10.0


 95%|█████████▍| 947/1000 [03:49<00:12,  4.20it/s]

Episode 947, Reward: 10.0


 95%|█████████▍| 948/1000 [03:49<00:12,  4.07it/s]

Episode 948, Reward: 11.0


 95%|█████████▍| 949/1000 [03:50<00:12,  4.17it/s]

Episode 949, Reward: 9.0


 95%|█████████▌| 950/1000 [03:50<00:11,  4.27it/s]

Episode 950, Reward: 9.0


 95%|█████████▌| 951/1000 [03:50<00:11,  4.32it/s]

Episode 951, Reward: 9.0


 95%|█████████▌| 952/1000 [03:50<00:11,  4.26it/s]

Episode 952, Reward: 10.0


 95%|█████████▌| 953/1000 [03:51<00:11,  4.23it/s]

Episode 953, Reward: 10.0


 95%|█████████▌| 954/1000 [03:51<00:10,  4.38it/s]

Episode 954, Reward: 8.0


 96%|█████████▌| 955/1000 [03:51<00:10,  4.23it/s]

Episode 955, Reward: 10.0


 96%|█████████▌| 956/1000 [03:51<00:10,  4.21it/s]

Episode 956, Reward: 9.0


 96%|█████████▌| 957/1000 [03:52<00:09,  4.34it/s]

Episode 957, Reward: 8.0


 96%|█████████▌| 958/1000 [03:52<00:09,  4.48it/s]

Episode 958, Reward: 8.0


 96%|█████████▌| 959/1000 [03:52<00:09,  4.48it/s]

Episode 959, Reward: 8.0


 96%|█████████▌| 960/1000 [03:52<00:09,  4.35it/s]

Episode 960, Reward: 9.0


 96%|█████████▌| 961/1000 [03:53<00:09,  4.17it/s]

Episode 961, Reward: 10.0


 96%|█████████▌| 962/1000 [03:53<00:09,  4.18it/s]

Episode 962, Reward: 9.0


 96%|█████████▋| 963/1000 [03:53<00:08,  4.35it/s]

Episode 963, Reward: 8.0


 96%|█████████▋| 964/1000 [03:53<00:08,  4.42it/s]

Episode 964, Reward: 9.0


 96%|█████████▋| 965/1000 [03:53<00:08,  4.33it/s]

Episode 965, Reward: 10.0


 97%|█████████▋| 966/1000 [03:54<00:07,  4.29it/s]

Episode 966, Reward: 10.0


 97%|█████████▋| 967/1000 [03:54<00:07,  4.20it/s]

Episode 967, Reward: 11.0


 97%|█████████▋| 968/1000 [03:54<00:07,  4.19it/s]

Episode 968, Reward: 10.0


 97%|█████████▋| 969/1000 [03:54<00:07,  4.11it/s]

Episode 969, Reward: 10.0


 97%|█████████▋| 970/1000 [03:55<00:07,  4.14it/s]

Episode 970, Reward: 10.0


 97%|█████████▋| 971/1000 [03:55<00:07,  4.14it/s]

Episode 971, Reward: 10.0


 97%|█████████▋| 972/1000 [03:55<00:06,  4.14it/s]

Episode 972, Reward: 10.0


 97%|█████████▋| 973/1000 [03:55<00:06,  4.24it/s]

Episode 973, Reward: 9.0


 98%|█████████▊| 975/1000 [03:56<00:05,  4.47it/s]

Episode 974, Reward: 10.0
Episode 975, Reward: 8.0


 98%|█████████▊| 977/1000 [03:56<00:04,  4.80it/s]

Episode 976, Reward: 8.0
Episode 977, Reward: 8.0


 98%|█████████▊| 978/1000 [03:56<00:04,  4.63it/s]

Episode 978, Reward: 10.0


 98%|█████████▊| 979/1000 [03:57<00:04,  4.55it/s]

Episode 979, Reward: 9.0


 98%|█████████▊| 980/1000 [03:57<00:04,  4.37it/s]

Episode 980, Reward: 11.0


 98%|█████████▊| 981/1000 [03:57<00:04,  4.29it/s]

Episode 981, Reward: 10.0


 98%|█████████▊| 982/1000 [03:57<00:04,  4.22it/s]

Episode 982, Reward: 10.0


 98%|█████████▊| 983/1000 [03:58<00:03,  4.26it/s]

Episode 983, Reward: 9.0


 98%|█████████▊| 984/1000 [03:58<00:03,  4.33it/s]

Episode 984, Reward: 9.0


 98%|█████████▊| 985/1000 [03:58<00:03,  4.37it/s]

Episode 985, Reward: 9.0


 99%|█████████▊| 986/1000 [03:58<00:03,  4.30it/s]

Episode 986, Reward: 10.0


 99%|█████████▊| 987/1000 [03:59<00:03,  4.22it/s]

Episode 987, Reward: 9.0


 99%|█████████▉| 988/1000 [03:59<00:02,  4.21it/s]

Episode 988, Reward: 10.0


 99%|█████████▉| 989/1000 [03:59<00:02,  4.26it/s]

Episode 989, Reward: 9.0


 99%|█████████▉| 990/1000 [03:59<00:02,  4.18it/s]

Episode 990, Reward: 11.0


 99%|█████████▉| 991/1000 [03:59<00:02,  4.19it/s]

Episode 991, Reward: 9.0


 99%|█████████▉| 992/1000 [04:00<00:01,  4.19it/s]

Episode 992, Reward: 10.0


 99%|█████████▉| 993/1000 [04:00<00:01,  4.25it/s]

Episode 993, Reward: 9.0


 99%|█████████▉| 994/1000 [04:00<00:01,  4.23it/s]

Episode 994, Reward: 10.0


100%|█████████▉| 995/1000 [04:00<00:01,  4.08it/s]

Episode 995, Reward: 11.0


100%|█████████▉| 996/1000 [04:01<00:00,  4.09it/s]

Episode 996, Reward: 10.0


100%|█████████▉| 997/1000 [04:01<00:00,  4.12it/s]

Episode 997, Reward: 10.0


100%|█████████▉| 998/1000 [04:01<00:00,  4.13it/s]

Episode 998, Reward: 10.0


100%|█████████▉| 999/1000 [04:01<00:00,  4.26it/s]

Episode 999, Reward: 9.0


100%|██████████| 1000/1000 [04:02<00:00,  4.13it/s]

Episode 1000, Reward: 8.0





Test Episode Reward: 10.0
